mirror of
https://github.com/gorhill/uBlock.git
synced 2026-03-11 09:04:36 +00:00
[mv3] Add support for regex-based extended filters
Related issues: - https://github.com/uBlockOrigin/uBOL-home/issues/223 - https://github.com/uBlockOrigin/uAssets/issues/31261
This commit is contained in:
parent
8ad61208e7
commit
377cf9d86b
5 changed files with 98 additions and 26 deletions
|
|
@ -131,10 +131,7 @@
|
|||
return -1;
|
||||
};
|
||||
|
||||
const lookupHostname = (hostname, data) => {
|
||||
const listref = binarySearch(data.hostnames, hostname);
|
||||
if ( listref === -1 ) { return; }
|
||||
const ilist = data.selectorListRefs[listref];
|
||||
const selectorsFromListIndex = (data, ilist) => {
|
||||
const list = JSON.parse(`[${data.selectorLists[ilist]}]`);
|
||||
const { result } = data;
|
||||
for ( const iselector of list ) {
|
||||
|
|
@ -146,6 +143,21 @@
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Look up `hostname` in the ruleset data and hand every matching selector
 * list to `selectorsFromListIndex()`.
 *
 * Two tables are consulted:
 * - `data.hostnames`, searched with `binarySearch()`; a hit selects the
 *   selector list referenced by `data.selectorListRefs` at the same index.
 * - `data.fromRegexes`, a flat array of pairs: a regex (source string or
 *   already compiled instance) followed by a selector list index.
 *
 * Regex sources are compiled on first use and the compiled instance is
 * written back into `data.fromRegexes`, so later lookups against the same
 * data object reuse it.
 *
 * @param {string} hostname - Hostname to look up.
 * @param {object} data - Ruleset data; `fromRegexes` is mutated in place.
 */
const lookupHostname = (hostname, data) => {
    const listref = binarySearch(data.hostnames, hostname);
    if ( listref !== -1 ) {
        selectorsFromListIndex(data, data.selectorListRefs[listref]);
    }
    const { fromRegexes } = data;
    // Data without a regex table has nothing more to contribute.
    if ( Array.isArray(fromRegexes) === false ) { return; }
    for ( let i = 0, n = fromRegexes.length; i < n; i += 2 ) {
        if ( typeof fromRegexes[i+0] === 'string' ) {
            try {
                fromRegexes[i+0] = new RegExp(fromRegexes[i+0]);
            } catch {
                // A pattern the current engine cannot compile must not abort
                // the whole lookup: replace it with a regex which never
                // matches, so that the entry is skipped now and not
                // recompiled on later lookups.
                fromRegexes[i+0] = /(?!)/;
            }
        }
        if ( fromRegexes[i+0].test(hostname) === false ) { continue; }
        selectorsFromListIndex(data, fromRegexes[i+1]);
    }
};
|
||||
|
||||
const selectorsFromRuleset = async (realm, rulesetId, result) => {
|
||||
const data = await localRead(`css.${realm}.${rulesetId}`);
|
||||
if ( typeof data !== 'object' || data === null ) { return; }
|
||||
|
|
|
|||
|
|
@ -111,6 +111,8 @@ const logProgress = text => {
|
|||
process?.stdout?.write?.(text.length > 120 ? `${text.slice(0, 119)}… ` : `${text} `);
|
||||
};
|
||||
|
||||
/**
 * Tell whether a "hostname" entry contains a slash, which sets it apart
 * from plain hostnames and entities.
 *
 * @param {string} hn - Entry to test.
 * @returns {boolean} `true` when `hn` contains at least one `/`.
 */
const isHnRegexOrPath = hn => {
    return hn.includes('/');
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
async function fetchText(url, cacheDir) {
|
||||
|
|
@ -828,7 +830,24 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
// Collate all distinct selectors
|
||||
const allSelectors = new Map();
|
||||
const allHostnames = new Map();
|
||||
const allRegexesOrPaths = new Map();
|
||||
let hasEntities = false;
|
||||
|
||||
// Record that selector index `iSelector` applies to `hn`.
//
// Entries for which isHnRegexOrPath() is true are collected in
// `allRegexesOrPaths`; all other entries are collected in `allHostnames`,
// and raise `hasEntities` when they end with `.*`.
const storeHostnameSelectorPair = (hn, iSelector) => {
    if ( isHnRegexOrPath(hn) ) {
        let regexSelectors = allRegexesOrPaths.get(hn);
        if ( regexSelectors === undefined ) {
            regexSelectors = new Set();
            allRegexesOrPaths.set(hn, regexSelectors);
        }
        regexSelectors.add(iSelector);
        return;
    }
    let hostnameSelectors = allHostnames.get(hn);
    if ( hostnameSelectors === undefined ) {
        hostnameSelectors = new Set();
        allHostnames.set(hn, hostnameSelectors);
    }
    hostnameSelectors.add(iSelector);
    hasEntities ||= hn.endsWith('.*');
};
|
||||
|
||||
for ( const [ selector, details ] of mapin ) {
|
||||
if ( details.rejected ) { continue; }
|
||||
if ( allSelectors.has(selector) === false ) {
|
||||
|
|
@ -837,30 +856,30 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
const iSelector = allSelectors.get(selector);
|
||||
if ( details.matches ) {
|
||||
for ( const hn of details.matches ) {
|
||||
if ( allHostnames.has(hn) === false ) {
|
||||
allHostnames.set(hn, new Set());
|
||||
}
|
||||
allHostnames.get(hn).add(iSelector);
|
||||
hasEntities ||= hn.endsWith('.*');
|
||||
storeHostnameSelectorPair(hn, iSelector);
|
||||
}
|
||||
}
|
||||
if ( details.excludeMatches ) {
|
||||
for ( const hn of details.excludeMatches ) {
|
||||
if ( allHostnames.has(hn) === false ) {
|
||||
allHostnames.set(hn, new Set());
|
||||
}
|
||||
allHostnames.get(hn).add(~iSelector);
|
||||
hasEntities ||= hn.endsWith('.*');
|
||||
storeHostnameSelectorPair(hn, ~iSelector);
|
||||
}
|
||||
}
|
||||
}
|
||||
const allSelectorLists = new Map();
|
||||
for ( const [ hn, selectorSet ] of allHostnames ) {
|
||||
|
||||
/**
 * Return the index of the selector list made of the members of
 * `selectorSet`, registering that list in `allSelectorLists` the first time
 * it is seen.
 *
 * The helper only computes and returns the index; storing it against a
 * hostname or regex key is the caller's job.
 *
 * @param {Set<number>} selectorSet - Selector indices to serialize.
 * @returns {number} Index of the corresponding entry in `allSelectorLists`.
 */
const ilistFromSelectorSet = selectorSet => {
    // The serialized list is the map key: sorting makes sets with the same
    // members produce the same key regardless of insertion order.
    const list = JSON.stringify(Array.from(selectorSet).sort()).slice(1, -1);
    if ( allSelectorLists.has(list) === false ) {
        allSelectorLists.set(list, allSelectorLists.size);
    }
    return allSelectorLists.get(list);
};
|
||||
|
||||
for ( const [ hn, selectorSet ] of allHostnames ) {
|
||||
allHostnames.set(hn, ilistFromSelectorSet(selectorSet));
|
||||
}
|
||||
for ( const [ regexOrPath, selectorSet ] of allRegexesOrPaths ) {
|
||||
allRegexesOrPaths.set(regexOrPath, ilistFromSelectorSet(selectorSet));
|
||||
}
|
||||
|
||||
const sortedHostnames = Array.from(allHostnames.keys()).toSorted((a, b) => {
|
||||
|
|
@ -875,6 +894,10 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
selectorListRefs: sortedHostnames.map(a => allHostnames.get(a)),
|
||||
hostnames: sortedHostnames,
|
||||
hasEntities,
|
||||
fromRegexes: Array.from(allRegexesOrPaths)
|
||||
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
|
||||
.map(a => [ a[0].slice(1, -1), a[1] ])
|
||||
.flat(),
|
||||
});
|
||||
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, data);
|
||||
|
||||
|
|
@ -890,7 +913,7 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
|
||||
log(`CSS-${realm}: ${allSelectors.size} distinct filters for ${allHostnames.size} distinct hostnames`);
|
||||
|
||||
return sortedHostnames.length;
|
||||
return sortedHostnames.length + allRegexesOrPaths.size;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ const worldTemplate = {
|
|||
args: new Map(),
|
||||
arglists: new Map(),
|
||||
hostnames: new Map(),
|
||||
regexesOrPaths: new Map(),
|
||||
matches: new Set(),
|
||||
hasEntities: false,
|
||||
hasAncestors: false,
|
||||
|
|
@ -126,6 +127,15 @@ export function compile(assetDetails, details) {
|
|||
const arglistIndex = worldDetails.arglists.get(arglistKey);
|
||||
if ( details.matches ) {
|
||||
for ( const hn of details.matches ) {
|
||||
if ( hn.includes('/') ) {
|
||||
worldDetails.matches.clear();
|
||||
worldDetails.matches.add('*');
|
||||
if ( worldDetails.regexesOrPaths.has(hn) === false ) {
|
||||
worldDetails.regexesOrPaths.set(hn, new Set());
|
||||
}
|
||||
worldDetails.regexesOrPaths.get(hn).add(arglistIndex);
|
||||
continue;
|
||||
}
|
||||
const isEntity = hn.endsWith('.*') || hn.endsWith('.*>>');
|
||||
worldDetails.hasEntities ||= isEntity;
|
||||
const isAncestor = hn.endsWith('>>')
|
||||
|
|
@ -147,6 +157,13 @@ export function compile(assetDetails, details) {
|
|||
}
|
||||
if ( details.excludeMatches ) {
|
||||
for ( const hn of details.excludeMatches ) {
|
||||
if ( hn.includes('/') ) {
|
||||
if ( worldDetails.regexesOrPaths.has(hn) === false ) {
|
||||
worldDetails.regexesOrPaths.set(hn, new Set());
|
||||
}
|
||||
worldDetails.regexesOrPaths.get(hn).add(~arglistIndex);
|
||||
continue;
|
||||
}
|
||||
if ( worldDetails.hostnames.has(hn) === false ) {
|
||||
worldDetails.hostnames.set(hn, new Set());
|
||||
}
|
||||
|
|
@ -172,9 +189,17 @@ export async function commit(rulesetId, path, writeFn) {
|
|||
if ( d !== 0 ) { return d; }
|
||||
return a[0] < b[0] ? -1 : 1;
|
||||
}).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)]));
|
||||
let content = safeReplace(scriptletTemplate, /\$rulesetId\$/, rulesetId, 0);
|
||||
content = safeReplace(content, 'self.$hasEntities$', 'true');
|
||||
content = safeReplace(content, 'self.$hasAncestors$', 'true');
|
||||
const scriptletFromRegexes = Array.from(worldDetails.regexesOrPaths)
|
||||
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
|
||||
.map(a => [ a[0].slice(1, -1), JSON.stringify(Array.from(a[1])).slice(1,-1) ])
|
||||
.flat();
|
||||
let content = safeReplace(scriptletTemplate, 'self.$hasEntities$', JSON.stringify(worldDetails.hasEntities));
|
||||
content = safeReplace(content, 'self.$hasAncestors$', JSON.stringify(worldDetails.hasAncestors));
|
||||
content = safeReplace(content, 'self.$hasRegexes$', JSON.stringify(scriptletFromRegexes.length !== 0));
|
||||
content = safeReplace(content,
|
||||
'self.$scriptletFromRegexes$',
|
||||
`/* ${worldDetails.regexesOrPaths.size} */ ${JSON.stringify(scriptletFromRegexes)}`
|
||||
);
|
||||
content = safeReplace(content,
|
||||
'self.$scriptletHostnames$',
|
||||
`/* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[0]))}`
|
||||
|
|
@ -199,6 +224,7 @@ export async function commit(rulesetId, path, writeFn) {
|
|||
'self.$scriptletCode$',
|
||||
Array.from(allFunctions.values()).sort().join('\n\n')
|
||||
);
|
||||
content = safeReplace(content, /\$rulesetId\$/, rulesetId, 0);
|
||||
writeFn(`${path}/${world.toLowerCase()}/${rulesetId}.js`, content);
|
||||
stats[world] = Array.from(worldDetails.matches).sort();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,8 +46,11 @@ const $scriptletArglistRefs$ = self.$scriptletArglistRefs$;
|
|||
|
||||
const $scriptletHostnames$ = self.$scriptletHostnames$;
|
||||
|
||||
const $scriptletFromRegexes$ = self.$scriptletFromRegexes$;
|
||||
|
||||
const $hasEntities$ = self.$hasEntities$;
|
||||
const $hasAncestors$ = self.$hasAncestors$;
|
||||
const $hasRegexes$ = self.$hasRegexes$;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
|
|
@ -134,11 +137,9 @@ if ( $hasAncestors$ ) {
|
|||
}
|
||||
$scriptletHostnames$.length = 0;
|
||||
|
||||
if ( todoIndices.size === 0 ) { return; }
|
||||
|
||||
// Collect arglist references
|
||||
const todo = new Set();
|
||||
{
|
||||
if ( todoIndices.size !== 0 ) {
|
||||
const arglistRefs = $scriptletArglistRefs$.split(';');
|
||||
for ( const i of todoIndices ) {
|
||||
for ( const ref of JSON.parse(`[${arglistRefs[i]}]`) ) {
|
||||
|
|
@ -146,6 +147,19 @@ const todo = new Set();
|
|||
}
|
||||
}
|
||||
}
|
||||
// Collect arglist references from regex-based entries. The regex table is a
// flat array of pairs: a regex source followed by a comma-separated list of
// arglist references.
if ( $hasRegexes$ ) {
    const { hns } = entries[0];
    for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 2 ) {
        const regex = new RegExp($scriptletFromRegexes$[i+0]);
        // `todo` is a Set: one matching hostname is enough, so there is no
        // need to test the remaining hostnames nor to parse the references
        // more than once.
        let matched = false;
        for ( const hn of hns ) {
            if ( regex.test(hn) === false ) { continue; }
            matched = true;
            break;
        }
        if ( matched === false ) { continue; }
        for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+1]}]`) ) {
            todo.add(ref);
        }
    }
}
|
||||
if ( todo.size === 0 ) { return; }
|
||||
|
||||
// Execute scriptlets
|
||||
{
|
||||
|
|
|
|||
|
|
@ -112,7 +112,6 @@ function addExtendedToDNR(context, parser) {
|
|||
for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
|
||||
if ( bad ) { continue; }
|
||||
if ( exception ) { continue; }
|
||||
if ( isRegexOrPath(hn) ) { continue; }
|
||||
let details = context.scriptletFilters.get(argsToken);
|
||||
if ( details === undefined ) {
|
||||
context.scriptletFilters.set(argsToken, details = { args });
|
||||
|
|
@ -226,8 +225,6 @@ function addExtendedToDNR(context, parser) {
|
|||
for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) {
|
||||
if ( bad ) { continue; }
|
||||
if ( not && exception ) { continue; }
|
||||
// TODO: Support regex- and path-based entries
|
||||
if ( isRegexOrPath(hn) ) { continue; }
|
||||
if ( not || exception ) {
|
||||
excludeMatches.push(hn);
|
||||
} else if ( hn !== '*' ) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue