From 377cf9d86b217c4be9c62ae664b09fd77cf55d0b Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 25 Dec 2025 12:34:21 -0500 Subject: [PATCH] [mv3] Add support for regex-based extended filters Related issues: - https://github.com/uBlockOrigin/uBOL-home/issues/223 - https://github.com/uBlockOrigin/uAssets/issues/31261 --- .../extension/js/scripting/isolated-api.js | 20 ++++++-- platform/mv3/make-rulesets.js | 49 ++++++++++++++----- platform/mv3/make-scriptlets.js | 32 ++++++++++-- platform/mv3/scriptlets/scriptlet.template.js | 20 ++++++-- src/js/static-dnr-filtering.js | 3 -- 5 files changed, 98 insertions(+), 26 deletions(-) diff --git a/platform/mv3/extension/js/scripting/isolated-api.js b/platform/mv3/extension/js/scripting/isolated-api.js index ffb1565d8..837b16131 100644 --- a/platform/mv3/extension/js/scripting/isolated-api.js +++ b/platform/mv3/extension/js/scripting/isolated-api.js @@ -131,10 +131,7 @@ return -1; }; - const lookupHostname = (hostname, data) => { - const listref = binarySearch(data.hostnames, hostname); - if ( listref === -1 ) { return; } - const ilist = data.selectorListRefs[listref]; + const selectorsFromListIndex = (data, ilist) => { const list = JSON.parse(`[${data.selectorLists[ilist]}]`); const { result } = data; for ( const iselector of list ) { @@ -146,6 +143,21 @@ } }; + const lookupHostname = (hostname, data) => { + const listref = binarySearch(data.hostnames, hostname); + if ( listref !== -1 ) { + selectorsFromListIndex(data, data.selectorListRefs[listref]); + } + const { fromRegexes } = data; + for ( let i = 0, n = fromRegexes.length; i < n; i += 2 ) { + if ( typeof fromRegexes[i+0] === 'string' ) { + fromRegexes[i+0] = new RegExp(fromRegexes[i+0]); + } + if ( fromRegexes[i+0].test(hostname) === false ) { continue; } + selectorsFromListIndex(data, fromRegexes[i+1]); + } + }; + const selectorsFromRuleset = async (realm, rulesetId, result) => { const data = await localRead(`css.${realm}.${rulesetId}`); if ( typeof data !== 'object' || data === null ) { return; } diff --git a/platform/mv3/make-rulesets.js b/platform/mv3/make-rulesets.js index 65a1dfc41..969747f21 100644 --- a/platform/mv3/make-rulesets.js +++ b/platform/mv3/make-rulesets.js @@ -111,6 +111,8 @@ const logProgress = text => { process?.stdout?.write?.(text.length > 120 ? `${text.slice(0, 119)}… ` : `${text} `); }; +const isHnRegexOrPath = hn => hn.includes('/'); + /******************************************************************************/ async function fetchText(url, cacheDir) { @@ -828,7 +830,24 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { // Collate all distinct selectors const allSelectors = new Map(); const allHostnames = new Map(); + const allRegexesOrPaths = new Map(); let hasEntities = false; + + const storeHostnameSelectorPair = (hn, iSelector) => { + if ( isHnRegexOrPath(hn) ) { + if ( allRegexesOrPaths.has(hn) === false ) { + allRegexesOrPaths.set(hn, new Set()); + } + allRegexesOrPaths.get(hn).add(iSelector); + } else { + if ( allHostnames.has(hn) === false ) { + allHostnames.set(hn, new Set()); + } + allHostnames.get(hn).add(iSelector); + hasEntities ||= hn.endsWith('.*'); + } + }; + for ( const [ selector, details ] of mapin ) { if ( details.rejected ) { continue; } if ( allSelectors.has(selector) === false ) { @@ -837,30 +856,30 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { const iSelector = allSelectors.get(selector); if ( details.matches ) { for ( const hn of details.matches ) { - if ( allHostnames.has(hn) === false ) { - allHostnames.set(hn, new Set()); - } - allHostnames.get(hn).add(iSelector); - hasEntities ||= hn.endsWith('.*'); + storeHostnameSelectorPair(hn, iSelector); } } if ( details.excludeMatches ) { for ( const hn of details.excludeMatches ) { - if ( allHostnames.has(hn) === false ) { - allHostnames.set(hn, new Set()); - } - allHostnames.get(hn).add(~iSelector); - hasEntities ||= hn.endsWith('.*'); + storeHostnameSelectorPair(hn, ~iSelector); } } } const allSelectorLists = new Map(); - for ( const [ hn, selectorSet ] of allHostnames ) { + + const ilistFromSelectorSet = selectorSet => { const list = JSON.stringify(Array.from(selectorSet).sort()).slice(1, -1); if ( allSelectorLists.has(list) === false ) { allSelectorLists.set(list, allSelectorLists.size); } - allHostnames.set(hn, allSelectorLists.get(list)); + return allSelectorLists.get(list); + }; + + for ( const [ hn, selectorSet ] of allHostnames ) { + allHostnames.set(hn, ilistFromSelectorSet(selectorSet)); + } + for ( const [ regexOrPath, selectorSet ] of allRegexesOrPaths ) { + allRegexesOrPaths.set(regexOrPath, ilistFromSelectorSet(selectorSet)); } const sortedHostnames = Array.from(allHostnames.keys()).toSorted((a, b) => { @@ -875,6 +894,10 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { selectorListRefs: sortedHostnames.map(a => allHostnames.get(a)), hostnames: sortedHostnames, hasEntities, + fromRegexes: Array.from(allRegexesOrPaths) + .filter(a => a[0].startsWith('/') && a[0].endsWith('/')) + .map(a => [ a[0].slice(1, -1), a[1] ]) + .flat(), }); writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, data); @@ -890,7 +913,7 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { log(`CSS-${realm}: ${allSelectors.size} distinct filters for ${allHostnames.size} distinct hostnames`); - return sortedHostnames.length; + return sortedHostnames.length + allRegexesOrPaths.size; } /******************************************************************************/ diff --git a/platform/mv3/make-scriptlets.js b/platform/mv3/make-scriptlets.js index 215d0454f..a64a853a4 100644 --- a/platform/mv3/make-scriptlets.js +++ b/platform/mv3/make-scriptlets.js @@ -33,6 +33,7 @@ const worldTemplate = { args: new Map(), arglists: new Map(), hostnames: new Map(), + regexesOrPaths: new Map(), matches: new Set(), hasEntities: false, hasAncestors: false, @@ -126,6 +127,15 @@ export function compile(assetDetails, details) { const arglistIndex = worldDetails.arglists.get(arglistKey); if ( details.matches ) { for ( const hn of details.matches ) { + if ( hn.includes('/') ) { + worldDetails.matches.clear(); + worldDetails.matches.add('*'); + if ( worldDetails.regexesOrPaths.has(hn) === false ) { + worldDetails.regexesOrPaths.set(hn, new Set()); + } + worldDetails.regexesOrPaths.get(hn).add(arglistIndex); + continue; + } const isEntity = hn.endsWith('.*') || hn.endsWith('.*>>'); worldDetails.hasEntities ||= isEntity; const isAncestor = hn.endsWith('>>') @@ -147,6 +157,13 @@ export function compile(assetDetails, details) { } if ( details.excludeMatches ) { for ( const hn of details.excludeMatches ) { + if ( hn.includes('/') ) { + if ( worldDetails.regexesOrPaths.has(hn) === false ) { + worldDetails.regexesOrPaths.set(hn, new Set()); + } + worldDetails.regexesOrPaths.get(hn).add(~arglistIndex); + continue; + } if ( worldDetails.hostnames.has(hn) === false ) { worldDetails.hostnames.set(hn, new Set()); } @@ -172,9 +189,17 @@ export async function commit(rulesetId, path, writeFn) { if ( d !== 0 ) { return d; } return a[0] < b[0] ? -1 : 1; }).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)])); - let content = safeReplace(scriptletTemplate, /\$rulesetId\$/, rulesetId, 0); - content = safeReplace(content, 'self.$hasEntities$', 'true'); - content = safeReplace(content, 'self.$hasAncestors$', 'true'); + const scriptletFromRegexes = Array.from(worldDetails.regexesOrPaths) + .filter(a => a[0].startsWith('/') && a[0].endsWith('/')) + .map(a => [ a[0].slice(1, -1), JSON.stringify(Array.from(a[1])).slice(1,-1) ]) + .flat(); + let content = safeReplace(scriptletTemplate, 'self.$hasEntities$', JSON.stringify(worldDetails.hasEntities)); + content = safeReplace(content, 'self.$hasAncestors$', JSON.stringify(worldDetails.hasAncestors)); + content = safeReplace(content, 'self.$hasRegexes$', JSON.stringify(scriptletFromRegexes.length !== 0)); + content = safeReplace(content, + 'self.$scriptletFromRegexes$', + `/* ${worldDetails.regexesOrPaths.size} */ ${JSON.stringify(scriptletFromRegexes)}` + ); content = safeReplace(content, 'self.$scriptletHostnames$', `/* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[0]))}` @@ -199,6 +224,7 @@ export async function commit(rulesetId, path, writeFn) { 'self.$scriptletCode$', Array.from(allFunctions.values()).sort().join('\n\n') ); + content = safeReplace(content, /\$rulesetId\$/, rulesetId, 0); writeFn(`${path}/${world.toLowerCase()}/${rulesetId}.js`, content); stats[world] = Array.from(worldDetails.matches).sort(); } diff --git a/platform/mv3/scriptlets/scriptlet.template.js b/platform/mv3/scriptlets/scriptlet.template.js index b8dedca6b..fa6f65ddb 100644 --- a/platform/mv3/scriptlets/scriptlet.template.js +++ b/platform/mv3/scriptlets/scriptlet.template.js @@ -46,8 +46,11 @@ const $scriptletArglistRefs$ = self.$scriptletArglistRefs$; const $scriptletHostnames$ = self.$scriptletHostnames$; +const $scriptletFromRegexes$ = self.$scriptletFromRegexes$; + const $hasEntities$ = self.$hasEntities$; const $hasAncestors$ = self.$hasAncestors$; +const $hasRegexes$ = self.$hasRegexes$; /******************************************************************************/ @@ -134,11 +137,9 @@ if ( $hasAncestors$ ) { } $scriptletHostnames$.length = 0; -if ( todoIndices.size === 0 ) { return; } - // Collect arglist references const todo = new Set(); -{ +if ( todoIndices.size !== 0 ) { const arglistRefs = $scriptletArglistRefs$.split(';'); for ( const i of todoIndices ) { for ( const ref of JSON.parse(`[${arglistRefs[i]}]`) ) { @@ -146,6 +147,19 @@ const todo = new Set(); } } } +if ( $hasRegexes$ ) { + const { hns } = entries[0]; + for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 2 ) { + const regex = new RegExp($scriptletFromRegexes$[i+0]); + for ( const hn of hns ) { + if ( regex.test(hn) === false ) { continue; } + for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+1]}]`) ) { + todo.add(ref); + } + } + } +} +if ( todo.size === 0 ) { return; } // Execute scriplets { diff --git a/src/js/static-dnr-filtering.js b/src/js/static-dnr-filtering.js index 5268a4cbf..4356dd61c 100644 --- a/src/js/static-dnr-filtering.js +++ b/src/js/static-dnr-filtering.js @@ -112,7 +112,6 @@ function addExtendedToDNR(context, parser) { for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) { if ( bad ) { continue; } if ( exception ) { continue; } - if ( isRegexOrPath(hn) ) { continue; } let details = context.scriptletFilters.get(argsToken); if ( details === undefined ) { context.scriptletFilters.set(argsToken, details = { args }); @@ -226,8 +225,6 @@ function addExtendedToDNR(context, parser) { for ( const { hn, not, bad } of parser.getExtFilterDomainIterator() ) { if ( bad ) { continue; } if ( not && exception ) { continue; } - // TODO: Support regex- and path-based entries - if ( isRegexOrPath(hn) ) { continue; } if ( not || exception ) { excludeMatches.push(hn); } else if ( hn !== '*' ) {