diff --git a/platform/mv3/extension/js/scripting/isolated-api.js b/platform/mv3/extension/js/scripting/isolated-api.js index 837b16131..774517268 100644 --- a/platform/mv3/extension/js/scripting/isolated-api.js +++ b/platform/mv3/extension/js/scripting/isolated-api.js @@ -149,12 +149,13 @@ selectorsFromListIndex(data, data.selectorListRefs[listref]); } const { fromRegexes } = data; - for ( let i = 0, n = fromRegexes.length; i < n; i += 2 ) { - if ( typeof fromRegexes[i+0] === 'string' ) { - fromRegexes[i+0] = new RegExp(fromRegexes[i+0]); + for ( let i = 0, n = fromRegexes.length; i < n; i += 3 ) { + if ( hostname.includes(fromRegexes[i+0]) === false ) { continue; } + if ( typeof fromRegexes[i+1] === 'string' ) { + fromRegexes[i+1] = new RegExp(fromRegexes[i+1]); } - if ( fromRegexes[i+0].test(hostname) === false ) { continue; } - selectorsFromListIndex(data, fromRegexes[i+1]); + if ( fromRegexes[i+1].test(hostname) === false ) { continue; } + selectorsFromListIndex(data, fromRegexes[i+2]); } }; diff --git a/platform/mv3/make-rulesets.js b/platform/mv3/make-rulesets.js index 969747f21..d17aa5218 100644 --- a/platform/mv3/make-rulesets.js +++ b/platform/mv3/make-rulesets.js @@ -33,6 +33,7 @@ import { import { execSync } from 'node:child_process'; import fs from 'fs/promises'; +import { literalStrFromRegex } from './js/regex-analyzer.js'; import path from 'path'; import process from 'process'; import redirectResourcesMap from './js/redirect-resources.js'; @@ -888,7 +889,7 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { return a < b ? -1 : 1; }); - const data = JSON.stringify({ + const data = { selectors: Array.from(allSelectors.keys()), selectorLists: Array.from(allSelectorLists.keys()), selectorListRefs: sortedHostnames.map(a => allHostnames.get(a)), @@ -896,10 +897,12 @@ async function processCosmeticFilters(assetDetails, realm, mapin) { hasEntities, fromRegexes: Array.from(allRegexesOrPaths) .filter(a => a[0].startsWith('/') && a[0].endsWith('/')) - .map(a => [ a[0].slice(1, -1), a[1] ]) - .flat(), - }); - writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, data); + .map(a => { + const restr = a[0].slice(1,-1); + return [ literalStrFromRegex(restr).slice(0,8), restr, a[1] ] + }).flat(), + }; + writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, JSON.stringify(data)); // The cosmetic filters will be injected programmatically as content // script and the decisions to activate the cosmetic filters will be diff --git a/platform/mv3/make-scriptlets.js b/platform/mv3/make-scriptlets.js index a64a853a4..8a6b2d347 100644 --- a/platform/mv3/make-scriptlets.js +++ b/platform/mv3/make-scriptlets.js @@ -21,6 +21,7 @@ import { builtinScriptlets } from './js/resources/scriptlets.js'; import fs from 'fs/promises'; +import { literalStrFromRegex } from './js/regex-analyzer.js'; import { safeReplace } from './safe-replace.js'; /******************************************************************************/ @@ -191,8 +192,14 @@ export async function commit(rulesetId, path, writeFn) { }).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)])); const scriptletFromRegexes = Array.from(worldDetails.regexesOrPaths) .filter(a => a[0].startsWith('/') && a[0].endsWith('/')) - .map(a => [ a[0].slice(1, -1), JSON.stringify(Array.from(a[1])).slice(1,-1) ]) - .flat(); + .map(a => { + const restr = a[0].slice(1,-1); + return [ + literalStrFromRegex(restr).slice(0,8), + restr, + JSON.stringify(Array.from(a[1])).slice(1,-1), + ]; + }).flat(); let content = safeReplace(scriptletTemplate, 'self.$hasEntities$', JSON.stringify(worldDetails.hasEntities)); content = safeReplace(content, 'self.$hasAncestors$', JSON.stringify(worldDetails.hasAncestors)); content = safeReplace(content, 'self.$hasRegexes$', JSON.stringify(scriptletFromRegexes.length !== 0)); diff --git a/platform/mv3/scriptlets/scriptlet.template.js b/platform/mv3/scriptlets/scriptlet.template.js index fa6f65ddb..6655d3ca9 100644 --- a/platform/mv3/scriptlets/scriptlet.template.js +++ b/platform/mv3/scriptlets/scriptlet.template.js @@ -149,11 +149,16 @@ if ( todoIndices.size !== 0 ) { } if ( $hasRegexes$ ) { const { hns } = entries[0]; - for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 2 ) { - const regex = new RegExp($scriptletFromRegexes$[i+0]); + for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 3 ) { + const needle = $scriptletFromRegexes$[i+0]; + let regex; for ( const hn of hns ) { + if ( hn.includes(needle) === false ) { continue; } + if ( regex === undefined ) { + regex = new RegExp($scriptletFromRegexes$[i+1]); + } if ( regex.test(hn) === false ) { continue; } - for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+1]}]`) ) { + for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+2]}]`) ) { todo.add(ref); } } diff --git a/src/js/regex-analyzer.js b/src/js/regex-analyzer.js index 0a885047f..a46ac6027 100644 --- a/src/js/regex-analyzer.js +++ b/src/js/regex-analyzer.js @@ -40,6 +40,16 @@ export function tokenizableStrFromRegex(reStr) { return _literalStrFromRegex(reStr); } +export function literalStrFromRegex(reStr) { + let literals = tokenizableStrFromRegex(reStr) + .split(/[\x00\x01]+/) + .sort((a, b) => b.length - a.length); + if ( literals.length > 1 ) { + literals = literals.filter(a => (/^(\.?com|\.?net|www\.?)$/).test(a) === false); + } + return literals[0] || ''; +} + /******************************************************************************/ function _isRE2(node) { diff --git a/tools/make-mv3.sh b/tools/make-mv3.sh index 403ad7e66..77a5a6414 100755 --- a/tools/make-mv3.sh +++ b/tools/make-mv3.sh @@ -129,6 +129,8 @@ cp platform/mv3/*.json "$UBOL_BUILD_DIR"/ cp platform/mv3/*.js "$UBOL_BUILD_DIR"/ cp platform/mv3/*.mjs "$UBOL_BUILD_DIR"/ cp platform/mv3/extension/js/utils.js "$UBOL_BUILD_DIR"/js/ +cp "$UBO_DIR"/src/js/regex-analyzer.js "$UBOL_BUILD_DIR"/js/ +cp -R "$UBO_DIR"/src/lib/regexanalyzer "$UBOL_BUILD_DIR"/ cp -R "$UBO_DIR"/src/js/resources "$UBOL_BUILD_DIR"/js/ cp -R platform/mv3/scriptlets "$UBOL_BUILD_DIR"/ mkdir -p "$UBOL_BUILD_DIR"/web_accessible_resources