[mv3] Avoid instantiating regexes by using simple needle first

Related issue:
https://github.com/uBlockOrigin/uBOL-home/issues/223
This commit is contained in:
Raymond Hill 2025-12-27 09:45:25 -05:00
parent bb34a4b83b
commit df0d21d92e
No known key found for this signature in database
GPG key ID: F5630CAE62A14316
6 changed files with 43 additions and 15 deletions

View file

@ -149,12 +149,13 @@
selectorsFromListIndex(data, data.selectorListRefs[listref]);
}
const { fromRegexes } = data;
for ( let i = 0, n = fromRegexes.length; i < n; i += 2 ) {
if ( typeof fromRegexes[i+0] === 'string' ) {
fromRegexes[i+0] = new RegExp(fromRegexes[i+0]);
for ( let i = 0, n = fromRegexes.length; i < n; i += 3 ) {
if ( hostname.includes(fromRegexes[i+0]) === false ) { continue; }
if ( typeof fromRegexes[i+1] === 'string' ) {
fromRegexes[i+1] = new RegExp(fromRegexes[i+1]);
}
if ( fromRegexes[i+0].test(hostname) === false ) { continue; }
selectorsFromListIndex(data, fromRegexes[i+1]);
if ( fromRegexes[i+1].test(hostname) === false ) { continue; }
selectorsFromListIndex(data, fromRegexes[i+2]);
}
};

View file

@ -33,6 +33,7 @@ import {
import { execSync } from 'node:child_process';
import fs from 'fs/promises';
import { literalStrFromRegex } from './js/regex-analyzer.js';
import path from 'path';
import process from 'process';
import redirectResourcesMap from './js/redirect-resources.js';
@ -888,7 +889,7 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
return a < b ? -1 : 1;
});
const data = JSON.stringify({
const data = {
selectors: Array.from(allSelectors.keys()),
selectorLists: Array.from(allSelectorLists.keys()),
selectorListRefs: sortedHostnames.map(a => allHostnames.get(a)),
@ -896,10 +897,12 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
hasEntities,
fromRegexes: Array.from(allRegexesOrPaths)
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
.map(a => [ a[0].slice(1, -1), a[1] ])
.flat(),
});
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, data);
.map(a => {
const restr = a[0].slice(1,-1);
return [ literalStrFromRegex(restr).slice(0,8), restr, a[1] ]
}).flat(),
};
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, JSON.stringify(data));
// The cosmetic filters will be injected programmatically as content
// script and the decisions to activate the cosmetic filters will be

View file

@ -21,6 +21,7 @@
import { builtinScriptlets } from './js/resources/scriptlets.js';
import fs from 'fs/promises';
import { literalStrFromRegex } from './js/regex-analyzer.js';
import { safeReplace } from './safe-replace.js';
/******************************************************************************/
@ -191,8 +192,14 @@ export async function commit(rulesetId, path, writeFn) {
}).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)]));
const scriptletFromRegexes = Array.from(worldDetails.regexesOrPaths)
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
.map(a => [ a[0].slice(1, -1), JSON.stringify(Array.from(a[1])).slice(1,-1) ])
.flat();
.map(a => {
const restr = a[0].slice(1,-1);
return [
literalStrFromRegex(restr).slice(0,8),
restr,
JSON.stringify(Array.from(a[1])).slice(1,-1),
];
}).flat();
let content = safeReplace(scriptletTemplate, 'self.$hasEntities$', JSON.stringify(worldDetails.hasEntities));
content = safeReplace(content, 'self.$hasAncestors$', JSON.stringify(worldDetails.hasAncestors));
content = safeReplace(content, 'self.$hasRegexes$', JSON.stringify(scriptletFromRegexes.length !== 0));

View file

@ -149,11 +149,16 @@ if ( todoIndices.size !== 0 ) {
}
if ( $hasRegexes$ ) {
const { hns } = entries[0];
for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 2 ) {
const regex = new RegExp($scriptletFromRegexes$[i+0]);
for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 3 ) {
const needle = $scriptletFromRegexes$[i+0];
let regex;
for ( const hn of hns ) {
if ( hn.includes(needle) === false ) { continue; }
if ( regex === undefined ) {
regex = new RegExp($scriptletFromRegexes$[i+1]);
}
if ( regex.test(hn) === false ) { continue; }
for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+1]}]`) ) {
for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+2]}]`) ) {
todo.add(ref);
}
}

View file

@ -40,6 +40,16 @@ export function tokenizableStrFromRegex(reStr) {
return _literalStrFromRegex(reStr);
}
/**
 * Pick a single literal substring to represent a regex source string.
 *
 * The string returned by `tokenizableStrFromRegex()` is cut on runs of
 * `\x00`/`\x01` characters (presumably markers for the non-literal parts
 * of the pattern -- confirm against the analyzer). The longest resulting
 * piece wins; on equal lengths the earliest piece wins.
 *
 * When there is more than one piece, pieces which are exactly `com`,
 * `.com`, `net`, `.net`, `www` or `www.` are discarded before choosing.
 *
 * @param {string} reStr - Regex source, passed as-is to
 *   `tokenizableStrFromRegex()`.
 * @returns {string} The chosen piece, or an empty string when no
 *   non-empty piece remains.
 */
export function literalStrFromRegex(reStr) {
    const reGenericPiece = /^(\.?com|\.?net|www\.?)$/;
    const pieces = tokenizableStrFromRegex(reStr).split(/[\x00\x01]+/);
    // Longest first; equal-length pieces keep their relative order.
    pieces.sort((a, b) => b.length - a.length);
    // A lone piece is kept even when generic.
    const candidates = pieces.length > 1
        ? pieces.filter(piece => reGenericPiece.test(piece) === false)
        : pieces;
    const [ best ] = candidates;
    return best || '';
}
/******************************************************************************/
function _isRE2(node) {

View file

@ -129,6 +129,8 @@ cp platform/mv3/*.json "$UBOL_BUILD_DIR"/
cp platform/mv3/*.js "$UBOL_BUILD_DIR"/
cp platform/mv3/*.mjs "$UBOL_BUILD_DIR"/
cp platform/mv3/extension/js/utils.js "$UBOL_BUILD_DIR"/js/
cp "$UBO_DIR"/src/js/regex-analyzer.js "$UBOL_BUILD_DIR"/js/
cp -R "$UBO_DIR"/src/lib/regexanalyzer "$UBOL_BUILD_DIR"/
cp -R "$UBO_DIR"/src/js/resources "$UBOL_BUILD_DIR"/js/
cp -R platform/mv3/scriptlets "$UBOL_BUILD_DIR"/
mkdir -p "$UBOL_BUILD_DIR"/web_accessible_resources