mirror of
https://github.com/gorhill/uBlock.git
synced 2026-03-11 09:04:36 +00:00
[mv3] Avoid instantiating regexes by using simple needle first
Related issue: https://github.com/uBlockOrigin/uBOL-home/issues/223
This commit is contained in:
parent
bb34a4b83b
commit
df0d21d92e
6 changed files with 43 additions and 15 deletions
|
|
@ -149,12 +149,13 @@
|
|||
selectorsFromListIndex(data, data.selectorListRefs[listref]);
|
||||
}
|
||||
const { fromRegexes } = data;
|
||||
for ( let i = 0, n = fromRegexes.length; i < n; i += 2 ) {
|
||||
if ( typeof fromRegexes[i+0] === 'string' ) {
|
||||
fromRegexes[i+0] = new RegExp(fromRegexes[i+0]);
|
||||
for ( let i = 0, n = fromRegexes.length; i < n; i += 3 ) {
|
||||
if ( hostname.includes(fromRegexes[i+0]) === false ) { continue; }
|
||||
if ( typeof fromRegexes[i+1] === 'string' ) {
|
||||
fromRegexes[i+1] = new RegExp(fromRegexes[i+1]);
|
||||
}
|
||||
if ( fromRegexes[i+0].test(hostname) === false ) { continue; }
|
||||
selectorsFromListIndex(data, fromRegexes[i+1]);
|
||||
if ( fromRegexes[i+1].test(hostname) === false ) { continue; }
|
||||
selectorsFromListIndex(data, fromRegexes[i+2]);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import {
|
|||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'fs/promises';
|
||||
import { literalStrFromRegex } from './js/regex-analyzer.js';
|
||||
import path from 'path';
|
||||
import process from 'process';
|
||||
import redirectResourcesMap from './js/redirect-resources.js';
|
||||
|
|
@ -888,7 +889,7 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
return a < b ? -1 : 1;
|
||||
});
|
||||
|
||||
const data = JSON.stringify({
|
||||
const data = {
|
||||
selectors: Array.from(allSelectors.keys()),
|
||||
selectorLists: Array.from(allSelectorLists.keys()),
|
||||
selectorListRefs: sortedHostnames.map(a => allHostnames.get(a)),
|
||||
|
|
@ -896,10 +897,12 @@ async function processCosmeticFilters(assetDetails, realm, mapin) {
|
|||
hasEntities,
|
||||
fromRegexes: Array.from(allRegexesOrPaths)
|
||||
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
|
||||
.map(a => [ a[0].slice(1, -1), a[1] ])
|
||||
.flat(),
|
||||
});
|
||||
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, data);
|
||||
.map(a => {
|
||||
const restr = a[0].slice(1,-1);
|
||||
return [ literalStrFromRegex(restr).slice(0,8), restr, a[1] ]
|
||||
}).flat(),
|
||||
};
|
||||
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.json`, JSON.stringify(data));
|
||||
|
||||
// The cosmetic filters will be injected programmatically as content
|
||||
// script and the decisions to activate the cosmetic filters will be
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
import { builtinScriptlets } from './js/resources/scriptlets.js';
|
||||
import fs from 'fs/promises';
|
||||
import { literalStrFromRegex } from './js/regex-analyzer.js';
|
||||
import { safeReplace } from './safe-replace.js';
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
@ -191,8 +192,14 @@ export async function commit(rulesetId, path, writeFn) {
|
|||
}).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)]));
|
||||
const scriptletFromRegexes = Array.from(worldDetails.regexesOrPaths)
|
||||
.filter(a => a[0].startsWith('/') && a[0].endsWith('/'))
|
||||
.map(a => [ a[0].slice(1, -1), JSON.stringify(Array.from(a[1])).slice(1,-1) ])
|
||||
.flat();
|
||||
.map(a => {
|
||||
const restr = a[0].slice(1,-1);
|
||||
return [
|
||||
literalStrFromRegex(restr).slice(0,8),
|
||||
restr,
|
||||
JSON.stringify(Array.from(a[1])).slice(1,-1),
|
||||
];
|
||||
}).flat();
|
||||
let content = safeReplace(scriptletTemplate, 'self.$hasEntities$', JSON.stringify(worldDetails.hasEntities));
|
||||
content = safeReplace(content, 'self.$hasAncestors$', JSON.stringify(worldDetails.hasAncestors));
|
||||
content = safeReplace(content, 'self.$hasRegexes$', JSON.stringify(scriptletFromRegexes.length !== 0));
|
||||
|
|
|
|||
|
|
@ -149,11 +149,16 @@ if ( todoIndices.size !== 0 ) {
|
|||
}
|
||||
if ( $hasRegexes$ ) {
|
||||
const { hns } = entries[0];
|
||||
for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 2 ) {
|
||||
const regex = new RegExp($scriptletFromRegexes$[i+0]);
|
||||
for ( let i = 0, n = $scriptletFromRegexes$.length; i < n; i += 3 ) {
|
||||
const needle = $scriptletFromRegexes$[i+0];
|
||||
let regex;
|
||||
for ( const hn of hns ) {
|
||||
if ( hn.includes(needle) === false ) { continue; }
|
||||
if ( regex === undefined ) {
|
||||
regex = new RegExp($scriptletFromRegexes$[i+1]);
|
||||
}
|
||||
if ( regex.test(hn) === false ) { continue; }
|
||||
for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+1]}]`) ) {
|
||||
for ( const ref of JSON.parse(`[${$scriptletFromRegexes$[i+2]}]`) ) {
|
||||
todo.add(ref);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,16 @@ export function tokenizableStrFromRegex(reStr) {
|
|||
return _literalStrFromRegex(reStr);
|
||||
}
|
||||
|
||||
// Pick a single plain-text fragment out of the regex source `reStr`.
//
// reStr: regex source string, passed unchanged to tokenizableStrFromRegex().
// Returns: the longest fragment left after the filtering below, or '' when
// no non-empty fragment remains.
//
// The build scripts visible alongside this function keep the first 8
// characters of the result and store it next to the regex, so that the
// runtime can do a cheap `String.prototype.includes()` check before
// instantiating/testing the regex itself.
export function literalStrFromRegex(reStr) {
|
||||
// Split on runs of \x00/\x01 and order the pieces longest first.
// NOTE(review): \x00/\x01 are presumably the placeholders
// tokenizableStrFromRegex() emits for non-literal parts -- confirm there.
let literals = tokenizableStrFromRegex(reStr)
|
||||
.split(/[\x00\x01]+/)
|
||||
.sort((a, b) => b.length - a.length);
|
||||
// Only when there is more than one piece: drop pieces which are exactly
// `com`, `.com`, `net`, `.net`, `www` or `www.`. A sole piece is kept
// as is, even if it is one of those.
// NOTE(review): presumably because such pieces occur in too many
// hostnames to be a useful pre-test -- confirm intent.
if ( literals.length > 1 ) {
|
||||
literals = literals.filter(a => (/^(\.?com|\.?net|www\.?)$/).test(a) === false);
|
||||
}
|
||||
// The list can be empty after filtering, and the first piece can be an
// empty string: both cases yield ''.
return literals[0] || '';
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
function _isRE2(node) {
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ cp platform/mv3/*.json "$UBOL_BUILD_DIR"/
|
|||
cp platform/mv3/*.js "$UBOL_BUILD_DIR"/
|
||||
cp platform/mv3/*.mjs "$UBOL_BUILD_DIR"/
|
||||
cp platform/mv3/extension/js/utils.js "$UBOL_BUILD_DIR"/js/
|
||||
cp "$UBO_DIR"/src/js/regex-analyzer.js "$UBOL_BUILD_DIR"/js/
|
||||
cp -R "$UBO_DIR"/src/lib/regexanalyzer "$UBOL_BUILD_DIR"/
|
||||
cp -R "$UBO_DIR"/src/js/resources "$UBOL_BUILD_DIR"/js/
|
||||
cp -R platform/mv3/scriptlets "$UBOL_BUILD_DIR"/
|
||||
mkdir -p "$UBOL_BUILD_DIR"/web_accessible_resources
|
||||
|
|
|
|||
Loading…
Reference in a new issue