[mv3] Re-work specific cosmetic filtering-related content scripts

This is to minimize the delay to First Contentful Paint. The idea is to
avoid complex data structures such as Map in order to speed up first
content script execution. Use stringified arrays where practical, and
instantiate those arrays from their stringified representation only
when there are actual cosmetic filters to apply.

Related commit:
https://github.com/gorhill/uBlock/commit/6039ef2b6d
This commit is contained in:
Raymond Hill 2025-12-05 08:54:34 -05:00
parent 6039ef2b6d
commit 0aa0d81caf
No known key found for this signature in database
GPG key ID: F5630CAE62A14316
9 changed files with 169 additions and 312 deletions

View file

@ -30,42 +30,47 @@ self.proceduralImports = undefined;
/******************************************************************************/
const selectors = [];
const exceptions = [];
const isolatedAPI = self.isolatedAPI;
const selectors = new Set();
const exceptions = new Set();
const lookupHostname = (hostname, details, out) => {
let seqi = details.hostnamesMap.get(hostname);
if ( seqi === undefined ) { return; }
const { argsList, argsSeqs } = details;
for (;;) {
const argi = argsSeqs[seqi++];
const done = argi > 0;
out.push(...JSON.parse(argsList[done ? argi : -argi]));
if ( done ) { break; }
// Collect the selectors attached to `hostname` in one ruleset's `details`.
//
// `details.hostnames` is searched with `isolatedAPI.binarySearch`; when the
// hostname is not found (-1) nothing happens. On the first hit for a given
// `details`, the compact string forms are decoded in place:
//   - `selectorLists`: ';'-separated string -> array of strings
//   - `selectorListRefs`: comma-separated integers -> array of integers
// Each list entry is an integer: a value >= 0 is an index into
// `details.selectors` and is added to `selectors`; any other value is the
// bitwise complement of such an index and is added to `exceptions`.
const lookupHostname = (hostname, details) => {
    const hostnameIndex = isolatedAPI.binarySearch(details.hostnames, hostname);
    if ( hostnameIndex === -1 ) { return; }
    // Decode lazily: rulesets which never match never pay the parsing cost.
    if ( Array.isArray(details.selectorLists) === false ) {
        details.selectorLists = details.selectorLists.split(';');
        details.selectorListRefs = JSON.parse(`[${details.selectorListRefs}]`);
    }
    const listIndex = details.selectorListRefs[hostnameIndex];
    const selectorRefs = JSON.parse(`[${details.selectorLists[listIndex]}]`);
    for ( const ref of selectorRefs ) {
        const isPlain = ref >= 0;
        const destination = isPlain ? selectors : exceptions;
        destination.add(details.selectors[isPlain ? ref : ~ref]);
    }
};
const lookupAll = hostname => {
for ( const details of proceduralImports ) {
lookupHostname(hostname, details, selectors);
const matches = [];
lookupHostname(`~${hostname}`, details, matches);
if ( matches.length === 0 ) { continue; }
exceptions.push(...matches.map(a => JSON.stringify(a)));
lookupHostname(hostname, details);
}
};
self.isolatedAPI.forEachHostname(lookupAll, {
isolatedAPI.forEachHostname(lookupAll, {
hasEntities: proceduralImports.some(a => a.hasEntities)
});
proceduralImports.length = 0;
if ( selectors.length === 0 ) { return; }
for ( const selector of exceptions ) {
selectors.delete(selector);
}
const exceptedSelectors = exceptions.length !== 0
? selectors.filter(a => exceptions.includes(JSON.stringify(a)) === false)
: selectors;
if ( exceptedSelectors.length === 0 ) { return; }
if ( selectors.size === 0 ) { return; }
const exceptedSelectors = Array.from(selectors).map(a => JSON.parse(a));
const declaratives = exceptedSelectors.filter(a => a.cssable);
if ( declaratives.length !== 0 ) {

View file

@ -30,42 +30,48 @@ self.specificImports = undefined;
/******************************************************************************/
const selectors = [];
const exceptions = [];
const isolatedAPI = self.isolatedAPI;
const selectors = new Set();
const exceptions = new Set();
const lookupHostname = (hostname, details, out) => {
let seqi = details.hostnamesMap.get(hostname);
if ( seqi === undefined ) { return; }
const { argsList, argsSeqs } = details;
for (;;) {
const argi = argsSeqs[seqi++];
const done = argi > 0;
out.push(...argsList[done ? argi : -argi].split('\n'));
if ( done ) { break; }
// Gather the selectors which `details` (one imported ruleset) associates
// with `hostname`.
//
// Returns without side effects when `isolatedAPI.binarySearch` reports -1
// for `hostname` in `details.hostnames`. Otherwise the ruleset's stringified
// `selectorLists` (';'-separated) and `selectorListRefs` (comma-separated
// integers) are converted to arrays once, in place, and the matching list is
// walked: entries >= 0 index `details.selectors` and go to `selectors`,
// other entries are bitwise-complemented indices and go to `exceptions`.
const lookupHostname = (hostname, details) => {
    const pos = isolatedAPI.binarySearch(details.hostnames, hostname);
    if ( pos === -1 ) { return; }
    const alreadyDecoded = Array.isArray(details.selectorLists);
    if ( alreadyDecoded === false ) {
        // First match for this ruleset: expand the compact string forms.
        details.selectorLists = details.selectorLists.split(';');
        details.selectorListRefs = JSON.parse(`[${details.selectorListRefs}]`);
    }
    const encodedList = details.selectorLists[details.selectorListRefs[pos]];
    const indices = JSON.parse(`[${encodedList}]`);
    for ( const index of indices ) {
        if ( index >= 0 ) {
            selectors.add(details.selectors[index]);
            continue;
        }
        exceptions.add(details.selectors[~index]);
    }
};
const lookupAll = hostname => {
for ( const details of specificImports ) {
lookupHostname(hostname, details, selectors);
lookupHostname(`~${hostname}`, details, exceptions);
lookupHostname(hostname, details);
}
};
self.isolatedAPI.forEachHostname(lookupAll, {
isolatedAPI.forEachHostname(lookupAll, {
hasEntities: specificImports.some(a => a.hasEntities)
});
specificImports.length = 0;
if ( selectors.length === 0 ) { return; }
for ( const selector of exceptions ) {
selectors.delete(selector);
}
const exceptedSelectors = exceptions.length !== 0
? selectors.filter(a => exceptions.includes(a) === false)
: selectors;
if ( exceptedSelectors.length === 0 ) { return; }
if ( selectors.size === 0 ) { return; }
self.cssAPI.insert(`${exceptedSelectors.join(',')}{display:none!important;}`);
const css = `${Array.from(selectors).join(',\n')}{display:none!important;}`;
self.cssAPI.insert(css);
/******************************************************************************/

View file

@ -73,6 +73,28 @@
if ( r !== undefined ) { return r; }
}
};
// Binary search for `target` in the array `sorted`.
//
// Elements are compared by `length` first and, for equal lengths, with the
// `<` operator, so `sorted` must already be ordered that way for the search
// to be meaningful. Returns the index of the element strictly equal to
// `target`, or -1 when there is none.
isolatedAPI.binarySearch = (sorted, target) => {
    let lo = 0;
    let hi = sorted.length;
    while ( lo < hi ) {
        const mid = (lo + hi) >>> 1;
        const candidate = sorted[mid];
        const lengthDelta = target.length - candidate.length;
        if ( lengthDelta === 0 && target === candidate ) { return mid; }
        // Same length: fall back to comparing the values themselves.
        const goLeft = lengthDelta !== 0
            ? lengthDelta < 0
            : target < candidate;
        if ( goLeft ) {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    return -1;
};
})(self.isolatedAPI);
/******************************************************************************/

View file

@ -90,11 +90,6 @@ const jsonSetMapReplacer = (k, v) => {
return v;
};
// Derive a non-negative integer id from `s`: take the first 8 hex digits
// (32 bits) of its SHA-256 digest and clear the top bit.
const uidint32 = (s) => {
    const hexDigest = createHash('sha256').update(s).digest('hex');
    const leading32 = parseInt(hexDigest.substring(0, 8), 16);
    return leading32 & 0x7FFFFFFF;
};
/******************************************************************************/
const consoleLog = console.log;
@ -812,84 +807,6 @@ const globalHighlyGenericExceptionSet = new Set();
/******************************************************************************/
// Group selectors whose `details` objects serialize to the same JSON.
//
// `mapin` is iterated as [ selector, details ] pairs; pairs whose
// `details.rejected` is truthy are skipped. Returns an array with one object
// per distinct `details` JSON (in first-seen order): a copy of those details
// with a `selectors` property holding the sorted selectors of the group.
// Returns an empty array when `mapin` is undefined.
function groupSelectorsByHostnames(mapin) {
    if ( mapin === undefined ) { return []; }
    const buckets = new Map();
    for ( const [ selector, details ] of mapin ) {
        if ( details.rejected ) { continue; }
        const key = JSON.stringify(details);
        if ( buckets.has(key) === false ) {
            buckets.set(key, new Set());
        }
        buckets.get(key).add(selector);
    }
    return Array.from(buckets, ([ key, bucket ]) => {
        const group = JSON.parse(key);
        group.selectors = Array.from(bucket).sort();
        return group;
    });
}
// Merge entries which carry the same `selectors` array.
//
// Each entry of `arrayin` is keyed by `uidint32()` of the JSON form of its
// `selectors`. For every key, the hostnames found in `matches` are unioned
// into `y` and those found in `excludeMatches` into `n`.
//
// Returns an array of [ id, { a, y, n } ] pairs where `a` is the selectors
// array of the first entry seen for that id, and `y` / `n` are arrays of
// hostnames, or undefined when no entry contributed to them.
function groupHostnamesBySelectors(arrayin) {
    const contentMap = new Map();
    // Union `hostnames` into the Set stored at `details[prop]`, creating the
    // Set on first use. Does nothing when `hostnames` is undefined.
    const mergeHostnames = (details, prop, hostnames) => {
        if ( hostnames === undefined ) { return; }
        if ( details[prop] === undefined ) {
            details[prop] = new Set();
        }
        for ( const hn of hostnames ) {
            details[prop].add(hn);
        }
    };
    for ( const entry of arrayin ) {
        const id = uidint32(JSON.stringify(entry.selectors));
        if ( contentMap.has(id) === false ) {
            contentMap.set(id, { a: entry.selectors });
        }
        const details = contentMap.get(id);
        mergeHostnames(details, 'y', entry.matches);
        mergeHostnames(details, 'n', entry.excludeMatches);
    }
    return Array.from(contentMap, ([ id, details ]) => [
        id, {
            a: details.a,
            y: details.y ? Array.from(details.y) : undefined,
            n: details.n ? Array.from(details.n) : undefined,
        }
    ]);
}
// Record `id` against every hostname of `hostnames` in `map`.
//
// A hostname seen for the first time maps directly to `id`; when a second id
// arrives the value becomes a two-element array, and further ids are
// appended to that array.
const scriptletHostnameToIdMap = (hostnames, id, map) => {
    for ( const hostname of hostnames ) {
        const current = map.get(hostname);
        if ( Array.isArray(current) ) {
            current.push(id);
            continue;
        }
        map.set(hostname, current === undefined ? id : [ current, id ]);
    }
};
const scriptletJsonReplacer = (k, v) => {
if ( k === 'n' ) {
if ( v === undefined || v.size === 0 ) { return; }
@ -904,175 +821,86 @@ const scriptletJsonReplacer = (k, v) => {
/******************************************************************************/
// Flatten `argsMap` and `hostnamesMap` into two index-based arrays.
//
// - `argsList`: [ '', ...details ] -- the details of each [ id, details ]
//   pair of `argsMap`, in order; slot 0 is a placeholder.
// - `argsSeqs`: [ 0, ... ] -- sequences of `argsList` indices. Inside a
//   sequence every index but the last is stored negated, so a positive
//   value marks the end of a sequence.
//
// `hostnamesMap` is modified in place: each value (a single id or an array
// of ids) is replaced by the start offset of its sequence in `argsSeqs`.
// Hostnames whose ids serialize to the same JSON share one sequence.
function argsMap2List(argsMap, hostnamesMap) {
    const argsList = [ '' ];
    const indexMap = new Map();
    for ( const [ id, details ] of argsMap ) {
        // push() returns the new length, hence the index just filled is -1
        indexMap.set(id, argsList.push(details) - 1);
    }
    const argsSeqs = [ 0 ];
    const seqStarts = new Map();
    for ( const [ hn, ids ] of hostnamesMap ) {
        const seqKey = JSON.stringify(ids);
        let seqStart = seqStarts.get(seqKey);
        if ( seqStart === undefined ) {
            seqStart = argsSeqs.length;
            seqStarts.set(seqKey, seqStart);
            if ( typeof ids === 'number' ) {
                argsSeqs.push(indexMap.get(ids));
            } else {
                for ( let i = 0; i < ids.length; i++ ) {
                    argsSeqs.push(-indexMap.get(ids[i]));
                }
                // Flip the sign of the last item to terminate the sequence
                const last = argsSeqs.length - 1;
                argsSeqs[last] = -argsSeqs[last];
            }
        }
        hostnamesMap.set(hn, seqStart);
    }
    return { argsList, argsSeqs };
}
/******************************************************************************/
async function processCosmeticFilters(assetDetails, mapin) {
async function processCosmeticFilters(assetDetails, realm, mapin) {
if ( mapin === undefined ) { return 0; }
if ( mapin.size === 0 ) { return 0; }
const domainBasedEntries = groupHostnamesBySelectors(
groupSelectorsByHostnames(mapin)
);
// We do not want more than n CSS files per subscription, so we will
// group multiple unrelated selectors in the same file, and distinct
// css declarations will be injected programmatically according to the
// hostname of the current document.
//
// Collate all distinct selectors
const allSelectors = new Map();
const allHostnames = new Map();
let hasEntities = false;
for ( const [ selector, details ] of mapin ) {
if ( details.rejected ) { continue; }
if ( allSelectors.has(selector) === false ) {
allSelectors.set(selector, allSelectors.size);
}
const iSelector = allSelectors.get(selector);
if ( details.matches ) {
for ( const hn of details.matches ) {
if ( allHostnames.has(hn) === false ) {
allHostnames.set(hn, new Set());
}
allHostnames.get(hn).add(iSelector);
hasEntities ||= hn.endsWith('.*');
}
}
if ( details.excludeMatches ) {
for ( const hn of details.excludeMatches ) {
if ( allHostnames.has(hn) === false ) {
allHostnames.set(hn, new Set());
}
allHostnames.get(hn).add(~iSelector);
hasEntities ||= hn.endsWith('.*');
}
}
}
const allSelectorLists = new Map();
for ( const [ hn, selectorSet ] of allHostnames ) {
const list = JSON.stringify(Array.from(selectorSet).sort()).slice(1, -1);
if ( allSelectorLists.has(list) === false ) {
allSelectorLists.set(list, allSelectorLists.size);
}
allHostnames.set(hn, allSelectorLists.get(list));
}
// The cosmetic filters will be injected programmatically as content
// script and the decisions to activate the cosmetic filters will be
// done at injection time according to the document's hostname.
const generatedFiles = [];
const argsMap = domainBasedEntries.map(entry => [
entry[0],
entry[1].a ? entry[1].a.join('\n') : undefined,
]);
const hostnamesMap = new Map();
let hasEntities = false;
for ( const [ id, details ] of domainBasedEntries ) {
if ( details.y ) {
scriptletHostnameToIdMap(details.y, id, hostnamesMap);
hasEntities ||= details.y.some(a => a.endsWith('.*'));
}
if ( details.n ) {
scriptletHostnameToIdMap(details.n.map(a => `~${a}`), id, hostnamesMap);
hasEntities ||= details.n.some(a => a.endsWith('.*'));
}
}
const { argsList, argsSeqs } = argsMap2List(argsMap, hostnamesMap);
const originalScriptletMap = await loadAllSourceScriptlets();
let patchedScriptlet = originalScriptletMap.get('css-specific').replace(
let patchedScriptlet = originalScriptletMap.get(`css-${realm}`).replace(
'$rulesetId$',
assetDetails.id
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$argsList\$/,
`${JSON.stringify(argsList, scriptletJsonReplacer)}`
/\bself\.\$selectors\$/,
`/* ${allSelectors.size} */ ${JSON.stringify(Array.from(allSelectors.keys()))}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$argsSeqs\$/,
`${JSON.stringify(argsSeqs, scriptletJsonReplacer)}`
/\bself\.\$selectorLists\$/,
`/* ${allSelectorLists.size} */ ${JSON.stringify(Array.from(allSelectorLists.keys()).join(';'))}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$hostnamesMap\$/,
`${JSON.stringify(hostnamesMap, scriptletJsonReplacer)}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
'self.$hasEntities$',
JSON.stringify(hasEntities)
);
writeFile(`${scriptletDir}/specific/${assetDetails.id}.js`, patchedScriptlet);
generatedFiles.push(`${assetDetails.id}`);
if ( generatedFiles.length !== 0 ) {
log(`CSS-specific: ${mapin.size} distinct filters`);
log(`\tCombined into ${hostnamesMap.size} distinct hostnames`);
}
return hostnamesMap.size;
}
/******************************************************************************/
async function processProceduralCosmeticFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
if ( mapin.size === 0 ) { return 0; }
const procedurals = new Map();
mapin.forEach((details, jsonSelector) => {
procedurals.set(jsonSelector, details);
const sortedHostnames = Array.from(allHostnames.keys()).toSorted((a, b) => {
const d = a.length - b.length;
if ( d !== 0 ) { return d; }
return a < b ? -1 : 1;
});
if ( procedurals.size === 0 ) { return 0; }
const contentArray = groupHostnamesBySelectors(
groupSelectorsByHostnames(procedurals)
);
const argsMap = contentArray.map(entry => [
entry[0],
entry[1].a,
]);
const hostnamesMap = new Map();
let hasEntities = false;
for ( const [ id, details ] of contentArray ) {
if ( details.y ) {
scriptletHostnameToIdMap(details.y, id, hostnamesMap);
hasEntities ||= details.y.some(a => a.endsWith('.*'));
}
if ( details.n ) {
scriptletHostnameToIdMap(details.n.map(a => `~${a}`), id, hostnamesMap);
hasEntities ||= details.n.some(a => a.endsWith('.*'));
}
}
const { argsList, argsSeqs } = argsMap2List(argsMap, hostnamesMap);
const argsListAfter = [];
for ( const a of argsList ) {
const aAfter = [];
for ( let b of a ) {
aAfter.push(JSON.parse(b));
}
argsListAfter.push(JSON.stringify(aAfter));
}
const originalScriptletMap = await loadAllSourceScriptlets();
let patchedScriptlet = originalScriptletMap.get('css-procedural').replace(
'$rulesetId$',
assetDetails.id
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$selectorListRefs\$/,
`/* ${sortedHostnames.length} */ "${JSON.stringify(sortedHostnames.map(a => allHostnames.get(a))).slice(1, -1)}"`
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$argsList\$/,
`${JSON.stringify(argsListAfter, scriptletJsonReplacer)}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$argsSeqs\$/,
`${JSON.stringify(argsSeqs, scriptletJsonReplacer)}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
/\bself\.\$hostnamesMap\$/,
`${JSON.stringify(hostnamesMap, scriptletJsonReplacer)}`
/\bself\.\$hostnames\$/,
`/* ${sortedHostnames.length} */ ${JSON.stringify(sortedHostnames)}`
);
patchedScriptlet = safeReplace(patchedScriptlet,
'self.$hasEntities$',
JSON.stringify(hasEntities)
);
writeFile(`${scriptletDir}/procedural/${assetDetails.id}.js`, patchedScriptlet);
writeFile(`${scriptletDir}/${realm}/${assetDetails.id}.js`, patchedScriptlet);
if ( contentArray.length !== 0 ) {
log(`Procedural-related distinct filters: ${procedurals.size} distinct combined selectors`);
log(`\tCombined into ${hostnamesMap.size} distinct hostnames`);
}
log(`CSS-${realm}: ${allSelectors.size} distinct filters for ${allHostnames.size} distinct hostnames`);
return hostnamesMap.size;
return sortedHostnames.length;
}
/******************************************************************************/
@ -1213,10 +1041,13 @@ async function rulesetFromURLs(assetDetails) {
);
const specificCosmeticStats = await processCosmeticFilters(
assetDetails,
'specific',
declarativeCosmetic
);
const proceduralStats = await processProceduralCosmeticFilters(
const proceduralStats = await processCosmeticFilters(
assetDetails,
'procedural',
proceduralCosmetic
);
const scriptletStats = await processScriptletFilters(

View file

@ -173,40 +173,30 @@ export async function commit(rulesetId, path, writeFn) {
return a[0] < b[0] ? -1 : 1;
}).map(a => ([ a[0], JSON.stringify(Array.from(a[1]).map(a => JSON.parse(a))).slice(1,-1)]));
let content = safeReplace(scriptletTemplate, /\$rulesetId\$/, rulesetId, 0);
if ( worldDetails.hasEntities ) {
content = safeReplace(content,
'const $hasEntities$ = false;',
'const $hasEntities$ = true;'
);
}
if ( worldDetails.hasAncestors ) {
content = safeReplace(content,
'const $hasAncestors$ = false;',
'const $hasAncestors$ = true;'
);
};
content = safeReplace(content, 'self.$hasEntities$', 'true');
content = safeReplace(content, 'self.$hasAncestors$', 'true');
content = safeReplace(content,
'const $scriptletHostnames$ = [];',
`const $scriptletHostnames$ = /* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[0]))};`
'self.$scriptletHostnames$',
`/* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[0]))}`
);
content = safeReplace(content,
'const $scriptletArglistRefs$ = [];',
`const $scriptletArglistRefs$ = /* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[1]).join(';'))};`
'self.$scriptletArglistRefs$',
`/* ${hostnames.length} */ ${JSON.stringify(hostnames.map(a => a[1]).join(';'))}`
);
content = safeReplace(content,
'const $scriptletArglists$ = [];',
`const $scriptletArglists$ = /* ${arglists.size} */ ${JSON.stringify(Array.from(arglists.keys()).join(';'))};`
'self.$scriptletArglists$',
`/* ${arglists.size} */ ${JSON.stringify(Array.from(arglists.keys()).join(';'))}`
);
content = safeReplace(content,
'const $scriptletArgs$ = [];',
`const $scriptletArgs$ = /* ${args.size} */ ${JSON.stringify(Array.from(args.keys()).join('\n'))};`
'self.$scriptletArgs$',
`/* ${args.size} */ ${JSON.stringify(Array.from(args.keys()))}`
);
content = safeReplace(content,
'const $scriptletFunctions$ = [];',
`const $scriptletFunctions$ = /* ${scriptletFunctions.size} */\n[${Array.from(scriptletFunctions.keys()).join(',')}];`
'self.$scriptletFunctions$',
`/* ${scriptletFunctions.size} */\n[${Array.from(scriptletFunctions.keys()).join(',')}]`
);
content = safeReplace(content,
'function $scriptletCode$(){} // eslint-disable-line',
'self.$scriptletCode$',
Array.from(allFunctions.values()).join('\n\n')
);
writeFn(`${path}/${world.toLowerCase()}/${rulesetId}.js`, content);

View file

@ -27,13 +27,15 @@
/******************************************************************************/
const argsList = self.$argsList$;
const argsSeqs = self.$argsSeqs$;
const hostnamesMap = new Map(self.$hostnamesMap$);
const selectors = self.$selectors$;
const selectorLists = self.$selectorLists$;
const selectorListRefs = self.$selectorListRefs$;
const hostnames = self.$hostnames$;
const hasEntities = self.$hasEntities$;
self.proceduralImports = self.proceduralImports || [];
self.proceduralImports.push({ argsList, argsSeqs, hostnamesMap, hasEntities });
self.proceduralImports.push({ selectors, selectorLists, selectorListRefs, hostnames, hasEntities });
/******************************************************************************/

View file

@ -27,13 +27,14 @@
/******************************************************************************/
const argsList = self.$argsList$;
const argsSeqs = self.$argsSeqs$;
const hostnamesMap = new Map(self.$hostnamesMap$);
const selectors = self.$selectors$;
const selectorLists = self.$selectorLists$;
const selectorListRefs = self.$selectorListRefs$;
const hostnames = self.$hostnames$;
const hasEntities = self.$hasEntities$;
self.specificImports = self.specificImports || [];
self.specificImports.push({ argsList, argsSeqs, hostnamesMap, hasEntities });
self.specificImports.push({ selectors, selectorLists, selectorListRefs, hostnames, hasEntities });
/******************************************************************************/

View file

@ -30,24 +30,24 @@
/******************************************************************************/
function $scriptletCode$(){} // eslint-disable-line
self.$scriptletCode$
/******************************************************************************/
const scriptletGlobals = {}; // eslint-disable-line
const $scriptletFunctions$ = [];
const $scriptletFunctions$ = self.$scriptletFunctions$;
const $scriptletArgs$ = [];
const $scriptletArgs$ = self.$scriptletArgs$;
const $scriptletArglists$ = [];
const $scriptletArglists$ = self.$scriptletArglists$;
const $scriptletArglistRefs$ = [];
const $scriptletArglistRefs$ = self.$scriptletArglistRefs$;
const $scriptletHostnames$ = [];
const $scriptletHostnames$ = self.$scriptletHostnames$;
const $hasEntities$ = false;
const $hasAncestors$ = false;
const $hasEntities$ = self.$hasEntities$;
const $hasAncestors$ = self.$hasAncestors$;
/******************************************************************************/
@ -150,7 +150,7 @@ const todo = new Set();
// Execute scriptlets
{
const arglists = $scriptletArglists$.split(';');
const args = $scriptletArgs$.split('\n');
const args = $scriptletArgs$;
for ( const ref of todo ) {
if ( ref < 0 ) { continue; }
if ( todo.has(~ref) ) { continue; }

@ -1 +1 @@
Subproject commit 7eab39d94e48ba6410aad20780d8be1e28b70e27
Subproject commit 9e03b41c86e9afc79a782f1cefcc5770d53a0c67