From 408b538e75b7937204fb900ccc260067c2a95bb4 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 22 May 2025 11:46:08 -0400 Subject: [PATCH] [mv3] Add support to convert `header=` option to DNR rules Related issue: https://github.com/uBlockOrigin/uBOL-home/issues/157 The `header=` option will be converted into DNR's `responseHeaders` condition. There will be an attempt to convert regex-based values into DNR-compatible syntax. Not all regex-based patterns can be converted to use DNR's patterns with `*` and `?` special characters. The implementation of `header=` option in uBO has been revisited to improve compatibility with DNR syntax to minimize burden for list maintainers when creating `header=` filters compatible with both uBO and uBOL. The changes: - Header names are now case-insensitive by default - Occurrences of `*` in non-regex-based header values now mean "matches any number of characters" - Occurrences of `?` in non-regex-based header values now mean "matches zero or one character" At time of commit, and as per MDN, only Chromium-based browsers currently support filtering on response headers: https://developer.mozilla.org/docs/Mozilla/Add-ons/WebExtensions/API/declarativeNetRequest/HeaderInfo Also as per MDN, Chromium 121-127 silently ignore the `responseHeaders` condition, potentially causing undue blocking of network requests. Currently uBOL supports Chromium 122 and later, meaning we need to mind potential false positives in Chromium 122-127 for filters using `header=` option. 
--- platform/mv3/make-rulesets.js | 4 +++ src/js/regex-analyzer.js | 54 +++++++++++++++++++++++++++++++ src/js/static-filtering-parser.js | 43 +++++++++++++++++------- src/js/static-net-filtering.js | 41 +++++++++++++++++++---- 4 files changed, 125 insertions(+), 17 deletions(-) diff --git a/platform/mv3/make-rulesets.js b/platform/mv3/make-rulesets.js index 9883db6a2..8a9b0c712 100644 --- a/platform/mv3/make-rulesets.js +++ b/platform/mv3/make-rulesets.js @@ -333,6 +333,10 @@ function patchRuleset(ruleset) { log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true); continue; } + if ( Array.isArray(rule.condition.responseHeaders) ) { + log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true); + continue; + } if ( Array.isArray(condition.requestMethods) ) { log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true); continue; diff --git a/src/js/regex-analyzer.js b/src/js/regex-analyzer.js index d41915ba7..0a885047f 100644 --- a/src/js/regex-analyzer.js +++ b/src/js/regex-analyzer.js @@ -61,6 +61,8 @@ function _isRE2(node) { return true; } +/******************************************************************************/ + function _literalStrFromRegex(reStr) { if ( RegexAnalyzer === null ) { return ''; } let s = ''; @@ -200,3 +202,55 @@ function tokenizableStrFromNode(node) { } return '\x01'; } + +/******************************************************************************/ + +export function toHeaderPattern(reStr) { + if ( RegexAnalyzer === null ) { return; } + try { + return _toHeaderPattern(RegexAnalyzer(reStr, false).tree()); + } catch { + } +} + +function _toHeaderPattern(branch, depth = 0) { + switch ( branch.type ) { + case 1: /* T_SEQUENCE, 'Sequence' */ { + let s = ''; + for ( const node of branch.val ) { + const t = _toHeaderPattern(node, depth+1); + if ( t === undefined ) { return; } + s += t; + } + if ( depth === 0 && branch.val.length !== 0 ) { + const first = branch.val[0]; + if ( first.type !== 128 || first.val !== '^' ) { s = `*${s}`; 
} + const last = branch.val.at(-1); + if ( last.type !== 128 || last.val !== '$' ) { s = `${s}*`; } + } + return s; + } + case 4: /* T_GROUP, 'Group' */ { + if ( + branch.flags.NegativeLookAhead === 1 || + branch.flags.NegativeLookBehind === 1 + ) { + return; + } + return _toHeaderPattern(branch.val, depth+1); + } + case 64: /* T_HEXCHAR, 'HexChar' */ + return branch.flags.Char; + case 128: /* T_SPECIAL, 'Special' */ { + if ( branch.val === '^' ) { return ''; } + if ( branch.val === '$' ) { return ''; } + return; + } + case 1024: /* T_STRING, 'String' */ + return branch.val; + case 2048: /* T_COMMENT, 'Comment' */ + return ''; + default: + break; + } +} diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index b9b54ccb7..f9dca0000 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -630,6 +630,9 @@ const exCharCodeAt = (s, i) => { return pos >= 0 ? s.charCodeAt(pos) : -1; }; +const escapeForRegex = s => + s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + /******************************************************************************/ class AstWalker { @@ -3024,25 +3027,44 @@ export function parseHeaderValue(arg) { const out = { }; let pos = s.indexOf(':'); if ( pos === -1 ) { pos = s.length; } - out.name = s.slice(0, pos); + out.name = s.slice(0, pos).toLowerCase(); out.bad = out.name === ''; s = s.slice(pos + 1); out.not = s.charCodeAt(0) === 0x7E /* '~' */; if ( out.not ) { s = s.slice(1); } out.value = s; + if ( s === '' ) { return out; } const match = /^\/(.+)\/(i)?$/.exec(s); - if ( match !== null ) { - try { - out.re = new RegExp(match[1], match[2] || ''); - } - catch { - out.bad = true; - } + out.isRegex = match !== null; + if ( out.isRegex ) { + out.reStr = match[1]; + out.reFlags = match[2] || ''; + try { new RegExp(out.reStr, out.reFlags); } + catch { out.bad = true; } + return out; } + out.reFlags = 'i'; + if ( /[*?]/.test(s) === false ) { + out.reStr = escapeForRegex(s); + return out; + } + 
const reConstruct = /(?+~]\s*)(?:[A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|[.#][A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\](?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*))*$/ this.reEatBackslashes = /\\([()])/g; - this.reEscapeRegex = /[.*+?^${}()|[\]\\]/g; // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-classes this.knownPseudoClasses = new Set([ 'active', 'any-link', 'autofill', @@ -4043,7 +4064,7 @@ class ExtSelectorCompiler { regexDetails = [ regexDetails, match[2] ]; } } else { - regexDetails = '^' + value.replace(this.reEscapeRegex, '\\$&') + '$'; + regexDetails = `^${escapeForRegex(value)}$`; } return { name, pseudo, value: regexDetails }; } diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 68364c80c..65eabf94c 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -23,7 +23,7 @@ import * as sfp from './static-filtering-parser.js'; import { domainFromHostname, hostnameFromNetworkURL } from './uri-utils.js'; import { dropTask, queueTask } from './tasks.js'; -import { isRE2, tokenizableStrFromRegex } from './regex-analyzer.js'; +import { isRE2, toHeaderPattern, tokenizableStrFromRegex } from './regex-analyzer.js'; import BidiTrieContainer from './biditrie.js'; import { CompiledListReader } from './static-filtering-io.js'; @@ -2929,18 +2929,26 @@ class FilterOnHeaders { if ( refs.$parsed === null ) { refs.$parsed = sfp.parseHeaderValue(refs.headerOpt); } - const { bad, name, not, re, value } = refs.$parsed; + const { bad, name, not, value } = refs.$parsed; if ( bad ) { return false; } const headerValue = $httpHeaders.lookup(name); if ( headerValue === undefined ) { return false; } if ( value === '' ) { return true; } - return re === undefined - ? 
(headerValue === value) !== not - : re.test(headerValue) !== not; + let { re } = refs.$parsed; + if ( re === undefined ) { + re = new RegExp(refs.$parsed.reStr, refs.$parsed.reFlags); + refs.$parsed.re = re; + } + return re.test(headerValue) !== not; } static compile(details) { - return [ FilterOnHeaders.fid, details.optionValues.get('header') ]; + const parsed = sfp.parseHeaderValue(details.optionValues.get('header')); + let normalized = parsed.name; + if ( parsed.value !== '' ) { + normalized += `:${parsed.value}`; + } + return [ FilterOnHeaders.fid, normalized ]; } static fromCompiled(args) { @@ -2954,6 +2962,27 @@ class FilterOnHeaders { } static dnrFromCompiled(args, rule) { + rule.condition ||= {}; + const parsed = sfp.parseHeaderValue(args[1]); + if ( parsed.bad !== true ) { + const value = parsed.isRegex + ? toHeaderPattern(parsed.reStr) + : parsed.value; + if ( value !== undefined ) { + const prop = parsed.not + ? 'excludedResponseHeaders' + : 'responseHeaders'; + rule.condition[prop] ||= []; + const details = { + header: parsed.name, + }; + if ( value !== '' ) { + details.values = [ value ]; + } + rule.condition[prop].push(details); + return; + } + } dnrAddRuleError(rule, `header="${args[1]}" not supported`); }