[mv3] Add support to convert header= option to DNR rules

Related issue:
https://github.com/uBlockOrigin/uBOL-home/issues/157

The `header=` option will be converted into DNR's `responseHeaders`
condition.

There will be an attempt to convert regex-based values into DNR-
compatible syntax. Not all regex-based patterns can be converted to
use DNR's patterns with `*` and `?` special characters.

The implementation of `header=` option in uBO has been revisited to
improve compatibility with DNR syntax to minimize burden for list
maintainers when creating `header=` filters compatible with both
uBO and uBOL.

The changes:
- Header names are now case-insensitive by default
- Occurrences of `*` in non-regex-based header values now mean
  "matches any number of characters"
- Occurrences of `?` in non-regex-based header values now mean
  "matches zero or one character"

At time of commit, and as per MDN, only Chromium-based browsers
currently support filtering on response headers:
https://developer.mozilla.org/docs/Mozilla/Add-ons/WebExtensions/API/declarativeNetRequest/HeaderInfo

Also as per MDN, Chromium 121-127 silently ignore the `responseHeaders`
condition, potentially causing undue blocking of network requests.
Currently uBOL supports Chromium 122 and later, meaning we need to mind
potential false positives in Chromium 122-127 for filters using
`header=` option.
This commit is contained in:
Raymond Hill 2025-05-22 11:46:08 -04:00
parent c44f043ed3
commit 408b538e75
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
4 changed files with 125 additions and 17 deletions

View file

@ -333,6 +333,10 @@ function patchRuleset(ruleset) {
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
continue;
}
if ( Array.isArray(rule.condition.responseHeaders) ) {
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
continue;
}
if ( Array.isArray(condition.requestMethods) ) {
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
continue;

View file

@ -61,6 +61,8 @@ function _isRE2(node) {
return true;
}
/******************************************************************************/
function _literalStrFromRegex(reStr) {
if ( RegexAnalyzer === null ) { return ''; }
let s = '';
@ -200,3 +202,55 @@ function tokenizableStrFromNode(node) {
}
return '\x01';
}
/******************************************************************************/
/**
 * Convert the source of a regular expression into a header-value pattern
 * built from literal text and `*` wildcards.
 *
 * @param {string} reStr - Regex source, without the enclosing slashes.
 * @returns {string|undefined} The converted pattern, or `undefined` when the
 *   regex analyzer is unavailable, when analysis throws, or when the regex
 *   contains a construct which cannot be converted.
 */
export function toHeaderPattern(reStr) {
    if ( RegexAnalyzer !== null ) {
        try {
            const tree = RegexAnalyzer(reStr, false).tree();
            return _toHeaderPattern(tree);
        } catch {
            // Best effort: any failure while analyzing or converting is
            // reported to the caller as "not convertible".
        }
    }
    return undefined;
}
/**
 * Recursively convert a regex-analyzer node into a header-value pattern.
 *
 * Only sequences, groups (except negative lookahead/lookbehind), hex
 * characters, literal strings, comments, and the `^` / `$` anchors are
 * convertible. Any other node makes the whole conversion fail.
 *
 * @param {object} branch - Node from the regex analyzer's tree.
 * @param {number} [depth=0] - Nesting level; 0 denotes the root node.
 * @returns {string|undefined} The pattern for this node, or `undefined`
 *   when the node (or one of its descendants) cannot be converted.
 */
function _toHeaderPattern(branch, depth = 0) {
    const { type, val } = branch;

    /* T_STRING, 'String' */
    if ( type === 1024 ) { return val; }

    /* T_COMMENT, 'Comment' */
    if ( type === 2048 ) { return ''; }

    /* T_HEXCHAR, 'HexChar' */
    if ( type === 64 ) { return branch.flags.Char; }

    /* T_SPECIAL, 'Special' */
    if ( type === 128 ) {
        // Anchors contribute no character; every other special is
        // unsupported.
        return val === '^' || val === '$' ? '' : undefined;
    }

    /* T_GROUP, 'Group' */
    if ( type === 4 ) {
        const { flags } = branch;
        const isNegativeLookaround =
            flags.NegativeLookAhead === 1 ||
            flags.NegativeLookBehind === 1;
        if ( isNegativeLookaround ) { return; }
        return _toHeaderPattern(val, depth + 1);
    }

    // Anything which is not a sequence at this point is unsupported.
    if ( type !== 1 ) { return; }

    /* T_SEQUENCE, 'Sequence' */
    let pattern = '';
    for ( const child of val ) {
        const part = _toHeaderPattern(child, depth + 1);
        if ( part === undefined ) { return; }
        pattern += part;
    }
    if ( depth !== 0 || val.length === 0 ) { return pattern; }

    // At the root, a missing `^` or `$` anchor means the regex can match
    // anywhere on that side, which translates into a `*` wildcard.
    const isAnchor = (node, anchor) =>
        node.type === 128 && node.val === anchor;
    if ( isAnchor(val[0], '^') === false ) { pattern = `*${pattern}`; }
    if ( isAnchor(val.at(-1), '$') === false ) { pattern = `${pattern}*`; }
    return pattern;
}

View file

@ -630,6 +630,9 @@ const exCharCodeAt = (s, i) => {
return pos >= 0 ? s.charCodeAt(pos) : -1;
};
/**
 * Escape every regex metacharacter in a string so that the result can be
 * embedded in a regular expression source and match the string literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} The text with each metacharacter prefixed by `\`.
 */
const escapeForRegex = s => {
    const reMetaChars = /[.*+?^${}()|[\]\\]/g;
    return s.replace(reMetaChars, '\\$&');
};
/******************************************************************************/
class AstWalker {
@ -3024,25 +3027,44 @@ export function parseHeaderValue(arg) {
const out = { };
let pos = s.indexOf(':');
if ( pos === -1 ) { pos = s.length; }
out.name = s.slice(0, pos);
out.name = s.slice(0, pos).toLowerCase();
out.bad = out.name === '';
s = s.slice(pos + 1);
out.not = s.charCodeAt(0) === 0x7E /* '~' */;
if ( out.not ) { s = s.slice(1); }
out.value = s;
if ( s === '' ) { return out; }
const match = /^\/(.+)\/(i)?$/.exec(s);
if ( match !== null ) {
try {
out.re = new RegExp(match[1], match[2] || '');
}
catch {
out.bad = true;
}
out.isRegex = match !== null;
if ( out.isRegex ) {
out.reStr = match[1];
out.reFlags = match[2] || '';
try { new RegExp(out.reStr, out.reFlags); }
catch { out.bad = true; }
return out;
}
out.reFlags = 'i';
if ( /[*?]/.test(s) === false ) {
out.reStr = escapeForRegex(s);
return out;
}
const reConstruct = /(?<!\\)[*?]/g;
const reParts = [];
let beg = 0;
for (;;) {
const match = reConstruct.exec(s);
if ( match === null ) { break; }
reParts.push(
escapeForRegex(s.slice(beg, match.index)),
match[0] === '*' ? '.*' : '.?',
);
beg = reConstruct.lastIndex;
}
reParts.push(escapeForRegex(s.slice(beg)));
out.reStr = reParts.join('');
return out;
}
// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier
export function parseReplaceByRegexValue(s) {
@ -3194,7 +3216,6 @@ class ExtSelectorCompiler {
// /^(?:[A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|[.#][A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\](?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*)(?:(?:\s+|\s*[>+~]\s*)(?:[A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|[.#][A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\](?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*))*$/
this.reEatBackslashes = /\\([()])/g;
this.reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
// https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-classes
this.knownPseudoClasses = new Set([
'active', 'any-link', 'autofill',
@ -4043,7 +4064,7 @@ class ExtSelectorCompiler {
regexDetails = [ regexDetails, match[2] ];
}
} else {
regexDetails = '^' + value.replace(this.reEscapeRegex, '\\$&') + '$';
regexDetails = `^${escapeForRegex(value)}$`;
}
return { name, pseudo, value: regexDetails };
}

View file

@ -23,7 +23,7 @@ import * as sfp from './static-filtering-parser.js';
import { domainFromHostname, hostnameFromNetworkURL } from './uri-utils.js';
import { dropTask, queueTask } from './tasks.js';
import { isRE2, tokenizableStrFromRegex } from './regex-analyzer.js';
import { isRE2, toHeaderPattern, tokenizableStrFromRegex } from './regex-analyzer.js';
import BidiTrieContainer from './biditrie.js';
import { CompiledListReader } from './static-filtering-io.js';
@ -2929,18 +2929,26 @@ class FilterOnHeaders {
if ( refs.$parsed === null ) {
refs.$parsed = sfp.parseHeaderValue(refs.headerOpt);
}
const { bad, name, not, re, value } = refs.$parsed;
const { bad, name, not, value } = refs.$parsed;
if ( bad ) { return false; }
const headerValue = $httpHeaders.lookup(name);
if ( headerValue === undefined ) { return false; }
if ( value === '' ) { return true; }
return re === undefined
? (headerValue === value) !== not
: re.test(headerValue) !== not;
let { re } = refs.$parsed;
if ( re === undefined ) {
re = new RegExp(refs.$parsed.reStr, refs.$parsed.reFlags);
refs.$parsed.re = re;
}
return re.test(headerValue) !== not;
}
static compile(details) {
return [ FilterOnHeaders.fid, details.optionValues.get('header') ];
const parsed = sfp.parseHeaderValue(details.optionValues.get('header'));
let normalized = parsed.name;
if ( parsed.value !== '' ) {
normalized += `:${parsed.value}`;
}
return [ FilterOnHeaders.fid, normalized ];
}
static fromCompiled(args) {
@ -2954,6 +2962,27 @@ class FilterOnHeaders {
}
static dnrFromCompiled(args, rule) {
rule.condition ||= {};
const parsed = sfp.parseHeaderValue(args[1]);
if ( parsed.bad !== true ) {
const value = parsed.isRegex
? toHeaderPattern(parsed.reStr)
: parsed.value;
if ( value !== undefined ) {
const prop = parsed.not
? 'excludedResponseHeaders'
: 'responseHeaders';
rule.condition[prop] ||= [];
const details = {
header: parsed.name,
};
if ( value !== '' ) {
details.values = [ value ];
}
rule.condition[prop].push(details);
return;
}
}
dnrAddRuleError(rule, `header="${args[1]}" not supported`);
}