mirror of
https://github.com/benbusby/whoogle-search.git
synced 2026-03-11 08:54:34 +00:00
fix: don't append wikiless params to wikipedia.org replacements
This commit is contained in:
parent
e4cabe3e5b
commit
8a24e4a03c
3 changed files with 24 additions and 7 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import cssutils
|
||||
from app.utils.misc import SKIP_PREFIX
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import ResultSet, Tag
|
||||
from cryptography.fernet import Fernet
|
||||
|
|
@ -690,7 +691,12 @@ class Filter:
|
|||
link_str.find('medium.com') + len('medium.com'):]
|
||||
new_desc.string = link_str
|
||||
else:
|
||||
new_desc.string = link_str.replace(site, alt)
|
||||
# start replacing after scheme//
|
||||
repl_start = link_str.find('//') + 1
|
||||
# end replacement on the next / that's found
|
||||
repl_end = link_str.find('/', repl_start)
|
||||
replace_site = link_str[repl_start:] if repl_end == -1 else link_str[repl_start:repl_end]
|
||||
new_desc.string = link_str.replace(replace_site, alt)
|
||||
|
||||
link_desc.replace_with(new_desc)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ from bs4 import BeautifulSoup as bsoup
|
|||
from cryptography.fernet import Fernet
|
||||
from flask import Request
|
||||
|
||||
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
|
||||
|
||||
ddg_favicon_site = 'http://icons.duckduckgo.com/ip2'
|
||||
|
||||
empty_gif = base64.b64decode(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from app.models.config import Config
|
||||
from app.models.endpoint import Endpoint
|
||||
from app.utils.misc import list_to_dict
|
||||
from app.utils.misc import list_to_dict, SKIP_PREFIX
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
import copy
|
||||
from flask import current_app
|
||||
|
|
@ -12,7 +12,6 @@ import re
|
|||
import warnings
|
||||
|
||||
SKIP_ARGS = ['ref_src', 'utm']
|
||||
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
|
||||
GOOG_STATIC = 'www.gstatic.com'
|
||||
G_M_LOGO_URL = 'https://www.gstatic.com/m/images/icons/googleg.gif'
|
||||
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
|
||||
|
|
@ -197,6 +196,11 @@ def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
|
|||
|
||||
for site_key in site_alts.keys():
|
||||
site_alt = f'{parsed_link.scheme}://{site_key}'
|
||||
|
||||
# If a site alt is already present in the link, skip updating it
|
||||
if site_alts[site_key] in link:
|
||||
break
|
||||
|
||||
if not hostname or site_alt not in hostcomp or not site_alts[site_key]:
|
||||
continue
|
||||
|
||||
|
|
@ -204,7 +208,12 @@ def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
|
|||
# a 2-char language code) to be passed as a URL param to Wikiless
|
||||
# in order to preserve the language setting.
|
||||
params = ''
|
||||
if 'wikipedia' in hostname and len(subdomain) == 2:
|
||||
|
||||
# Fix edge case for simple.wikipedia.org where substitution with a language subdomain
|
||||
# breaks the link
|
||||
if 'wikipedia' in hostname and subdomain == 'simple' and 'wikipedia' in site_alts[site_key]:
|
||||
break
|
||||
elif 'wikipedia' in hostname and len(subdomain) == 2:
|
||||
hostname = f'{subdomain}.{hostname}'
|
||||
params = f'?lang={subdomain}'
|
||||
elif 'medium' in hostname and len(subdomain) > 0:
|
||||
|
|
@ -220,13 +229,13 @@ def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
|
|||
link = '//'.join(link.split('//')[1:])
|
||||
|
||||
for prefix in SKIP_PREFIX:
|
||||
# replace the first occurrence of the prefix
|
||||
link = link.replace(prefix, '//', 1)
|
||||
if parsed_alt.scheme:
|
||||
# If a scheme is specified, remove everything before the
|
||||
# first occurrence of it
|
||||
link = f'{parsed_alt.scheme}{link.split(parsed_alt.scheme, 1)[-1]}'
|
||||
else:
|
||||
# Otherwise, replace the first occurrence of the prefix
|
||||
link = link.replace(prefix, '//', 1)
|
||||
|
||||
break
|
||||
|
||||
return link
|
||||
|
|
|
|||
Loading…
Reference in a new issue