refs #1247: Remove DOCTYPE before the HTML is parsed

This commit is contained in:
rstefko 2025-09-24 10:57:31 +02:00
parent 8c85ab600c
commit a9faca8ab6

View file

@@ -157,8 +157,9 @@ class Search:
user_agent=self.user_agent)
# Produce cleanable html soup from response
get_body_safed = get_body.text.replace("<","andlt;").replace(">","andgt;")
html_soup = bsoup(get_body_safed, 'html.parser').html
get_body_safed = re.sub(r'<!DOCTYPE[^>]*>\s*', '', get_body.text, flags=re.IGNORECASE)
get_body_safed = get_body_safed.replace("&lt;","andlt;").replace("&gt;","andgt;")
html_soup = bsoup(get_body_safed, 'html.parser')
# Replace current soup if view_image is active
# FIXME: Broken since the user agent changes as of 16 Jan 2025