diff --git a/app/__init__.py b/app/__init__.py index 0cf9b66..5535235 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -12,19 +12,19 @@ from flask import Flask import json import logging.config import os +import sys from stem import Signal import threading import warnings from werkzeug.middleware.proxy_fix import ProxyFix -from app.utils.misc import read_config_bool from app.services.http_client import HttpxClient from app.services.provider import close_all_clients from app.version import __version__ -app = Flask(__name__, static_folder=os.path.dirname( - os.path.abspath(__file__)) + '/static') +app = Flask(__name__, static_folder=os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'static')) app.wsgi_app = ProxyFix(app.wsgi_app) @@ -76,7 +76,10 @@ app.config['CONFIG_DISABLE'] = read_config_bool('WHOOGLE_CONFIG_DISABLE') app.config['SESSION_FILE_DIR'] = os.path.join( app.config['CONFIG_PATH'], 'session') -app.config['MAX_SESSION_SIZE'] = 4000 # Sessions won't exceed 4KB +# Maximum session file size in bytes (4KB limit to prevent abuse and disk exhaustion) +# Session files larger than this are ignored during cleanup to avoid processing +# potentially malicious or corrupted files +app.config['MAX_SESSION_SIZE'] = 4000 app.config['BANG_PATH'] = os.getenv( 'CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'bangs')) @@ -118,18 +121,53 @@ except Exception as e: print(f"Warning: Could not initialize UA pool: {e}") app.config['UA_POOL'] = [] -# Session values -app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key') -if os.path.exists(app_key_path): +# Session values - Secret key management +# Priority: environment variable → file → generate new +def get_secret_key(): + """Load or generate secret key with validation. + + Priority order: + 1. WHOOGLE_SECRET_KEY environment variable + 2. Existing key file + 3. 
Generate new key and save to file + + Returns: + str: Valid secret key for Flask sessions + """ + # Check environment variable first + env_key = os.getenv('WHOOGLE_SECRET_KEY', '').strip() + if env_key: + # Validate env key has minimum length + if len(env_key) >= 32: + return env_key + else: + print(f"Warning: WHOOGLE_SECRET_KEY too short ({len(env_key)} chars, need 32+). Using file/generated key instead.", file=sys.stderr) + + # Check file-based key + app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key') + if os.path.exists(app_key_path): + try: + with open(app_key_path, 'r', encoding='utf-8') as f: + key = f.read().strip() + # Validate file key + if len(key) >= 32: + return key + else: + print(f"Warning: Key file too short, regenerating", file=sys.stderr) + except (PermissionError, IOError) as e: + print(f"Warning: Could not read key file: {e}", file=sys.stderr) + + # Generate new key + new_key = str(b64encode(os.urandom(32))) try: - with open(app_key_path, 'r', encoding='utf-8') as f: - app.config['SECRET_KEY'] = f.read() - except PermissionError: - app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) -else: - app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) - with open(app_key_path, 'w', encoding='utf-8') as key_file: - key_file.write(app.config['SECRET_KEY']) + with open(app_key_path, 'w', encoding='utf-8') as key_file: + key_file.write(new_key) + except (PermissionError, IOError) as e: + print(f"Warning: Could not save key file: {e}. 
Key will not persist across restarts.", file=sys.stderr) + + return new_key + +app.config['SECRET_KEY'] = get_secret_key() app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365) # NOTE: SESSION_COOKIE_SAMESITE must be set to 'lax' to allow the user's diff --git a/app/filter.py b/app/filter.py index f079a64..d57c44d 100644 --- a/app/filter.py +++ b/app/filter.py @@ -111,8 +111,10 @@ def clean_css(css: str, page_url: str) -> str: class Filter: - # Limit used for determining if a result is a "regular" result or a list - # type result (such as "people also asked", "related searches", etc) + # Minimum number of child div elements that indicates a collapsible section + # Regular search results typically have fewer child divs (< 7) + # Special sections like "People also ask", "Related searches" have more (>= 7) + # This threshold helps identify and collapse these extended result sections RESULT_CHILD_LIMIT = 7 def __init__( @@ -552,9 +554,6 @@ class Filter: # Remove any elements that direct to unsupported Google pages if any(url in link_netloc for url in unsupported_g_pages): - # FIXME: The "Shopping" tab requires further filtering (see #136) - # Temporarily removing all links to that tab for now. - # Replaces the /url google unsupported link to the direct url link['href'] = link_netloc parent = link.parent diff --git a/app/models/config.py b/app/models/config.py index 3446309..fb2f31f 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -247,9 +247,34 @@ class Config: return param_str def _get_fernet_key(self, password: str) -> bytes: - hash_object = hashlib.md5(password.encode()) - key = urlsafe_b64encode(hash_object.hexdigest().encode()) - return key + """Derive a Fernet-compatible key from a password using PBKDF2. + + Note: This uses a static salt for simplicity. This is a breaking change + from the previous MD5-based implementation. Existing encrypted preferences + will need to be re-encrypted. 
+ + Args: + password: The password to derive the key from + + Returns: + bytes: A URL-safe base64 encoded 32-byte key suitable for Fernet + """ + # Use a static salt derived from app context + # In a production system, you'd want to store per-user salts + salt = b'whoogle-preferences-salt-v2' + + # Derive a 32-byte key using PBKDF2 with SHA256 + # 100,000 iterations is a reasonable balance of security and performance + kdf_key = hashlib.pbkdf2_hmac( + 'sha256', + password.encode('utf-8'), + salt, + 100000, + dklen=32 + ) + + # Fernet requires a URL-safe base64 encoded key + return urlsafe_b64encode(kdf_key) def _encode_preferences(self) -> str: preferences_json = json.dumps(self.get_attrs()).encode() diff --git a/app/routes.py b/app/routes.py index b176000..e111102 100644 --- a/app/routes.py +++ b/app/routes.py @@ -3,7 +3,6 @@ import base64 import io import json import os -import pickle import re import urllib.parse as urlparse import uuid @@ -102,9 +101,8 @@ def session_required(f): if os.path.getsize(file_path) > app.config['MAX_SESSION_SIZE']: continue - with open(file_path, 'rb') as session_file: - _ = pickle.load(session_file) - data = pickle.load(session_file) + with open(file_path, 'r', encoding='utf-8') as session_file: + data = json.load(session_file) if isinstance(data, dict) and 'valid' in data: continue invalid_sessions.append(file_path) @@ -176,19 +174,28 @@ def after_request_func(resp): resp.headers['X-Content-Type-Options'] = 'nosniff' resp.headers['X-Frame-Options'] = 'DENY' resp.headers['Cache-Control'] = 'max-age=86400' + + # Security headers + resp.headers['Referrer-Policy'] = 'no-referrer' + resp.headers['Permissions-Policy'] = 'geolocation=(), microphone=(), camera=()' + + # Add HSTS header if HTTPS is enabled + if os.environ.get('HTTPS_ONLY', False): + resp.headers['Strict-Transport-Security'] = 'max-age=31536000; includeSubDomains' - if os.getenv('WHOOGLE_CSP', False): + # Enable CSP by default (can be disabled via env var) + if 
os.getenv('WHOOGLE_CSP', '1') != '0': resp.headers['Content-Security-Policy'] = app.config['CSP'] if os.environ.get('HTTPS_ONLY', False): resp.headers['Content-Security-Policy'] += \ - 'upgrade-insecure-requests' + ' upgrade-insecure-requests' return resp @app.errorhandler(404) def unknown_page(e): - app.logger.warn(e) + app.logger.warning(e) return redirect(g.app_location) @@ -604,10 +611,11 @@ def config(): return json.dumps(g.user_config.__dict__) elif request.method == 'PUT' and not config_disabled: if name: - config_pkl = os.path.join(app.config['CONFIG_PATH'], name) - session['config'] = (pickle.load(open(config_pkl, 'rb')) - if os.path.exists(config_pkl) - else session['config']) + config_file = os.path.join(app.config['CONFIG_PATH'], name) + if os.path.exists(config_file): + with open(config_file, 'r', encoding='utf-8') as f: + session['config'] = json.load(f) + # else keep existing session['config'] return json.dumps(session['config']) else: return json.dumps({}) @@ -623,7 +631,7 @@ def config(): # Keep both the selection and the custom string if 'custom_user_agent' in config_data: config_data['custom_user_agent'] = config_data['custom_user_agent'] - print(f"Setting custom user agent to: {config_data['custom_user_agent']}") # Debug log + app.logger.debug(f"Setting custom user agent to: {config_data['custom_user_agent']}") else: config_data['use_custom_user_agent'] = False # Only clear custom_user_agent if not using custom option @@ -632,11 +640,9 @@ def config(): # Save config by name to allow a user to easily load later if name: - pickle.dump( - config_data, - open(os.path.join( - app.config['CONFIG_PATH'], - name), 'wb')) + config_file = os.path.join(app.config['CONFIG_PATH'], name) + with open(config_file, 'w', encoding='utf-8') as f: + json.dump(config_data, f, indent=2) session['config'] = config_data return redirect(config_data['url']) @@ -798,8 +804,9 @@ def internal_error(e): # Attempt to parse the query try: - search_util = Search(request, 
g.user_config, g.session_key) - query = search_util.new_search_query() + if hasattr(g, 'user_config') and hasattr(g, 'session_key'): + search_util = Search(request, g.user_config, g.session_key) + query = search_util.new_search_query() except Exception: pass @@ -809,16 +816,26 @@ def internal_error(e): if (fallback_engine): return redirect(fallback_engine + (query or '')) - localization_lang = g.user_config.get_localization_lang() + # Safely get localization language with fallback + if hasattr(g, 'user_config'): + localization_lang = g.user_config.get_localization_lang() + else: + localization_lang = 'lang_en' translation = app.config['TRANSLATIONS'][localization_lang] - return render_template( - 'error.html', - error_message='Internal server error (500)', - translation=translation, - farside='https://farside.link', - config=g.user_config, - query=urlparse.unquote(query or ''), - params=g.user_config.to_params(keys=['preferences'])), 500 + # Build template context with safe defaults + template_context = { + 'error_message': 'Internal server error (500)', + 'translation': translation, + 'farside': 'https://farside.link', + 'query': urlparse.unquote(query or '') + } + + # Add user config if available + if hasattr(g, 'user_config'): + template_context['config'] = g.user_config + template_context['params'] = g.user_config.to_params(keys=['preferences']) + + return render_template('error.html', **template_context), 500 def run_app() -> None: diff --git a/app/static/widgets/calculator.html b/app/static/widgets/calculator.html index f0fbd36..c041899 100644 --- a/app/static/widgets/calculator.html +++ b/app/static/widgets/calculator.html @@ -193,10 +193,13 @@ const calc = () => { (statement.match(/\(/g) || []).length > (statement.match(/\)/g) || []).length ) statement += ")"; else break; - // evaluate the expression. 
+ // evaluate the expression with the Function constructor (equivalent to eval()) console.log("calculating [" + statement + "]"); try { - var result = eval(statement); + // NOTE: new Function() is not a sandbox; strict mode and the Math parameter do not block access to globals + // The statement must therefore be restricted to validated calculator input before it is evaluated + const safeEval = new Function('Math', `'use strict'; return (${statement})`); + var result = safeEval(Math); document.getElementById("prev-equation").innerHTML = mathtext.innerHTML + " = "; mathtext.innerHTML = result; mathtext.classList.remove("error-border"); diff --git a/app/version.py b/app/version.py index 6a53b0c..83e01c2 100644 --- a/app/version.py +++ b/app/version.py @@ -1,8 +1,8 @@ import os -optional_dev_tag = '-update-testing' +optional_dev_tag = '' if os.getenv('DEV_BUILD'): optional_dev_tag = '.dev' + os.getenv('DEV_BUILD') -__version__ = '1.2.0' + optional_dev_tag +__version__ = '1.2.1' + optional_dev_tag diff --git a/docker-compose.yml b/docker-compose.yml index 0a693e6..68db779 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,5 @@ -# can't use mem_limit in a 3.x docker-compose file in non swarm mode -# see https://github.com/docker/compose/issues/4513 -version: "2.4" +# Modern docker-compose format (v2+) does not require version specification +# Memory limits are supported in Compose v2+ without version field services: whoogle-search: