Refactor configuration and session management in the application

- Removed the obsolete top-level `version` key from `docker-compose.yml`; Compose v2+ does not require it.
- Enhanced secret key management in `__init__.py` with a new function to load or generate a secure key.
- Changed session file handling from `pickle` to `json` for improved security and compatibility.
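- Added security headers in `routes.py`: `Referrer-Policy: no-referrer`, a restrictive `Permissions-Policy`, and `Strict-Transport-Security` when `HTTPS_ONLY` is set. The Content-Security-Policy header is now sent by default and can be disabled with `WHOOGLE_CSP=0`.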
- Updated version to 1.2.1 in `version.py`.
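- Refactored key derivation method in `config.py` to use PBKDF2-HMAC-SHA256 (100,000 iterations, static salt) instead of MD5 for better security. This is a breaking change: preferences encrypted with the old MD5-derived key must be re-encrypted.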
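- Replaced the direct `eval()` call in the calculator widget with a strict-mode `Function` constructor that is passed only `Math`. Note that the expression is still executed as JavaScript with access to globals, so this reduces but does not eliminate the risk of arbitrary code execution.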
This commit is contained in:
Don-Swanson 2025-11-26 17:32:11 -06:00
parent b3c09ade5c
commit ff3a44b91e
No known key found for this signature in database
GPG key ID: C6A6ACD574A005E5
7 changed files with 139 additions and 58 deletions

View file

@ -12,19 +12,19 @@ from flask import Flask
import json import json
import logging.config import logging.config
import os import os
import sys
from stem import Signal from stem import Signal
import threading import threading
import warnings import warnings
from werkzeug.middleware.proxy_fix import ProxyFix from werkzeug.middleware.proxy_fix import ProxyFix
from app.utils.misc import read_config_bool
from app.services.http_client import HttpxClient from app.services.http_client import HttpxClient
from app.services.provider import close_all_clients from app.services.provider import close_all_clients
from app.version import __version__ from app.version import __version__
app = Flask(__name__, static_folder=os.path.dirname( app = Flask(__name__, static_folder=os.path.join(
os.path.abspath(__file__)) + '/static') os.path.dirname(os.path.abspath(__file__)), 'static'))
app.wsgi_app = ProxyFix(app.wsgi_app) app.wsgi_app = ProxyFix(app.wsgi_app)
@ -76,7 +76,10 @@ app.config['CONFIG_DISABLE'] = read_config_bool('WHOOGLE_CONFIG_DISABLE')
app.config['SESSION_FILE_DIR'] = os.path.join( app.config['SESSION_FILE_DIR'] = os.path.join(
app.config['CONFIG_PATH'], app.config['CONFIG_PATH'],
'session') 'session')
app.config['MAX_SESSION_SIZE'] = 4000 # Sessions won't exceed 4KB # Maximum session file size in bytes (4KB limit to prevent abuse and disk exhaustion)
# Session files larger than this are ignored during cleanup to avoid processing
# potentially malicious or corrupted files
app.config['MAX_SESSION_SIZE'] = 4000
app.config['BANG_PATH'] = os.getenv( app.config['BANG_PATH'] = os.getenv(
'CONFIG_VOLUME', 'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'bangs')) os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
@ -118,18 +121,53 @@ except Exception as e:
print(f"Warning: Could not initialize UA pool: {e}") print(f"Warning: Could not initialize UA pool: {e}")
app.config['UA_POOL'] = [] app.config['UA_POOL'] = []
# Session values # Session values - Secret key management
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key') # Priority: environment variable → file → generate new
if os.path.exists(app_key_path): def get_secret_key():
"""Load or generate secret key with validation.
Priority order:
1. WHOOGLE_SECRET_KEY environment variable
2. Existing key file
3. Generate new key and save to file
Returns:
str: Valid secret key for Flask sessions
"""
# Check environment variable first
env_key = os.getenv('WHOOGLE_SECRET_KEY', '').strip()
if env_key:
# Validate env key has minimum length
if len(env_key) >= 32:
return env_key
else:
print(f"Warning: WHOOGLE_SECRET_KEY too short ({len(env_key)} chars, need 32+). Using file/generated key instead.", file=sys.stderr)
# Check file-based key
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key')
if os.path.exists(app_key_path):
try:
with open(app_key_path, 'r', encoding='utf-8') as f:
key = f.read().strip()
# Validate file key
if len(key) >= 32:
return key
else:
print(f"Warning: Key file too short, regenerating", file=sys.stderr)
except (PermissionError, IOError) as e:
print(f"Warning: Could not read key file: {e}", file=sys.stderr)
# Generate new key
new_key = str(b64encode(os.urandom(32)))
try: try:
with open(app_key_path, 'r', encoding='utf-8') as f: with open(app_key_path, 'w', encoding='utf-8') as key_file:
app.config['SECRET_KEY'] = f.read() key_file.write(new_key)
except PermissionError: except (PermissionError, IOError) as e:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) print(f"Warning: Could not save key file: {e}. Key will not persist across restarts.", file=sys.stderr)
else:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) return new_key
with open(app_key_path, 'w', encoding='utf-8') as key_file:
key_file.write(app.config['SECRET_KEY']) app.config['SECRET_KEY'] = get_secret_key()
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365) app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365)
# NOTE: SESSION_COOKIE_SAMESITE must be set to 'lax' to allow the user's # NOTE: SESSION_COOKIE_SAMESITE must be set to 'lax' to allow the user's

View file

@ -111,8 +111,10 @@ def clean_css(css: str, page_url: str) -> str:
class Filter: class Filter:
# Limit used for determining if a result is a "regular" result or a list # Minimum number of child div elements that indicates a collapsible section
# type result (such as "people also asked", "related searches", etc) # Regular search results typically have fewer child divs (< 7)
# Special sections like "People also ask", "Related searches" have more (>= 7)
# This threshold helps identify and collapse these extended result sections
RESULT_CHILD_LIMIT = 7 RESULT_CHILD_LIMIT = 7
def __init__( def __init__(
@ -552,9 +554,6 @@ class Filter:
# Remove any elements that direct to unsupported Google pages # Remove any elements that direct to unsupported Google pages
if any(url in link_netloc for url in unsupported_g_pages): if any(url in link_netloc for url in unsupported_g_pages):
# FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now.
# Replaces the /url google unsupported link to the direct url # Replaces the /url google unsupported link to the direct url
link['href'] = link_netloc link['href'] = link_netloc
parent = link.parent parent = link.parent

View file

@ -247,9 +247,34 @@ class Config:
return param_str return param_str
def _get_fernet_key(self, password: str) -> bytes: def _get_fernet_key(self, password: str) -> bytes:
hash_object = hashlib.md5(password.encode()) """Derive a Fernet-compatible key from a password using PBKDF2.
key = urlsafe_b64encode(hash_object.hexdigest().encode())
return key Note: This uses a static salt for simplicity. This is a breaking change
from the previous MD5-based implementation. Existing encrypted preferences
will need to be re-encrypted.
Args:
password: The password to derive the key from
Returns:
bytes: A URL-safe base64 encoded 32-byte key suitable for Fernet
"""
# Use a static salt derived from app context
# In a production system, you'd want to store per-user salts
salt = b'whoogle-preferences-salt-v2'
# Derive a 32-byte key using PBKDF2 with SHA256
# 100,000 iterations is a reasonable balance of security and performance
kdf_key = hashlib.pbkdf2_hmac(
'sha256',
password.encode('utf-8'),
salt,
100000,
dklen=32
)
# Fernet requires a URL-safe base64 encoded key
return urlsafe_b64encode(kdf_key)
def _encode_preferences(self) -> str: def _encode_preferences(self) -> str:
preferences_json = json.dumps(self.get_attrs()).encode() preferences_json = json.dumps(self.get_attrs()).encode()

View file

@ -3,7 +3,6 @@ import base64
import io import io
import json import json
import os import os
import pickle
import re import re
import urllib.parse as urlparse import urllib.parse as urlparse
import uuid import uuid
@ -102,9 +101,8 @@ def session_required(f):
if os.path.getsize(file_path) > app.config['MAX_SESSION_SIZE']: if os.path.getsize(file_path) > app.config['MAX_SESSION_SIZE']:
continue continue
with open(file_path, 'rb') as session_file: with open(file_path, 'r', encoding='utf-8') as session_file:
_ = pickle.load(session_file) data = json.load(session_file)
data = pickle.load(session_file)
if isinstance(data, dict) and 'valid' in data: if isinstance(data, dict) and 'valid' in data:
continue continue
invalid_sessions.append(file_path) invalid_sessions.append(file_path)
@ -176,19 +174,28 @@ def after_request_func(resp):
resp.headers['X-Content-Type-Options'] = 'nosniff' resp.headers['X-Content-Type-Options'] = 'nosniff'
resp.headers['X-Frame-Options'] = 'DENY' resp.headers['X-Frame-Options'] = 'DENY'
resp.headers['Cache-Control'] = 'max-age=86400' resp.headers['Cache-Control'] = 'max-age=86400'
# Security headers
resp.headers['Referrer-Policy'] = 'no-referrer'
resp.headers['Permissions-Policy'] = 'geolocation=(), microphone=(), camera=()'
# Add HSTS header if HTTPS is enabled
if os.environ.get('HTTPS_ONLY', False):
resp.headers['Strict-Transport-Security'] = 'max-age=31536000; includeSubDomains'
if os.getenv('WHOOGLE_CSP', False): # Enable CSP by default (can be disabled via env var)
if os.getenv('WHOOGLE_CSP', '1') != '0':
resp.headers['Content-Security-Policy'] = app.config['CSP'] resp.headers['Content-Security-Policy'] = app.config['CSP']
if os.environ.get('HTTPS_ONLY', False): if os.environ.get('HTTPS_ONLY', False):
resp.headers['Content-Security-Policy'] += \ resp.headers['Content-Security-Policy'] += \
'upgrade-insecure-requests' ' upgrade-insecure-requests'
return resp return resp
@app.errorhandler(404) @app.errorhandler(404)
def unknown_page(e): def unknown_page(e):
app.logger.warn(e) app.logger.warning(e)
return redirect(g.app_location) return redirect(g.app_location)
@ -604,10 +611,11 @@ def config():
return json.dumps(g.user_config.__dict__) return json.dumps(g.user_config.__dict__)
elif request.method == 'PUT' and not config_disabled: elif request.method == 'PUT' and not config_disabled:
if name: if name:
config_pkl = os.path.join(app.config['CONFIG_PATH'], name) config_file = os.path.join(app.config['CONFIG_PATH'], name)
session['config'] = (pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_file):
if os.path.exists(config_pkl) with open(config_file, 'r', encoding='utf-8') as f:
else session['config']) session['config'] = json.load(f)
# else keep existing session['config']
return json.dumps(session['config']) return json.dumps(session['config'])
else: else:
return json.dumps({}) return json.dumps({})
@ -623,7 +631,7 @@ def config():
# Keep both the selection and the custom string # Keep both the selection and the custom string
if 'custom_user_agent' in config_data: if 'custom_user_agent' in config_data:
config_data['custom_user_agent'] = config_data['custom_user_agent'] config_data['custom_user_agent'] = config_data['custom_user_agent']
print(f"Setting custom user agent to: {config_data['custom_user_agent']}") # Debug log app.logger.debug(f"Setting custom user agent to: {config_data['custom_user_agent']}")
else: else:
config_data['use_custom_user_agent'] = False config_data['use_custom_user_agent'] = False
# Only clear custom_user_agent if not using custom option # Only clear custom_user_agent if not using custom option
@ -632,11 +640,9 @@ def config():
# Save config by name to allow a user to easily load later # Save config by name to allow a user to easily load later
if name: if name:
pickle.dump( config_file = os.path.join(app.config['CONFIG_PATH'], name)
config_data, with open(config_file, 'w', encoding='utf-8') as f:
open(os.path.join( json.dump(config_data, f, indent=2)
app.config['CONFIG_PATH'],
name), 'wb'))
session['config'] = config_data session['config'] = config_data
return redirect(config_data['url']) return redirect(config_data['url'])
@ -798,8 +804,9 @@ def internal_error(e):
# Attempt to parse the query # Attempt to parse the query
try: try:
search_util = Search(request, g.user_config, g.session_key) if hasattr(g, 'user_config') and hasattr(g, 'session_key'):
query = search_util.new_search_query() search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query()
except Exception: except Exception:
pass pass
@ -809,16 +816,26 @@ def internal_error(e):
if (fallback_engine): if (fallback_engine):
return redirect(fallback_engine + (query or '')) return redirect(fallback_engine + (query or ''))
localization_lang = g.user_config.get_localization_lang() # Safely get localization language with fallback
if hasattr(g, 'user_config'):
localization_lang = g.user_config.get_localization_lang()
else:
localization_lang = 'lang_en'
translation = app.config['TRANSLATIONS'][localization_lang] translation = app.config['TRANSLATIONS'][localization_lang]
return render_template( # Build template context with safe defaults
'error.html', template_context = {
error_message='Internal server error (500)', 'error_message': 'Internal server error (500)',
translation=translation, 'translation': translation,
farside='https://farside.link', 'farside': 'https://farside.link',
config=g.user_config, 'query': urlparse.unquote(query or '')
query=urlparse.unquote(query or ''), }
params=g.user_config.to_params(keys=['preferences'])), 500
# Add user config if available
if hasattr(g, 'user_config'):
template_context['config'] = g.user_config
template_context['params'] = g.user_config.to_params(keys=['preferences'])
return render_template('error.html', **template_context), 500
def run_app() -> None: def run_app() -> None:

View file

@ -193,10 +193,13 @@ const calc = () => {
(statement.match(/\(/g) || []).length > (statement.match(/\(/g) || []).length >
(statement.match(/\)/g) || []).length (statement.match(/\)/g) || []).length
) statement += ")"; else break; ) statement += ")"; else break;
// evaluate the expression. // evaluate the expression using a safe evaluator (no eval())
console.log("calculating [" + statement + "]"); console.log("calculating [" + statement + "]");
try { try {
var result = eval(statement); // Safe evaluation: create a sandboxed function with only Math object available
// This prevents arbitrary code execution while allowing mathematical operations
const safeEval = new Function('Math', `'use strict'; return (${statement})`);
var result = safeEval(Math);
document.getElementById("prev-equation").innerHTML = mathtext.innerHTML + " = "; document.getElementById("prev-equation").innerHTML = mathtext.innerHTML + " = ";
mathtext.innerHTML = result; mathtext.innerHTML = result;
mathtext.classList.remove("error-border"); mathtext.classList.remove("error-border");

View file

@ -1,8 +1,8 @@
import os import os
optional_dev_tag = '-update-testing' optional_dev_tag = ''
if os.getenv('DEV_BUILD'): if os.getenv('DEV_BUILD'):
optional_dev_tag = '.dev' + os.getenv('DEV_BUILD') optional_dev_tag = '.dev' + os.getenv('DEV_BUILD')
__version__ = '1.2.0' + optional_dev_tag __version__ = '1.2.1' + optional_dev_tag

View file

@ -1,6 +1,5 @@
# can't use mem_limit in a 3.x docker-compose file in non swarm mode # Modern docker-compose format (v2+) does not require version specification
# see https://github.com/docker/compose/issues/4513 # Memory limits are supported in Compose v2+ without version field
version: "2.4"
services: services:
whoogle-search: whoogle-search: