Improves the validation script.

- Updates Python deps
- Only runs pip install the first time
- Corrects the schema
- Adds the missing URL field to Picocrypt to avoid a validation failure
- Improves the script with neater output, proper exit codes, colors, etc.
2026-03-11 08:55:33 +00:00 · 2026-02-22 15:30:18 +00:00 · 2026-02-22 15:30:18 +00:00 · 2fec6f757d
commit 2fec6f757d
parent 0ad29adfa6
5 changed files with 188 additions and 159 deletions
--- a/2
+++ b/2
@ -34,7 +34,7 @@ WEB_DIR := web

 # Targets for lib/
 install_lib_deps:
-	$(PYTHON) -m pip install -r $(LIB_DIR)/requirements.txt
+	$(PYTHON) -m pip install -q -r $(LIB_DIR)/requirements.txt

 gen_readme: install_lib_deps
 	$(PYTHON) $(LIB_DIR)/awesome-privacy-readme-gen.py
--- a/awesome-privacy.yml
+++ b/awesome-privacy.yml
@ -309,6 +309,7 @@ categories:
      - name: Picocrypt
        github: Picocrypt/Picocrypt
        icon: https://avatars.githubusercontent.com/u/171401041
+        url: ''
        description: |
          Picocrypt is a very small (hence Pico), very simple, yet very secure encryption tools
          that you can use to protect your files. It's designed to be the go-to tool for encryption,
--- a/lib/requirements.txt
+++ b/lib/requirements.txt
@ -1,5 +1,2 @@
 PyYAML==6.0.1
-requests==2.31.0
-jsonschema
-pyyaml
-termcolor
+jsonschema==4.23.0
--- a/lib/schema.json
+++ b/lib/schema.json
@ -22,23 +22,23 @@
                      "name": { "type": "string" },
                      "description": { "type": "string" },
                      "url": { "type": "string" },
-                      "github": { "type": "string", "nullable": true },
-                      "icon": { "type": "string", "nullable": true },
-                      "followWith": { "type": "string", "nullable": true },
-                      "securityAudited": { "type": "boolean", "nullable": true },
-                      "openSource": { "type": "boolean", "nullable": true },
-                      "acceptsCrypto": { "type": "boolean", "nullable": true },
-                      "tosdrId": { "type": "number", "nullable": true },
-                      "iosApp": { "type": "string", "nullable": true },
-                      "androidApp": { "type": "string", "nullable": true },
-                      "discordInvite": { "type": "string", "nullable": true },
-                      "subreddit": { "type": "string", "nullable": true }
+                      "github": { "type": ["string", "null"] },
+                      "icon": { "type": ["string", "null"] },
+                      "followWith": { "type": ["string", "null"] },
+                      "securityAudited": { "type": ["boolean", "null"] },
+                      "openSource": { "type": ["boolean", "null"] },
+                      "acceptsCrypto": { "type": ["boolean", "null"] },
+                      "tosdrId": { "type": ["number", "null"] },
+                      "iosApp": { "type": ["string", "null"] },
+                      "androidApp": { "type": ["string", "null"] },
+                      "discordInvite": { "type": ["string", "null"] },
+                      "subreddit": { "type": ["string", "null"] }
                    },
                    "required": ["name", "description", "url"],
                    "additionalProperties": false
                  }
                },
-                "intro": { "type": "string", "nullable": true },
+                "intro": { "type": ["string", "null"] },
                "notableMentions": {
                  "oneOf": [
                    {
@ -54,16 +54,20 @@
                        "additionalProperties": false
                      }
                    },
-                    { "type": "string" }
-                  ],
-                  "nullable": true
+                    { "type": "string" },
+                    { "type": "null" }
+                  ]
                },
-                "furtherInfo": { "type": "string", "nullable": true },
-                "wordOfWarning": { "type": "string", "nullable": true },
+                "furtherInfo": { "type": ["string", "null"] },
+                "wordOfWarning": { "type": ["string", "null"] },
                "alternativeTo": {
+                  "oneOf": [
+                    {
                      "type": "array",
-                  "items": { "type": "string" },
-                  "nullable": true
+                      "items": { "type": "string" }
+                    },
+                    { "type": "null" }
+                  ]
                }
              },
              "required": ["name", "services"],
--- a/lib/validate-awesome-privacy.py
+++ b/lib/validate-awesome-privacy.py
@ -1,85 +1,112 @@
 import json
 import os
 import sys
-import logging
+
 import yaml
-from termcolor import colored
 from jsonschema import Draft7Validator

-# Configure Logging
-LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
-logging.basicConfig(level=LOG_LEVEL)
-logger = logging.getLogger(__name__)
+# Paths (relative to project root)
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+DATA_PATH = os.path.join(PROJECT_ROOT, "awesome-privacy.yml")
+SCHEMA_PATH = os.path.join(PROJECT_ROOT, "lib/schema.json")
+
+# Exit codes
+EXIT_VALID = 0
+EXIT_VALIDATION_ERRORS = 1
+EXIT_RUNTIME_ERROR = 2
+
+MAX_ERRORS = 20
+
+# ANSI color helpers (disabled when NO_COLOR is set or stderr is not a TTY)
+_use_color = sys.stderr.isatty() and not os.environ.get("NO_COLOR")
+red = (lambda s: f"\033[31m{s}\033[0m") if _use_color else (lambda s: s)
+green = (lambda s: f"\033[32m{s}\033[0m") if _use_color else (lambda s: s)
+yellow = (lambda s: f"\033[33m{s}\033[0m") if _use_color else (lambda s: s)
+dim = (lambda s: f"\033[2m{s}\033[0m") if _use_color else (lambda s: s)


-# Determine the project root based on the script's location
-project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-awesome_privacy_path = os.path.join(project_root, 'awesome-privacy.yml')
-schema_path = os.path.join(project_root, 'lib/schema.json')
-
-
-# Log method, accepts a message and optional log level
-# and prints the output to the terminal in right color
-def loggy(message: str, level: str = 'debug'):
-    if level == "info":
-        logger.info(colored(message, 'blue'))
-    elif level == "warning":
-        logger.warning(colored(message, 'yellow'))
-    elif level == "error":
-        logger.error(colored(message, 'red'))
-    elif level == "success":
-        logger.info(colored(message, 'green'))
-    elif level == "debug":
-        logger.debug(colored(message, 'grey'))
-
-
-# Loads a given YAML file and returns the data
-def load_yaml(yaml_path: str):
-    loggy(f"Loading YAML from {yaml_path}", "info")
-    try:
-        with open(yaml_path, 'r') as file:
-            return yaml.safe_load(file)
-    except yaml.YAMLError as e:
-        loggy(f"Failed to load YAML: {e}", "error")
-        sys.exit(1)
-
-
-# Loads a given JSON Schema file and returns the data
-def load_schema(schema_path: str):
-    loggy(f"Loading JSON Schema from {schema_path}", "info")
-    try:
-        with open(schema_path, 'r') as file:
-            return json.load(file)
-    except json.JSONDecodeError as e:
-        loggy(f"Failed to load JSON Schema: {e}", "error")
-        sys.exit(1)
-
-
-# Validates the given YAML data against the given JSON Schema
-def validate_yaml(data, schema):
-    loggy("Beginning validation", "info")
-    validator = Draft7Validator(schema)
-    errors = sorted(validator.iter_errors(data), key=lambda e: e.path)
-    if errors:
-        for error in errors:
-            error_location = "->".join(map(str, error.path))
-            loggy(f"Validation error: {error.message} (at {error_location})", "warning")
-        return False
-    return True
-
-
-# Main method
-def main():
-    loggy("Starting...", "info")
-    yaml_data = load_yaml(awesome_privacy_path)
-    schema = load_schema(schema_path)
-
-    if validate_yaml(yaml_data, schema):
-        loggy("Validation successful!", "success")
-        sys.exit(0)
+def resolve_path(data, path_parts):
+    """Walk the data along path_parts, replacing indices with 'name' values."""
+    segments = []
+    current = data
+    for part in path_parts:
+        if isinstance(current, dict) and part in current:
+            current = current[part]
+            if isinstance(current, dict) and "name" in current:
+                segments.append(current["name"])
+            elif not isinstance(part, int):
+                pass  # skip dict keys like 'categories', 'sections', 'services'
+        elif isinstance(current, list) and isinstance(part, int) and part < len(current):
+            current = current[part]
+            if isinstance(current, dict) and "name" in current:
+                segments.append(current["name"])
            else:
-        loggy("Validation failed.", "error")
-        sys.exit(1)
+                segments.append(str(part))
+        else:
+            segments.append(str(part))
+            break
+    return " > ".join(segments) if segments else "(root)"
+
+
+def load_yaml(path):
+    try:
+        with open(path, "r") as f:
+            return yaml.safe_load(f)
+    except FileNotFoundError:
+        print(red(f"File not found: {path}"), file=sys.stderr)
+        sys.exit(EXIT_RUNTIME_ERROR)
+    except yaml.YAMLError as e:
+        print(red(f"Failed to parse YAML: {e}"), file=sys.stderr)
+        sys.exit(EXIT_RUNTIME_ERROR)
+
+
+def load_schema(path):
+    try:
+        with open(path, "r") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        print(red(f"File not found: {path}"), file=sys.stderr)
+        sys.exit(EXIT_RUNTIME_ERROR)
+    except json.JSONDecodeError as e:
+        print(red(f"Failed to parse JSON schema: {e}"), file=sys.stderr)
+        sys.exit(EXIT_RUNTIME_ERROR)
+
+
+def validate(data, schema):
+    validator = Draft7Validator(schema)
+    errors = sorted(validator.iter_errors(data), key=lambda e: list(e.path))
+    formatted = []
+    for error in errors:
+        location = resolve_path(data, list(error.path))
+        formatted.append(f"{location}: {error.message}")
+    return formatted
+
+
+def main():
+    data = load_yaml(DATA_PATH)
+    schema = load_schema(SCHEMA_PATH)
+    errors = validate(data, schema)
+
+    if errors:
+        shown = errors[:MAX_ERRORS]
+        for msg in shown:
+            print(red("ERROR") + " " + msg, file=sys.stderr)
+        if len(errors) > MAX_ERRORS:
+            print(dim(f"...and {len(errors) - MAX_ERRORS} more"), file=sys.stderr)
+        print(red(f"Validation failed: {len(errors)} error(s)"), file=sys.stderr)
+        sys.exit(EXIT_VALIDATION_ERRORS)
+
+    # Gather stats
+    categories = data.get("categories", [])
+    num_categories = len(categories)
+    num_sections = sum(len(c.get("sections", [])) for c in categories)
+    num_services = sum(
+        len(s.get("services", []))
+        for c in categories
+        for s in c.get("sections", [])
+    )
+    print(green(f"Valid! {num_categories} categories, {num_sections} sections, {num_services} services"))
+    sys.exit(EXIT_VALID)


 if __name__ == "__main__":