// edit/scripts/lint-markdown.js


import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

// ES modules do not provide __dirname, so derive it from this module's URL.
const scriptPath = fileURLToPath(import.meta.url);
const __dirname = path.dirname(scriptPath);
// The docs folder is a sibling of the directory that contains this script.
const DOCS_DIR = path.join(__dirname, '..', 'docs');

/**
 * Lists the markdown files that sit directly inside `dir` (no recursion).
 *
 * Every entry is stat'ed; an entry is kept when it is not a directory and
 * its name ends with ".md".
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} `dir`-joined paths of the matching entries, in readdir order.
 */
function getDocsFiles(dir) {
    return fs.readdirSync(dir)
        .map(entryName => ({ entryName, entryPath: path.join(dir, entryName) }))
        // stat first (as the original loop did) so every entry is inspected
        .filter(({ entryName, entryPath }) =>
            !fs.statSync(entryPath).isDirectory() && entryName.endsWith('.md'))
        .map(({ entryPath }) => entryPath);
}

// Markdown files to lint: the ".md" entries directly inside DOCS_DIR.
const files = getDocsFiles(DOCS_DIR);
// Set to true as soon as any line reports an error; read at the end of the
// script to decide between the success message and a non-zero exit.
let hasErrors = false;

// Typo dictionary (lowercased typo -> correction), meant to be filled from
// scripts/typos.csv. The loader below is commented out, so the map stays
// empty and the typo check (Check 11) takes its hardcoded fallback branch.
const typosMap = new Map();
/* CSV loader disabled for now (original note: "sucks right now" - the exact
   reason is not recorded here; TODO confirm before re-enabling).
try {
    const typosPath = path.resolve(__dirname, 'typos.csv');
    if (fs.existsSync(typosPath)) {
        const typosContent = fs.readFileSync(typosPath, 'utf-8');
        const typoLines = typosContent.split('\n');
        typoLines.forEach(line => {
            const parts = line.split(',');
            if (parts.length >= 2) {
                const typo = parts[0].trim().toLowerCase();
                const correction = parts[1].trim();
                if (typo && correction) {
                    typosMap.set(typo, correction);
                }
            }
        });
        console.log(`✅ Loaded ${typosMap.size} typos from dictionary.`);
    } else {
        console.warn('⚠️ scripts/typos.csv not found, using fallback list.');
    }
} catch (e) {
    console.warn(`⚠️ Failed to load typos: ${e.message}`);
}
*/

// Banner printed once before any per-file output.
console.log('🔍 Scanning markdown files for formatting issues...\n');

// ---------------------------------------------------------------------------
// Main lint pass: every file in `files` is read, split into lines, and each
// line is run through Checks 1-13. Each finding is printed as
// "path:line - message" followed by the offending line, and sets `hasErrors`.
//
// The tables and regexes below do not depend on the file or line being
// checked, so they are built once here instead of once per line.
// ---------------------------------------------------------------------------

// Converts a native path to forward slashes so it can be compared with the
// 'docs/...' literals below even where path.sep is '\\'.
const toPosixPath = (nativePath) => nativePath.split(path.sep).join('/');

// Files to completely ignore from all checks (relative to the working directory)
const FILES_TO_IGNORE = [
    'docs/feedback.md',
    'docs/index.md'
];

// Files to ignore for english-specific checks (Typos, A/An, Repeated Words)
const FILES_TO_IGNORE_ENGLISH_CHECKS = [
    'docs/non-english.md'
];

// Files exempt from Check 9 (matched against the end of the file path)
const FILES_TO_IGNORE_LINK_SEPARATOR_CHECK = [
    'docs/beginners-guide.md',
    'docs/unsafe.md'
];

// Check 9: characters that may directly precede a link.
// Allowed: separators, openers, end of sentences
// ! for images (![Alt]), * for bold, ( for parens, etc.
const LINK_PRECEDING_ALLOWED_CHARS = ['/', '-', ',', '(', '&', '>', ':', '|', '*', '!', '.', '?', ';', '_', '⭐', '+', '#', '►', '▷'];

// Check 9: functional words (prepositions, conjunctions, determiners, etc.)
// that may precede a link, to avoid flagging sentences like "Try a [VPN]".
const LINK_PRECEDING_ALLOWED_WORDS = [
    'or', 'and',
    'a', 'an', 'the',
    'use', 'using', 'via', 'with',
    'in', 'on', 'at', 'by',
    'to', 'for', 'from',
    'check', 'see', 'try',
    'requires', 'including', 'includes',
    'that', 'your', 'our',
    'of', 'about', 'their', 'join', 'getting', 'most',
    'like', 'every', 'being', 'mostly', 'highly', 'up', 'we', 'optionally'
];
// No "g" flag, so the regex carries no lastIndex state between uses.
const LINK_PRECEDING_WORD_REGEX = new RegExp(`(^|[^a-zA-Z0-9])(${LINK_PRECEDING_ALLOWED_WORDS.join('|')})$`, 'i');

// Check 10: words that are allowed to appear twice in a row.
const ALLOWED_REPEATED_WORDS = ['puyo', 'duran', 'agar', 'hocus'];

// Check 11: dictionary hits that must not be reported as typos.
const ALLOWED_TYPOS = [
    'hong', 'hls', 'troy', 'fami', 'rentry', 'typesafe', 'spritesheet', 'ba',
    'puyo', 'moo', 'ne', 'nes', 'rg', 'rgshop', 'rgshows',
    're', 'revanced', 'skipper', 'ste', 'sneedacity', 'rom', 'ide', 'luks', 'cse', 'gameboy',
    'lan', 'pokemon', 'sa', 'cah', 'rin', 'tx', 'mame'
];

// Check 11: small hardcoded list used while `typosMap` is empty.
const FALLBACK_TYPOS = {
    'teh': 'the', 'adn': 'and', 'thier': 'their', 'dont': "don't", 'cant': "can't",
    'wont': "won't", 'occured': 'occurred', 'seperate': 'separate',
    'independant': 'independent', 'reccomend': 'recommend', 'recieve': 'receive',
    'adress': 'address', 'neccessary': 'necessary', 'tring': 'trying', 'availalbe': 'available'
};
const FALLBACK_TYPO_RULES = Object.entries(FALLBACK_TYPOS).map(([typo, correction]) => ({
    typo,
    correction,
    regex: new RegExp(`\\b${typo}\\b`, 'i')
}));

files.forEach(file => {
    const content = fs.readFileSync(file, 'utf-8');
    // Accept both LF and CRLF files: a leftover "\r" would count as
    // whitespace and trip the slash/spacing checks at end of line.
    const lines = content.split(/\r?\n/);
    // Native form is kept for display; the POSIX form is used for comparisons.
    const relativePath = path.relative(process.cwd(), file);
    const relativePosixPath = toPosixPath(relativePath);

    if (FILES_TO_IGNORE.includes(relativePosixPath)) return;

    const isSeparatedEnglishCheck = FILES_TO_IGNORE_ENGLISH_CHECKS.includes(relativePosixPath);

    const posixFilePath = toPosixPath(file);
    const skipLinkSeparatorCheck = FILES_TO_IGNORE_LINK_SEPARATOR_CHECK
        .some(ignoredFile => posixFilePath.endsWith(ignoredFile));

    // Most recent markdown heading line seen in this file (used by Check 13).
    let currentHeader = '';

    lines.forEach((line, index) => {
        const lineNum = index + 1;
        if (/^#+\s/.test(line)) {
            currentHeader = line;
        }
        const errors = [];

        // Check 1: Starred links must be bolded
        // Pattern: * ⭐ [Link] -> Bad
        // Pattern: * ⭐ **[Link] -> Good
        // Only applies to list items starting with *, + or -
        if (/^\s*[*+-]\s+⭐/.test(line)) {
            // It's a starred list item: after the star and optional spaces, "**" must follow.
            if (!/⭐\s*\*\*/.test(line)) {
                errors.push('Starred item not bolded (expected * ⭐ **Link**)');
            }
        }

        // Check 2: Space between ] (
        if (/\]\s+\(http/.test(line)) {
            errors.push('Space between bracket and parenthesis in link');
        }

        // Check 3: Missing closing bracket ]
        // Pattern: [Text(http...
        // We look for [ followed by (http without ] in between.
        if (/\[[^\]]*\(http/.test(line)) {
            errors.push('Possible missing closing bracket "]"');
        }

        // Check 4: Missing closing parenthesis )
        // Pattern: [Text](http...  where the URL runs into whitespace or the
        // end of the line before any ")" is seen.
        const missingParenMatch = line.match(/\]\((http[^)]+?)($|\s)/);
        if (missingParenMatch) {
            errors.push(`Possible broken link (missing closing parenthesis or trailing space): ${missingParenMatch[1]}`);
        }

        // Check 5: Double parenthesis in link
        // specific pattern: ](url))
        // This is often valid if inside parenthesis: (See [Link](url))
        // We only flag if parentheses are UNBALANCED in the line.
        if (/\]\([^)]+\)\)/.test(line)) {
            const openParens = (line.match(/\(/g) || []).length;
            const closeParens = (line.match(/\)/g) || []).length;
            if (closeParens > openParens) {
                errors.push('Double closing parenthesis in link (Unbalanced)');
            }
        }

        // Check 6: Double spaces
        // Leading indentation is ignored; any other "  " is reported.
        const trimmedLine = line.trimStart();
        if (trimmedLine.includes('  ')) {
            errors.push('Double space detected');
        }

        // Check 7: Broken Bold Syntax
        // Pattern: ** Text**, **Text **, or ** Text **
        // Inline code is replaced first to avoid false positives.
        const boldLine = line.replace(/`[^`]+`/g, 'PLACEHOLDER');
        if (boldLine.includes('**')) {
            const parts = boldLine.split('**');
            // Odd segments are the text between an opening and closing "**".
            for (let i = 1; i < parts.length; i += 2) {
                // Only inspect segments that have a closing pair on this line
                if (i + 1 < parts.length) {
                    const text = parts[i];
                    if (text.length > 0 && (/^\s/.test(text) || /\s$/.test(text))) {
                        errors.push(`Broken bold syntax (leading/trailing space) in "**${text}**"`);
                    }
                }
            }
        }

        // Check 8: Asymmetric spaces around slash
        // URLs (http://...) are masked out first.
        const lineWithoutLinks = line.replace(/https?:\/\/[^\s)]+/g, 'LINK_PLACEHOLDER');

        // Ignore VitePress sidebar links (e.g. "link: /foo")
        if (!/^\s*link:/i.test(line)) {
            // A. Missing space after slash: " /Word"
            // Exception: /> (HTML close tag)
            // Exception: /Word/ (Path/Board e.g. /co/)
            const missingSpaceAfter = lineWithoutLinks.matchAll(/\s\/([^\s]+)/g);
            for (const match of missingSpaceAfter) {
                const wordAfter = match[1];
                if (wordAfter.startsWith('>')) continue; // Ignore />
                // Ignore anything containing a further slash (e.g. /r/foo, /co/)
                if (wordAfter.includes('/')) continue;

                errors.push(`Missing space after slash (e.g. "Word /Word"): "${match[0]}"`);
                break; // one report per line is enough
            }

            // B. Missing space before slash: "Word/ "
            // Exceptions: w/ (with), r/ (reddit), u/ (user), c/ (community)
            // Exception: /Word/ (Path/Board e.g. /b/)
            const missingSpaceBefore = lineWithoutLinks.matchAll(/([^\s]+)\/\s/g);
            for (const match of missingSpaceBefore) {
                const wordBefore = match[1];
                // Allow common abbreviations: w/, r/, u/, c/
                if (/^(w|r|u|c)$/i.test(wordBefore)) continue;
                // Allow paths ending in slash or containing slash: /b/ or [/int
                if (wordBefore.includes('/')) continue;

                errors.push(`Missing space before slash (e.g. "Word/ Word"): "${match[0]}"`);
                break; // one report per line is enough
            }

            // C. Double slash separated by spaces: "/ /"
            if (/\/\s+\//.test(lineWithoutLinks)) {
                errors.push('Double slash with spaces detected (e.g. "/ /")');
            }
        }

        // Check 9: Adjacent links without separator (e.g. "Text [Link]" instead of "Text / [Link]")
        if (!skipLinkSeparatorCheck) {
            // Created per line: the "g" flag makes exec() stateful via lastIndex.
            const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
            let match;
            while ((match = linkRegex.exec(line)) !== null) {
                const linkStart = match.index;
                if (linkStart === 0) continue;

                const preceding = line.slice(0, linkStart);

                // Ignore if line starts with valid list marker followed immediately by this link
                // e.g. "* [Link]" or "- [Link]" or "1. [Link]"
                if (/^\s*([*+-]|\d+\.)\s*$/.test(preceding)) continue;
                // Ignore if Starred item "* ⭐ [Link]"
                if (/^\s*[*+-]\s+⭐\s*$/.test(preceding)) continue;
                // Ignore if link is preceded by bold/italic markers only (start of line)
                if (/^\s*[*+-]\s+[*_]+\s*$/.test(preceding)) continue;

                const trimmedPreceding = preceding.trimEnd();
                if (trimmedPreceding.length === 0) continue;

                // Last non-space character before the link
                const lastChar = trimmedPreceding.slice(-1);
                if (LINK_PRECEDING_ALLOWED_CHARS.includes(lastChar)) continue;

                // Last word before the link is an allowed functional word
                if (LINK_PRECEDING_WORD_REGEX.test(trimmedPreceding)) continue;

                errors.push(`Missing separator before link (expected "/", "or", ",", etc): "...${preceding.slice(-10)}[${match[1]}]..."`);
            }
        }

        // Check 13: Duplicate Descriptions
        // Skipped under any heading containing "Temp Mail" in docs/internet-tools.md.
        const isTempMailSection = relativePosixPath === 'docs/internet-tools.md' && currentHeader.includes('Temp Mail');
        if (line.includes('/') && !isTempMailSection) {
            const BLOCK_SPLIT = '___BLOCK_SPLIT___';
            // Cut the line at every markdown link (with optional bold markers)
            // and examine the text between links.
            const lineCleanedLinks = line.replace(/(\*\*|__)?\[[^\]]+\]\([^)]+\)(\*\*|__)?/g, BLOCK_SPLIT);
            const blocks = lineCleanedLinks.split(BLOCK_SPLIT);

            blocks.forEach(block => {
                if (!block || !block.includes('/')) return;

                // Split by " / " (slash surrounded by spaces) to avoid matching paths (/bin), w/ (w/ acc), TCP/IP
                // This assumes standard formatting (Check 8 enforces spaces)
                const parts = block.split(/\s+\/\s+/);
                if (parts.length < 2) return;

                const seenDescriptions = new Set();
                parts.forEach(part => {
                    let desc = part.trim();
                    // Strip list/bold/star decoration from both ends
                    desc = desc.replace(/^[\s\-\*⭐]+/, '').replace(/[\s\-\*⭐]+$/, '');

                    if (!desc) return;

                    // Comparison is case-insensitive
                    const checkDesc = desc.toLowerCase();
                    if (seenDescriptions.has(checkDesc)) {
                        errors.push(`Duplicate description detected: "${desc}"`);
                    } else {
                        seenDescriptions.add(checkDesc);
                    }
                });
            });
        }

        // Check 10, 11, 12: English-specific checks (Repeated words, Typos, Grammar)
        if (!isSeparatedEnglishCheck) {
            // Prepare clean line for text-based checks (remove URLs and Markdown links)
            // Remove entire link block: [Text](Url) -> "__LINK__" to avoid merging adjacent words
            const lineCleaned = line.replace(/https?:\/\/[^\s)]+/g, '')
                .replace(/\[[^\]]+\]\([^)]*\)/g, '__LINK__');

            // Check 10: Repeated words (e.g. "the the")
            // Only the first repeated pair on the line is examined.
            const repeatedWordMatch = lineCleaned.match(/\b([a-zA-Z]+)\s+\1\b/i);
            if (repeatedWordMatch) {
                const word = repeatedWordMatch[1].toLowerCase();
                if (!ALLOWED_REPEATED_WORDS.includes(word)) {
                    errors.push(`Repeated word detected: "${repeatedWordMatch[0]}"`);
                }
            }

            // Check 11: Common Typos
            // Uses the `typosMap` dictionary when it has entries; otherwise
            // falls back to the small hardcoded list.
            if (typosMap.size > 0) {
                // Unicode-aware split to avoid breaking words like "Română" or "Slovenčina"
                const words = lineCleaned.split(/[^\p{L}0-9']+/u);
                for (const word of words) {
                    const lowerWord = word.toLowerCase();
                    if (typosMap.has(lowerWord) && !ALLOWED_TYPOS.includes(lowerWord)) {
                        errors.push(`Possible typo: "${word}" (should be "${typosMap.get(lowerWord)}")`);
                    }
                }
            } else if (!/http/.test(line)) {
                // The fallback scans the raw line, so any line containing
                // "http" is skipped entirely rather than risk matching inside a URL.
                for (const { typo, correction, regex } of FALLBACK_TYPO_RULES) {
                    if (regex.test(line)) {
                        errors.push(`Possible typo: "${typo}" (should be "${correction}")`);
                    }
                }
            }

            // Check 12: Basic A/An Grammar
            // "a" followed by a word starting with a, e, i or o ("u" is not checked).
            const aAnMatch = line.match(/\b(a)\s+([aeio]\w+)/i);
            if (aAnMatch) {
                const word = aAnMatch[2].toLowerCase();
                if (word !== 'one') {
                    errors.push(`Incorrect article "a" usage: "${aAnMatch[0]}" (should be "an")`);
                }
            }

            // "an" followed by a word starting with one of the listed consonants.
            // NOTE(review): g, h and j are not in this class. Leaving out "h"
            // is plausibly deliberate ("an hour"); g and j may be an oversight - confirm.
            const anAMatch = line.match(/\b(an)\s+([bcdfklmnpqrstvwxyz]\w+)/i);
            if (anAMatch) {
                const word = anAMatch[2];
                // All-caps/digit words are treated as acronyms and not reported.
                const isAcronym = /^[A-Z0-9]+$/.test(word);
                if (!isAcronym) {
                    errors.push(`Incorrect article "an" usage: "${anAMatch[0]}" (should be "a")`);
                }
            }
        }

        if (errors.length > 0) {
            hasErrors = true;
            errors.forEach(err => {
                // file:line - Error (in red/cyan)
                console.log(`\x1b[36m${relativePath}:${lineNum}\x1b[0m - \x1b[31m${err}\x1b[0m`);
                // Source line (dimmed)
                console.log(`  \x1b[90m${line.trim()}\x1b[0m`);
            });
        }
    });
});

// Final status. On failure the exit code is set through process.exitCode
// instead of calling process.exit(1): process.exit() ends the process
// immediately and can cut off console output that has not been flushed yet,
// whereas exitCode lets Node exit normally once pending writes are done.
if (hasErrors) {
    process.exitCode = 1;
} else {
    console.log('✅ No formatting issues found.');
}