chore: init

2026-03-11 08:55:39 +00:00 · 2024-08-08 21:13:58 +00:00 · 2024-08-08 21:13:58 +00:00 · 4d7b8e742e
commit 4d7b8e742e
6 changed files with 32015 additions and 0 deletions
--- a/.github/workflows/run_make_fmhy_bookmarks.yml
+++ b/.github/workflows/run_make_fmhy_bookmarks.yml
@ -0,0 +1,38 @@
+name: Run make_fmhy_bookmarks.py
+
+on:
+  schedule:
+    - cron: '0 0 * * 1' # Every Monday at 00:00 UTC
+  workflow_dispatch: # Allows the workflow to be run manually from the GitHub UI
+
+# The last step pushes the regenerated HTML files, so the job token needs
+# write access to the repository contents.
+permissions:
+  contents: write
+
+jobs:
+  run_script:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.x' # Latest available Python 3 release
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+    - name: Run make_fmhy_bookmarks.py
+      run: python make_fmhy_bookmarks.py
+
+    - name: Commit changes
+      run: |
+        git config --global user.name 'github-actions[bot]'
+        git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+        git add fmhy_in_bookmarks.html
+        git add fmhy_in_bookmarks_starred_only.html
+        # `git commit` exits non-zero when nothing is staged; skip commit and
+        # push in that case so a week without changes does not fail the run.
+        if git diff --cached --quiet; then
+          echo "No bookmark changes to commit."
+        else
+          git commit -m 'Update fmhy_in_bookmarks.html'
+          git push
+        fi
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/README.md
+++ b/README.md
@ -0,0 +1,21 @@
+This repository is programmed to automatically generate browser bookmarks for the link collection FMHY.
+
+Bookmarks are generated as HTML files which can be imported into any web browser.
+![](https://i.imgur.com/N2Wfngc.png)
+
+The HTML files are automatically updated weekly with the new changes from FMHY.
+
+## Why?
+Web browsers base their auto-complete and search suggestions on bookmarked pages, so it's helpful to have interesting sites bookmarked: you can find them more quickly and be sure you are on the right URL.
+
+
+## How does it look once imported?
+![](https://i.imgur.com/h1GfL1W.png)
+
+
+## How to download the importable HTML files?
+![](https://i.imgur.com/e4xN3wy.png)
+
+
+## How to import them into the browser?
+![](https://i.imgur.com/6BpWb1q.png)
--- a/fmhy_in_bookmarks.html
+++ b/fmhy_in_bookmarks.html
--- a/fmhy_in_bookmarks_starred_only.html
+++ b/fmhy_in_bookmarks_starred_only.html
--- a/make_fmhy_bookmarks.py
+++ b/make_fmhy_bookmarks.py
@ -0,0 +1,270 @@
+
+import requests
+
+def addPretext(lines, sectionName, baseURL, subURL):
+    modified_lines = []
+    currMdSubheading = ""
+    currSubCat = ""
+    currSubSubCat = ""
+
+    #Remove from the lines any line that isnt a heading and doesnt contain the character `⭐`
+    #lines = [line for line in lines if line.startswith("#") or '⭐' in line]
+
+    #Parse headings
+    for line in lines:
+        if line.startswith("#"): #Title Lines
+            if not subURL=="storage":
+                if line.startswith("# ►"):
+                    currMdSubheading = "#" + line.replace("# ►", "").strip().replace(" / ", "-").replace(" ", "-").lower()
+                    currSubCat = line.replace("# ►", "").strip()
+                    currSubSubCat = "/"
+                elif line.startswith("## ▷"):
+                    if not subURL=="non-english": #Because non-eng section has multiple subsubcats with same names
+                        currMdSubheading = "#" + line.replace("## ▷", "").strip().replace(" / ", "-").replace(" ", "-").lower()
+                    currSubSubCat = line.replace("## ▷", "").strip()
+            elif subURL=="storage":
+                if line.startswith("## "):
+                    currMdSubheading = "#" + line.replace("## ", "").strip().replace(" / ", "-").replace(" ", "-").lower()
+                    currSubCat = line.replace("## ", "").strip()
+                    currSubSubCat = "/"
+                elif line.startswith("### "):
+                    currMdSubheading = "#" + line.replace("### ", "").strip().replace(" / ", "-").replace(" ", "-").lower()
+                    currSubSubCat = line.replace("### ", "").strip()
+
+            # Remove links from subcategory titles (because the screw the format)
+            if 'http' in currSubCat: currSubCat = ''
+            if 'http' in currSubSubCat: currSubSubCat = ''
+
+        elif any(char.isalpha() for char in line): #If line has content
+            preText = f"{{\"{sectionName.replace(".md", "")}\", \"{currSubCat}\", \"{currSubSubCat}\"}}"
+            if line.startswith("* "): line = line[2:]
+            modified_lines.append(preText + line)
+
+    return modified_lines
+
+
+#----------------base64 page processing------------
+import base64
+import re
+
+# When True, extract_base64_sections() runs decode_base64_in_backticks() on
+# each section so backtick-wrapped base64 snippets are replaced by their decoded text.
+doBase64Decoding = True
+
+def fix_base64_string(encoded_string):
+    missing_padding = len(encoded_string) % 4
+    if missing_padding != 0:
+        encoded_string += '=' * (4 - missing_padding)
+    return encoded_string
+
+def decode_base64_in_backticks(input_string):
+    def base64_decode(match):
+        encoded_data = match.group(0)[1:-1]  # Extract content within backticks
+        decoded_bytes = base64.b64decode( fix_base64_string(encoded_data) )
+        return decoded_bytes.decode()
+
+    pattern = r"`[^`]+`"  # Regex pattern to find substrings within backticks
+    decoded_string = re.sub(pattern, base64_decode, input_string)
+    return decoded_string
+
+def remove_empty_lines(text):
+    lines = text.split('\n')  # Split the text into lines
+    non_empty_lines = [line for line in lines if line.strip()]  # Filter out empty lines
+    return '\n'.join(non_empty_lines)  # Join non-empty lines back together
+
+def extract_base64_sections(base64_page):
+    sections = base64_page.split("***")  # Split the input string by "***" to get sections
+    formatted_sections = []
+    for section in sections:
+        formatted_section = remove_empty_lines( section.strip().replace("#### ", "").replace("\n\n", " - ").replace("\n", ", ") )
+        if doBase64Decoding: formatted_section = decode_base64_in_backticks(formatted_section)
+        formatted_section = '[🔑Base64](https://rentry.co/FMHYBase64) ► ' + formatted_section
+        formatted_sections.append(formatted_section)
+    lines = formatted_sections
+    return lines
+#----------------</end>base64 page processing------------
+
+
+
+def dlWikiChunk(fileName, icon, redditSubURL):
+
+    #first, try to get the chunk locally
+    try:
+        #First, try to get it from the local file
+        print("Loading " + fileName + " from local file...")
+        with open(fileName.lower(), 'r') as f:
+            page = f.read()
+        print("Loaded.\n")
+    #if not available locally, download the chunk
+    except:
+        if not fileName=='base64.md':
+            print("Local file not found. Downloading " + fileName + " from Github...")
+            page = requests.get("https://raw.githubusercontent.com/fmhy/FMHYedit/main/docs/" + fileName.lower()).text
+        elif fileName=='base64.md':
+            print("Local file not found. Downloading rentry.co/FMHYBase64...")
+            page = requests.get("https://rentry.co/FMHYBase64/raw").text.replace("\r", "")
+        print("Downloaded")
+
+    #add a pretext
+    redditBaseURL = "https://www.reddit.com/r/FREEMEDIAHECKYEAH/wiki/"
+    siteBaseURL = "https://fmhy.net/"
+    if not fileName=='base64.md':
+        pagesDevSiteSubURL = fileName.replace(".md", "").lower()
+        subURL = pagesDevSiteSubURL
+        lines = page.split('\n')
+        lines = addPretext(lines, fileName, siteBaseURL, subURL)
+    elif fileName=='base64.md':
+        lines = extract_base64_sections(page)
+
+    return lines
+
+def cleanLineForSearchMatchChecks(line):
+    siteBaseURL = "https://fmhy.net/"
+    redditBaseURL = "https://www.reddit.com/r/FREEMEDIAHECKYEAH/wiki/"
+    return line.replace(redditBaseURL, '/').replace(siteBaseURL, '/')
+
+def alternativeWikiIndexing():
+    wikiChunks = [
+        dlWikiChunk("VideoPiracyGuide.md", "📺", "video"),
+        dlWikiChunk("AI.md", "🤖", "ai"),
+        dlWikiChunk("Android-iOSGuide.md", "📱", "android"),
+        dlWikiChunk("AudioPiracyGuide.md", "🎵", "audio"),
+        dlWikiChunk("DownloadPiracyGuide.md", "💾", "download"),
+        dlWikiChunk("EDUPiracyGuide.md", "🧠", "edu"),
+        dlWikiChunk("GamingPiracyGuide.md", "🎮", "games"),
+        dlWikiChunk("AdblockVPNGuide.md", "📛", "adblock-vpn-privacy"),
+        dlWikiChunk("System-Tools.md", "💻", "system-tools"),
+        dlWikiChunk("File-Tools.md", "🗃️", "file-tools"),
+        dlWikiChunk("Internet-Tools.md", "🔗", "internet-tools"),
+        dlWikiChunk("Social-Media-Tools.md", "💬", "social-media"),
+        dlWikiChunk("Text-Tools.md", "📝", "text-tools"),
+        dlWikiChunk("Video-Tools.md", "📼", "video-tools"),
+        dlWikiChunk("MISCGuide.md", "📂", "misc"),
+        dlWikiChunk("ReadingPiracyGuide.md", "📗", "reading"),
+        dlWikiChunk("TorrentPiracyGuide.md", "🌀", "torrent"),
+        dlWikiChunk("img-tools.md", "📷", "img-tools"),
+        dlWikiChunk("gaming-tools.md", "👾", "gaming-tools"),
+        dlWikiChunk("LinuxGuide.md", "🐧🍏", "linux"),
+        dlWikiChunk("DEVTools.md", "🖥️", "dev-tools"),
+        dlWikiChunk("Non-English.md", "🌏", "non-eng"),
+        dlWikiChunk("STORAGE.md", "🗄️", "storage"),
+        #dlWikiChunk("base64.md", "🔑", "base64"),
+        dlWikiChunk("NSFWPiracy.md", "🌶", "https://saidit.net/s/freemediafuckyeah/wiki/index")
+    ]
+    return [item for sublist in wikiChunks for item in sublist] #Flatten a <list of lists of strings> into a <list of strings>
+#--------------------------------
+
+
+# Save the result of alternativeWikiIndexing to a .md file
+# with open('wiki_adapted.md', 'w') as f:
+#     for line in alternativeWikiIndexing():
+#         f.write(line + '\n')
+
+# Instead of saving it to a file, save it into a string variable
+wiki_adapted_md = '\n'.join(alternativeWikiIndexing())
+
+# Remove from the lines in wiki_adapted_md any line that doesnt contain the character `⭐`
+wiki_adapted_starred_only_md = '\n'.join([line for line in wiki_adapted_md.split('\n') if '⭐' in line])
+
+
+
+import re
+
+def markdown_to_html_bookmarks(input_md_text, output_file):
+    # Predefined folder name
+    folder_name = "FMHY"
+    
+    # Read the input markdown file
+    #with open(input_file, 'r', encoding='utf-8') as f:
+    #    markdown_content = f.read()
+
+    # Instead of reading from a file, read from a string variable
+    markdown_content = input_md_text
+    
+    # Regex pattern to extract URLs and titles from markdown
+    url_pattern = re.compile(r'\[([^\]]+)\]\((https?://[^\)]+)\)')
+    # Regex pattern to extract hierarchy levels
+    hierarchy_pattern = re.compile(r'^\{"([^"]+)", "([^"]+)", "([^"]+)"\}')
+    
+    # Dictionary to hold bookmarks by hierarchy
+    bookmarks = {}
+    
+    # Split the content by lines
+    lines = markdown_content.split('\n')
+    
+    # Parse each line
+    for line in lines:
+        # Find hierarchy levels
+        hierarchy_match = hierarchy_pattern.match(line)
+        if not hierarchy_match:
+            continue
+
+        level1, level2, level3 = hierarchy_match.groups()
+        
+        # Initialize nested dictionaries for hierarchy levels
+        if level1 not in bookmarks:
+            bookmarks[level1] = {}
+        if level2 not in bookmarks[level1]:
+            bookmarks[level1][level2] = {}
+        if level3 not in bookmarks[level1][level2]:
+            bookmarks[level1][level2][level3] = []
+
+        # Find all matches in the line for URLs
+        matches = url_pattern.findall(line)
+
+        # If the input_md_text is wiki_adapted_starred_only_md, only add the first match of url_pattern in each line
+        if input_md_text == wiki_adapted_starred_only_md:
+            matches = matches[:1]
+        
+        # Extract the description (text after the last match)
+        last_match_end = line.rfind(')')
+        description = line[last_match_end+1:].replace('**', '').strip() if last_match_end != -1 else ''
+        
+        # When the description is empty, use as description the lowest hierachy level that is not empty
+        if not description:
+            description = '- ' + (level3 if level3 != '/' else level2 if level2 else level1)
+
+        # Add matches to the appropriate hierarchy
+        for title, url in matches:
+            full_title = f"{title} {description}" if description else title
+            bookmarks[level1][level2][level3].append((full_title, url))
+    
+    # Function to generate HTML from nested dictionary
+    def generate_html(bookmarks_dict, indent=1):
+        html = ''
+        for key, value in bookmarks_dict.items():
+            html += '    ' * indent + f'<DT><H3>{key}</H3>\n'
+            html += '    ' * indent + '<DL><p>\n'
+            if isinstance(value, dict):
+                html += generate_html(value, indent + 1)
+            else:
+                for full_title, url in value:
+                    html += '    ' * (indent + 1) + f'<DT><A HREF="{url}" ADD_DATE="0">{full_title}</A>\n'
+            html += '    ' * indent + '</DL><p>\n'
+        return html
+    
+    # HTML structure
+    html_content = '''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+'''
+    # Add the main folder
+    html_content += f'    <DT><H3>{folder_name}</H3>\n'
+    html_content += '    <DL><p>\n'
+    
+    # Add bookmarks to HTML content
+    html_content += generate_html(bookmarks)
+    
+    html_content += '    </DL><p>\n'
+    html_content += '</DL><p>\n'
+    
+    # Write the HTML content to the output file
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(html_content)
+
+    # Print success message
+    #print(f'Successfully created bookmarks in {output_file}')
+
+# Example usage:
+markdown_to_html_bookmarks(wiki_adapted_md, 'fmhy_in_bookmarks.html')
+markdown_to_html_bookmarks(wiki_adapted_starred_only_md, 'fmhy_in_bookmarks_starred_only.html')
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1 @@
+requests