Moving codebase from GitHub

This commit is contained in:
ZhymabekRoman 2024-01-31 06:48:20 +06:00
commit fe878c92a7
108 changed files with 54042 additions and 0 deletions

4
.env_template Normal file
View file

@ -0,0 +1,4 @@
HOST_ADDRESS = "http://localhost:6752"
TIMEOUT=3
ADMIN_SECRET_KEY="test"
MEDIUM_AUTH_COOKIES="Put your premium subscription account cookies here, the uid and sid properties are required"

167
.gitignore vendored Normal file
View file

@ -0,0 +1,167 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
user_data/
*.dist/
*.build/
medium_cache.sqlite
medium_db_cache.sqlite
ban_post_list.db

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "server/toolkits/core"]
path = server/toolkits/core
url = https://github.com/Freedium-cfd/core

158
CaddyfileDev Normal file
View file

@ -0,0 +1,158 @@
:6752 {
# header Server "nginx"
encode gzip
header -Server
handle_path /site.webmanifest {
root * ./static/site.webmanifest
file_server
}
handle_path /favicon-32x32.png {
root * ./static/favicon-32x32.png
file_server
}
handle_path /robots.txt {
root * ./static/robots.txt
file_server
}
handle_path /ads.txt {
root * ./static/ads.txt
file_server
}
handle_path /humans.txt {
root * ./static/humans.txt
file_server
}
handle_path /mstile-150x150.png {
root * ./static/mstile-150x150.png
file_server
}
handle_path /mstile-310x310.png {
root * ./static/mstile-310x310.png
file_server
}
handle_path /sitemap.xml {
root * ./static/sitemap.xml
file_server
}
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
file_server
}
handle_path /mstile-70x70.png {
root * ./static/mstile-70x70.png
file_server
}
handle_path /android-chrome-192x192.png {
root * ./static/android-chrome-192x192.png
file_server
}
handle_path /mstile-310x150.png {
root * ./static/mstile-310x150.png
file_server
}
handle_path /safari-pinned-tab.svg {
root * ./static/safari-pinned-tab.svg
file_server
}
handle_path /android-chrome-512x512.png {
root * ./static/android-chrome-512x512.png
file_server
}
handle_path /favicon-16x16.png {
root * ./static/favicon-16x16.png
file_server
}
handle_path /favicon.ico {
root * ./static/favicon.ico
file_server
}
handle_path /browserconfig.xml {
root * ./static/browserconfig.xml
file_server
}
handle_path /mstile-144x144.png {
root * ./static/mstile-144x144.png
file_server
}
handle_path /security.txt {
root * ./static/security.txt
file_server
}
handle_path /apple-touch-icon.png {
root * ./static/apple-touch-icon.png
file_server
}
handle_path /onboarding/* {
respond "Access denied" 403
}
handle_path /wp-* {
respond "Access denied" 403
}
handle_path /.env {
respond "Access denied" 403
}
handle_path /api* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-precomposed.png {
respond "Access denied" 403
}
handle_path /rss.xml {
respond "Access denied" 403
}
handle_path /.git/* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120-precomposed.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152-precomposed.png {
respond "Access denied" 403
}
handle_path /.well-known/* {
respond "Access denied" 403
}
route /* {
reverse_proxy localhost:7080
}
}

11
CaddyfileDevTemplate Normal file
View file

@ -0,0 +1,11 @@
:6752 {
# header Server "nginx"
encode gzip
header -Server
{{ template }}
route /* {
reverse_proxy localhost:7080
}
}

31
CaddyfileMaintance Normal file
View file

@ -0,0 +1,31 @@
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
freedium.cfd {
# header Server "nginx"
encode gzip
header -Server
route /* {
header Content-Type text/html
respond <<HTML
<!doctype html>
<title>Site Maintenance</title>
<style>
body { text-align: center; padding: 150px; }
h1 { font-size: 50px; }
body { font: 20px Helvetica, sans-serif; color: #333; }
article { display: block; text-align: left; width: 650px; margin: 0 auto; }
a { color: #dc8100; text-decoration: none; }
a:hover { color: #333; text-decoration: none; }
</style>
<article>
<h1>We&rsquo;ll be back soon!</h1>
<div>
<p>Sorry for the inconvenience but we&rsquo;re performing some maintenance at the moment. If you need to you can always <a href="mailto:#admin@freedium.cfd">contact us</a>, otherwise we&rsquo;ll be back online shortly!</p>
<p>&mdash; Freedium developers</p>
</div>
</article>
HTML 200
}
}

159
CaddyfileProd Normal file
View file

@ -0,0 +1,159 @@
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
freedium.cfd {
# header Server "nginx"
encode gzip
header -Server
handle_path /site.webmanifest {
root * ./static/site.webmanifest
file_server
}
handle_path /favicon-32x32.png {
root * ./static/favicon-32x32.png
file_server
}
handle_path /robots.txt {
root * ./static/robots.txt
file_server
}
handle_path /ads.txt {
root * ./static/ads.txt
file_server
}
handle_path /humans.txt {
root * ./static/humans.txt
file_server
}
handle_path /mstile-150x150.png {
root * ./static/mstile-150x150.png
file_server
}
handle_path /mstile-310x310.png {
root * ./static/mstile-310x310.png
file_server
}
handle_path /sitemap.xml {
root * ./static/sitemap.xml
file_server
}
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
file_server
}
handle_path /mstile-70x70.png {
root * ./static/mstile-70x70.png
file_server
}
handle_path /android-chrome-192x192.png {
root * ./static/android-chrome-192x192.png
file_server
}
handle_path /mstile-310x150.png {
root * ./static/mstile-310x150.png
file_server
}
handle_path /safari-pinned-tab.svg {
root * ./static/safari-pinned-tab.svg
file_server
}
handle_path /android-chrome-512x512.png {
root * ./static/android-chrome-512x512.png
file_server
}
handle_path /favicon-16x16.png {
root * ./static/favicon-16x16.png
file_server
}
handle_path /favicon.ico {
root * ./static/favicon.ico
file_server
}
handle_path /browserconfig.xml {
root * ./static/browserconfig.xml
file_server
}
handle_path /mstile-144x144.png {
root * ./static/mstile-144x144.png
file_server
}
handle_path /security.txt {
root * ./static/security.txt
file_server
}
handle_path /apple-touch-icon.png {
root * ./static/apple-touch-icon.png
file_server
}
handle_path /onboarding/* {
respond "Access denied" 403
}
handle_path /wp-* {
respond "Access denied" 403
}
handle_path /.env {
respond "Access denied" 403
}
handle_path /api* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-precomposed.png {
respond "Access denied" 403
}
handle_path /rss.xml {
respond "Access denied" 403
}
handle_path /.git/* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120-precomposed.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152-precomposed.png {
respond "Access denied" 403
}
handle_path /.well-known/* {
respond "Access denied" 403
}
route /* {
reverse_proxy localhost:7080
}
}

12
CaddyfileProdTemplate Normal file
View file

@ -0,0 +1,12 @@
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
freedium.cfd {
# header Server "nginx"
encode gzip
header -Server
{{ template }}
route /* {
reverse_proxy localhost:7080
}
}

53
README.md Normal file
View file

@ -0,0 +1,53 @@
<p align="center"><a href="https://iosf.in/" target="_blank"><img src="https://avatars.githubusercontent.com/u/142643505?s=200&v=4" width="20%"></a></p>
<h1 align="center">Freedium: Your paywall breakthrough for Medium!</h1>
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/zhymabekroman)
## FAQ
### What happened to the GitHub organization?
Our whole GitHub organization is not public for now. The Reddit community where all of this began is unfortunately also gone, so we have moved to Codeberg.
### Why did we create Freedium?
In mid-June to mid-July 2023, Medium changed their paywall method, and all old paywall bypass methods we had stopped working. So I became obsessed with the idea of creating a service to bypass Medium's paywalled posts. Honestly I am not a big fan of Medium, but I sometimes read articles to improve my knowledge.
### How does Freedium work?
In the first version of Freedium, we reverse-engineered Medium.com's GraphQL endpoints and built our own parser and toolkits to show you unpaywalled Medium posts. Unfortunately, Medium closed this loophole, and nowadays we just pay for subscriptions and share access through Freedium. Sometimes we get bugs because of the self-written parser, but we are working to make Freedium bug-free.
### What languages are being used?
We use Python with the Jinja template engine, and some JS magic in the frontend :)
### Wow! I would like to contribute to Freedium. How can I do that?
We need volunteers who have Medium subscriptions because we might get banned by Medium. And if you are a developer, you can start from this repository: https://codeberg.org/Freedium-cfd/web.
### Plans, future?
Speed up Freedium, and probably create an open source Medium frontend in the next life.
## Tech stack:
- FastAPI, Gunicorn, Uvicorn as worker
- Tailwind CSS v3
- Dragonfly (Redis like key-value database)
- Jinja2
- Python 3.9+
- Caddy
- Sentry
## Local run:
Requirements:
- Medium subscription
- Python 3.9+
```bash
git clone https://github.com/Freedium-cfd/web ./web
cd ./web
pip install -r requirements.txt
# for linux also do: pip install -r requirements-fast.txt
pip install ./core
pip install ./rl_string_helper
```
Now we need to configure our Freedium instance. Copy `.env_template` to the `.env` configuration file and set the values you require.
If you are on Linux, execute `./script/start_dev.sh` and open 'localhost:6752' in your browser. This also starts the Caddy reverse proxy.
If you use another OS or want to test without the reverse proxy, you can start the server with the command `python3 -m server server` and access it at 'localhost:7080':

2
bin/versions.txt Normal file
View file

@ -0,0 +1,2 @@
dragonfly==1.7.1
caddy==2.7.6

BIN
bin/x86_64/caddy Executable file

Binary file not shown.

BIN
bin/x86_64/dragonfly Executable file

Binary file not shown.

164
core/.gitignore vendored Normal file
View file

@ -0,0 +1,164 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
medium_cache.sqlite
query_result.json
medium.html

3
core/.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "medium_parser/toolkits/rl_string_helper"]
path = medium_parser/toolkits/rl_string_helper
url = https://github.com/Freedium-cfd/rl-string-helper

8
core/README.md Normal file
View file

@ -0,0 +1,8 @@
# Medium.com GraphQL API parser
This repo contains a parser for the Medium GraphQL API.
## Export format:
- HTML (with Tailwind CSS)
## TODO:
- Add Markdown export support

View file

@ -0,0 +1,15 @@
# Package initialisation: creates the objects shared by the other modules of the package.
from aiohttp_retry import ExponentialRetry
import jinja2
from .cache_db import SQLiteCacheBackend
# Module-level cache backed by the 'medium_db_cache.sqlite' file.
# It is opened as a side effect of importing the package.
cache = SQLiteCacheBackend('medium_db_cache.sqlite')
# Initialise the cache storage right away so `cache` is usable after import.
cache.init_db()
# Retry policy limited to 3 attempts. It is not used in this file;
# presumably consumed by the HTTP client code — verify against callers.
retry_options = ExponentialRetry(attempts=3)
# The exceptions module is exposed under two names: `exceptions` and `medium_parser_exceptions`.
from . import exceptions as exceptions
from . import exceptions as medium_parser_exceptions
# Shared Jinja2 environment; enable_async=True makes `render_async` available on its templates.
jinja_env = jinja2.Environment(enable_async=True)

View file

@ -0,0 +1,89 @@
from typing import Union
import sqlite3
import json
from warnings import warn
try:
import sqlite_zstd
except ImportError:
warn("Can't use zstd compression. Please install 'sqlite_zstd' package")
sqlite_zstd = None
class CacheResponse:
    """Thin wrapper around a cached string value.

    The raw text is kept in ``data``; ``json()`` decodes it on demand.
    """
    # A real one-element tuple: ``('data')`` is just a parenthesised string.
    __slots__ = ('data',)

    def __init__(self, data: str):
        """Store the raw cached text.

        :param data: the cached value, expected to be a string.
        """
        self.data = data

    def json(self):
        """Decode ``data`` as JSON and return the resulting object.

        :raises json.JSONDecodeError: if ``data`` is not valid JSON.
        """
        return json.loads(self.data)

    def __repr__(self):
        # Deliberately returns the raw payload, same as ``__str__``.
        return self.data

    def __str__(self):
        return self.data
class SQLiteCacheBackend:
    """Key/value cache stored in the ``cache`` table of a SQLite database.

    One connection and one cursor are created per instance and reused by
    every method. If the optional ``sqlite_zstd`` package was importable, its
    extension is loaded into the connection.
    """
    __slots__ = ('connection', 'cursor')

    def __init__(self, database: str):
        """Open ``database`` and configure the connection.

        :param database: path handed to ``sqlite3.connect``.
        """
        self.connection = sqlite3.connect(database)
        self.connection.enable_load_extension(True)  # Enable loading of extensions
        self.connection.execute("PRAGMA foreign_keys = ON")  # Need for working with foreign keys in db
        self.connection.execute("PRAGMA journal_mode=WAL")
        self.connection.execute("PRAGMA auto_vacuum=full")
        self.cursor = self.connection.cursor()
        if sqlite_zstd is not None:
            sqlite_zstd.load(self.connection)

    def all(self):
        """Return every row of the ``cache`` table."""
        with self.connection:
            return self.cursor.execute("SELECT * FROM cache").fetchall()

    def all_length(self) -> int:
        """Return the number of rows in the ``cache`` table."""
        with self.connection:
            return self.cursor.execute("SELECT COUNT(*) FROM cache").fetchone()[0]

    def random(self, size: int):
        """Return up to ``size`` rows picked in random order."""
        with self.connection:
            return self.cursor.execute("SELECT * FROM cache ORDER BY RANDOM() LIMIT :0", {'0': size}).fetchall()

    def enable_zstd(self):
        """Turn on transparent zstd compression for the ``value`` column.

        :raises ValueError: if the ``sqlite_zstd`` package is not installed.
        """
        if sqlite_zstd is None:
            raise ValueError("Can't use zstd compression. Please install 'sqlite_zstd' package")
        with self.connection:
            self.cursor.execute("SELECT zstd_enable_transparent('{\"table\": \"cache\", \"column\": \"value\", \"compression_level\": 9, \"dict_chooser\": \"''a''\"}')")
            try:
                self.connection.execute("PRAGMA auto_vacuum=full")
            except Exception as error:
                # Best effort: a failing pragma must not abort the maintenance below.
                print(error)
            self.cursor.execute("SELECT zstd_incremental_maintenance(null, 1);")
            self.cursor.execute("vacuum;")

    def init_db(self):
        """Create the ``cache`` table if it does not exist yet."""
        with self.connection:
            self.cursor.execute("CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value TEXT)")

    def pull(self, key: str) -> Union[CacheResponse, None]:
        """Look up ``key``.

        :return: a ``CacheResponse`` wrapping the stored value, or ``None``
            when there is no row for ``key`` (or the row is empty).
        """
        with self.connection:
            cache = self.cursor.execute("SELECT value FROM cache WHERE key = :0", {'0': key}).fetchone()
        if cache:
            return CacheResponse(cache[0])
        return None

    def push(self, key: str, value: Union[str, dict]) -> None:
        """Insert or replace the value stored under ``key``.

        :param value: a string, or a dict that is serialised with ``json.dumps``.
        :raises ValueError: if ``value`` is neither a ``str`` nor a ``dict``.
        """
        if isinstance(value, dict):
            value = json.dumps(value)
        elif not isinstance(value, str):
            raise ValueError(f"value argument should be only string type not {type(value).__name__}")
        with self.connection:
            self.cursor.execute("INSERT OR REPLACE INTO cache VALUES (:0, :1)", {'0': key, '1': value})

    def delete(self, key: str) -> None:
        """Remove the row stored under ``key``, if any."""
        with self.connection:
            self.cursor.execute("DELETE FROM cache WHERE key = :0", {'0': key})

    def close(self):
        """Close the underlying connection; safe to call more than once."""
        # ``connection`` is a slot, so it is missing entirely when
        # ``sqlite3.connect`` raised inside ``__init__``.
        connection = getattr(self, 'connection', None)
        if connection is not None:
            connection.close()

    def __del__(self) -> None:
        # Delegate to close() so a half-constructed instance does not raise
        # AttributeError during garbage collection.
        self.close()

434
core/medium_parser/core.py Normal file
View file

@ -0,0 +1,434 @@
import os
import math
import textwrap
import urllib.parse
import jinja2
import tld
from loguru import logger
from rl_string_helper import (RLStringHelper, parse_markups,
split_overlapping_ranges)
from . import cache, jinja_env
from .exceptions import (InvalidMediumPostID, InvalidMediumPostURL, InvalidURL,
MediumParserException, MediumPostQueryError)
from .medium_api import query_post_by_id
from .models.html_result import HtmlResult
from .time import convert_datetime_to_human_readable
from .utils import (get_medium_post_id_by_url, getting_percontage_of_match,
is_valid_medium_post_id_hexadecimal, is_valid_medium_url,
is_valid_url, sanitize_url)
class MediumParser:
__slots__ = ('__post_id', 'post_data', 'jinja', 'timeout', 'host_address', 'auth_cookies')
def __init__(self, post_id: str, timeout: int, host_address: str, auth_cookies: str = None):
    """Create a parser for a single Medium post.

    :param post_id: Medium post ID; assigned through the ``post_id`` property.
    :param timeout: timeout value stored for later requests.
    :param host_address: address stored for later use when rendering.
    :param auth_cookies: optional cookies stored for later API requests.
    """
    self.timeout, self.host_address = timeout, host_address
    self.post_id = post_id
    # No post data is loaded at construction time.
    self.post_data = None
    self.auth_cookies = auth_cookies
@classmethod
async def from_url(cls, url: str, timeout: int, host_address: str, auth_cookies: str = None) -> 'MediumParser':
    """Build a parser from a post URL instead of a post ID.

    :param url: URL to resolve; it is sanitized before any lookup.
    :param timeout: timeout passed to the URL checks and stored on the parser.
    :param host_address: address stored on the parser.
    :param auth_cookies: optional cookies forwarded to the new parser, so that
        instances created from a URL can carry them too. Defaults to ``None``,
        which matches the previous behaviour.
    :raises InvalidURL: if ``url`` is a valid URL but not a valid Medium URL.
    :raises InvalidMediumPostURL: if no post ID can be found for the URL.
    """
    sanitized_url = sanitize_url(url)
    if is_valid_url(url) and not await is_valid_medium_url(sanitized_url, timeout):
        raise InvalidURL(f'Invalid medium URL: {sanitized_url}')
    post_id = await get_medium_post_id_by_url(sanitized_url, timeout)
    if not post_id:
        raise InvalidMediumPostURL(f'Could not find medium post ID for URL: {sanitized_url}')
    return cls(post_id, timeout, host_address, auth_cookies)
@property
def post_id(self):
    """The Medium post ID this parser works on."""
    return self.__post_id

@post_id.setter
def post_id(self, value):
    """Validate and store a new post ID.

    :raises InvalidMediumPostID: if ``value`` fails
        ``is_valid_medium_post_id_hexadecimal``.
    """
    # The second, identical getter that followed the setter was redundant:
    # @property above already registers the getter.
    if not is_valid_medium_post_id_hexadecimal(value):
        raise InvalidMediumPostID(f'Invalid medium post ID: {value}')
    self.__post_id = value
async def delete_from_cache(self, post_id: str = None):
    """Remove a post from the cache.

    :param post_id: ID to remove; any falsy value means this parser's own ``post_id``.
    :return: always ``True``.
    """
    target_id = post_id or self.post_id
    cache.delete(target_id)
    return True
async def get_post_data_from_cache(self):
    """Fetch this post's data from the cache.

    :return: the cached entry decoded with its ``json()`` method, or ``None`` when nothing is cached.
    """
    logger.debug("Using cache backend")
    cached_entry = cache.pull(self.post_id)
    if not cached_entry:
        return None
    logger.debug("post query was found on cache")
    return cached_entry.json()
async def get_post_data_from_api(self):
    """Query the post from the Medium API.

    Failures are logged and swallowed on purpose so that the caller can
    decide what a missing result means.

    :return: whatever ``query_post_by_id`` returns, or ``None`` if it raised.
    """
    # This is also reached after a cache miss, so the message must not
    # claim that the cache backend is disabled.
    logger.debug("Querying post from Medium API")
    try:
        return await query_post_by_id(self.post_id, self.timeout, self.auth_cookies)
    except Exception as ex:
        logger.debug("Error while querying post by Medium API")
        logger.exception(ex)
        return None
# Loads the post data for self.post_id, stores it on self.post_data and returns it.
# The cache is consulted only when `use_cache` is true; an empty result falls back to the API.
# Raises MediumPostQueryError when the checked data is empty, is not a dict,
# carries an "error" entry, or has no data.post entry.
async def query(self, use_cache: bool = True):
post_data = await self.get_post_data_from_cache() if use_cache else None
if not post_data:
post_data = await self.get_post_data_from_api()
if not post_data or not isinstance(post_data, dict) or post_data.get("error") or not post_data.get("data") or not post_data.get("data").get("post"):
raise MediumPostQueryError(f'Could not query post by ID from API: {self.post_id}')
# Write the data to the cache under the post ID.
# NOTE(review): if this line also runs after a cache hit it rewrites the same row — confirm the intended nesting.
cache.push(self.post_id, post_data)
self.post_data = post_data
return self.post_data
# Walks content["bodyModel"]["paragraphs"] and renders each paragraph to an HTML fragment
# through small inline Jinja templates.
# Returns (out_paragraphs, title, subtitle): the list of rendered fragments, the title as
# passed in, and the subtitle, which the code below may replace or set to None.
async def _parse_and_render_content_html_post(self, content: dict, title: str, subtitle: str, preview_image_id: str, highlights: list, tags: list) -> tuple[list, str, str]:
paragraphs = content["bodyModel"]["paragraphs"]
tags_list = [tag["displayTitle"] for tag in tags]
out_paragraphs = []
current_pos = 0
# Helper: wraps `text` in an RLStringHelper and applies every (de-overlapped) markup range as a template.
# It returns the helper object itself, not a str, despite the annotation; callers use .get_text().
def parse_paragraph_text(text: str, markups: list, is_code: bool = False) -> str:
if is_code:
quote_html_type = ["minimal"]
else:
quote_html_type = ["full"]
text_formater = RLStringHelper(text, quote_html_type=quote_html_type)
parsed_markups = parse_markups(markups)
fixed_markups = split_overlapping_ranges(parsed_markups)
for markup in fixed_markups:
text_formater.set_template(markup["start"], markup["end"], markup["template"])
return text_formater
# Manual index loop: the list, code and image-row branches below move current_pos
# past a whole run of paragraphs at once.
while len(paragraphs) > current_pos:
paragraph = paragraphs[current_pos]
logger.trace(f"Current paragraph #{current_pos} data: {paragraph}")
# For debugging stuff...
# if paragraph["id"] != "":
# current_pos += 1
# continue
# Checks on the first four paragraphs (indexes 0-3): paragraphs that repeat the title,
# a tag, the subtitle or the preview image are skipped instead of rendered.
if current_pos in range(4):
if paragraph["type"] in ["H3", "H4", "H2"]:
if getting_percontage_of_match(paragraph["text"], title) > 80:
logger.trace("Title was detected, ignore...")
current_pos += 1
continue
if paragraph["type"] in ["H4"]:
if paragraph["text"] in tags_list:
logger.trace("Tag was detected, ignore...")
current_pos += 1
continue
if paragraph["type"] in ["H4", "P"]:
is_paragraph_subtitle = getting_percontage_of_match(paragraph["text"], subtitle) > 80
# NOTE(review): str.endswith("") is True for every string, so `not subtitle.endswith("")`
# is always False as written and this branch cannot run. The argument may have lost a
# character (for example an ellipsis) — confirm against the upstream source.
if is_paragraph_subtitle and not subtitle.endswith(""):
logger.trace("Subtitle was detected, ignore...")
subtitle = paragraph["text"]
current_pos += 1
continue
elif subtitle and subtitle.endswith("") and len(paragraph["text"]) > 100:
subtitle = None
elif paragraph["type"] == "IMG":
if paragraph["metadata"]["id"] == preview_image_id:
logger.trace("Preview image was detected, ignore...")
current_pos += 1
continue
# Paragraphs without text get no formatter.
# NOTE(review): the highlight code and several render branches below call
# text_formater.get_text() — confirm they are never reached with text_formater set to None.
if paragraph["text"] is None:
text_formater = None
else:
text_formater = parse_paragraph_text(paragraph["text"], paragraph["markups"])
# Apply highlights whose paragraph name matches the current paragraph; a highlight whose
# text differs from the paragraph text is skipped with a warning.
for highlight in highlights:
for highlight_paragraph in highlight["paragraphs"]:
if highlight_paragraph["name"] == paragraph["name"]:
logger.trace("Apply highlight to this paragraph")
if highlight_paragraph["text"] != text_formater.get_text():
logger.warning("Highlighted text and paragraph text are not the same! Skip...")
break
quote_markup_template = '<mark style="background-color: rgb(200 227 200);">{{ text }}</mark>'
text_formater.set_template(
highlight["startOffset"],
highlight["endOffset"],
quote_markup_template,
)
break
# Render the paragraph according to its type.
# Headings (H2/H3/H4) get extra top padding when something was already rendered.
if paragraph["type"] == "H2":
css_class = []
if out_paragraphs:
css_class.append("pt-12")
header_template = jinja_env.from_string('<h2 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h2>')
header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
out_paragraphs.append(header_template_rendered)
elif paragraph["type"] == "H3":
css_class = []
if out_paragraphs:
css_class.append("pt-12")
header_template = jinja_env.from_string('<h3 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h3>')
header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
out_paragraphs.append(header_template_rendered)
elif paragraph["type"] == "H4":
css_class = []
if out_paragraphs:
css_class.append("pt-8")
header_template = jinja_env.from_string('<h4 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-l md:text-xl {{ css_class }}">{{ text }}</h4>')
header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
out_paragraphs.append(header_template_rendered)
elif paragraph["type"] == "IMG":
image_template = jinja_env.from_string(
'<div class="mt-7"><img alt="{{ paragraph.metadata.alt }}" style="margin: auto;" class="pt-5 lazy" role="presentation" data-src="https://miro.medium.com/v2/resize:fit:700/{{ paragraph.metadata.id }}"></div>'
)
image_caption_template = jinja_env.from_string(
"<figcaption class='mt-3 text-sm text-center text-gray-500 dark:text-gray-200'>{{ text }}</figcaption>"
)
# An OUTSET_ROW image starts a row; the following OUTSET_ROW_CONTINUE paragraphs
# are rendered with the same image template and joined into that row.
if paragraph["layout"] == "OUTSET_ROW":
image_templates_row = []
img_row_template = jinja_env.from_string('<div class="mx-5"><div class="flex flex-row justify-center">{{ images }}</div></div>')
image_template_rendered = await image_template.render_async(paragraph=paragraph)
image_templates_row.append(image_template_rendered)
_tmp_current_pos = current_pos + 1
while len(paragraphs) > _tmp_current_pos:
_paragraph = paragraphs[_tmp_current_pos]
if _paragraph["layout"] == "OUTSET_ROW_CONTINUE":
image_template_rendered = await image_template.render_async(paragraph=_paragraph)
image_templates_row.append(image_template_rendered)
else:
break
_tmp_current_pos += 1
img_row_template_rendered = await img_row_template.render_async(images="".join(image_templates_row))
out_paragraphs.append(img_row_template_rendered)
# Point at the last consumed paragraph; the loop's final increment moves past it.
current_pos = _tmp_current_pos - 1
else:
image_template_rendered = await image_template.render_async(paragraph=paragraph)
out_paragraphs.append(image_template_rendered)
# A non-empty paragraph text is rendered as the image caption.
if paragraph["text"]:
out_paragraphs.append(await image_caption_template.render_async(text=text_formater.get_text()))
elif paragraph["type"] == "P":
css_class = ["leading-8"]
paragraph_template = jinja_env.from_string('<p class="{{ css_class }}">{{ text }}</p>')
# Smaller top margin when the previous paragraph is an H4/H3 heading.
# NOTE(review): for current_pos == 0 this indexes paragraphs[-1], i.e. the last paragraph.
if paragraphs[current_pos - 1]["type"] in ["H4", "H3"]:
css_class.append("mt-3")
else:
css_class.append("mt-7")
paragraph_template_rendered = await paragraph_template.render_async(text=text_formater.get_text(), css_class=" ".join(css_class))
out_paragraphs.append(paragraph_template_rendered)
# Consecutive ULI paragraphs are collected into a single <ul>.
elif paragraph["type"] == "ULI":
uli_template = jinja_env.from_string('<ul class="list-disc pl-8 mt-2">{{ li }}</ul>')
li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
li_templates = []
_tmp_current_pos = current_pos
while len(paragraphs) > _tmp_current_pos:
_paragraph = paragraphs[_tmp_current_pos]
if _paragraph["type"] == "ULI":
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
li_template_rendered = await li_template.render_async(text=text_formater.get_text())
li_templates.append(li_template_rendered)
else:
break
_tmp_current_pos += 1
uli_template_rendered = await uli_template.render_async(li="".join(li_templates))
out_paragraphs.append(uli_template_rendered)
current_pos = _tmp_current_pos - 1
# Consecutive OLI paragraphs are collected into a single <ol>.
elif paragraph["type"] == "OLI":
ol_template = jinja_env.from_string('<ol class="list-decimal pl-8 mt-2">{{ li }}</ol>')
li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
li_templates = []
_tmp_current_pos = current_pos
while len(paragraphs) > _tmp_current_pos:
_paragraph = paragraphs[_tmp_current_pos]
if _paragraph["type"] == "OLI":
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
li_template_rendered = await li_template.render_async(text=text_formater.get_text())
li_templates.append(li_template_rendered)
else:
break
_tmp_current_pos += 1
ol_template_rendered = await ol_template.render_async(li="".join(li_templates))
out_paragraphs.append(ol_template_rendered)
current_pos = _tmp_current_pos - 1
# Consecutive PRE paragraphs are merged into one code block, joined with newlines.
# The language class is taken from the first PRE paragraph of the run.
elif paragraph["type"] == "PRE":
pre_template = jinja_env.from_string('<pre class="p-4 mt-7 bg-gray-100 dark:bg-gray-900 flex flex-col justify-center">{{code_block}}</pre>')
code_block_template = jinja_env.from_string('<code class="overflow-x-auto mt-1 {{ code_css_class }} bg-gray-100 dark:bg-gray-900">{{ text }}</code>')
code_css_class = []
if paragraph["codeBlockMetadata"] and paragraph["codeBlockMetadata"]["lang"] is not None:
code_css_class.append(f'language-{paragraph["codeBlockMetadata"]["lang"]}')
else:
code_css_class.append('nohighlight')
code_list = []
_tmp_current_pos = current_pos
while len(paragraphs) > _tmp_current_pos:
_paragraph = paragraphs[_tmp_current_pos]
if _paragraph["type"] == "PRE":
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"], is_code=True)
code_list.append(text_formater.get_text())
else:
break
_tmp_current_pos += 1
code_block_template_rendered = await code_block_template.render_async(text="\n".join(code_list), code_css_class=" ".join(code_css_class))
pre_template_rendered = await pre_template.render_async(code_block=code_block_template_rendered)
out_paragraphs.append(pre_template_rendered)
current_pos = _tmp_current_pos - 1
elif paragraph["type"] == "BQ":
bq_template = jinja_env.from_string('<blockquote class="px-5 pt-3 pb-3 mt-5 shadow-lf"><p style="font-style: italic;">{{ text }}</p></blockquote>')
bq_template_rendered = await bq_template.render_async(text=text_formater.get_text())
logger.trace(bq_template_rendered)
out_paragraphs.append(bq_template_rendered)
elif paragraph["type"] == "PQ":
pq_template = jinja_env.from_string('<blockquote class="mt-7 text-2xl ml-5 text-gray-600 dark:text-gray-300"><p>{{ text }}</p></blockquote>')
pq_template_rendered = await pq_template.render_async(text=text_formater.get_text())
logger.trace(pq_template_rendered)
out_paragraphs.append(pq_template_rendered)
# MIXTAPE_EMBED: a link card built from mixtapeMetadata and the paragraph markups.
elif paragraph["type"] == 'MIXTAPE_EMBED':
embed_template = jinja_env.from_string("""
<div class="flex border border-gray-300 p-2 mt-7 items-center overflow-hidden"><a rel="noopener follow" href="{{ url }}" target="_blank"> <div class="flex flex-row justify-between p-2 overflow-hidden"><div class="flex flex-col justify-center p-2"><h2 class="text-black dark:text-gray-100 text-base font-bold">{{ embed_title }}</h2><div class="mt-2 block"><h3 class="text-grey-darker text-sm">{{ embed_description }}</h3></div><div class="mt-5" style=""><p class="text-grey-darker text-xs">{{ embed_site }}</p></div></div><div class="relative flex flew-row h-40 w-72"><div class="lazy absolute inset-0 bg-cover bg-center" data-bg="https://miro.medium.com/v2/resize:fit:320/{{ paragraph.mixtapeMetadata.thumbnailImageId }}"></div></div></div> </a></div>
""")
if paragraph.get("mixtapeMetadata") is not None:
url = paragraph["mixtapeMetadata"]["href"]
else:
logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't get url")
current_pos += 1
continue
text_raw = paragraph["text"]
# Exactly three markups are required; the second and third give the
# title and description ranges inside the raw text.
if len(paragraph["markups"]) != 3:
logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't split text")
current_pos += 1
continue
title_range = paragraph["markups"][1]
description_range = paragraph["markups"][2]
embed_title = text_raw[title_range["start"]:title_range["end"]]
embed_description = text_raw[description_range["start"]:description_range["end"]]
# Site label: tld.get_fld(url), falling back to the URL's hostname when that fails.
try:
embed_site = tld.get_fld(url)
except Exception as ex:
logger.warning(f"Can't get embed site fld: {ex}. Using custom logic...")
parsed_url = urllib.parse.urlparse(url)
embed_site = parsed_url.hostname
embed_template_rendered = await embed_template.render_async(paragraph=paragraph, url=url, embed_title=embed_title, embed_description=embed_description, embed_site=embed_site)
out_paragraphs.append(embed_template_rendered)
# IFRAME: the iframe source points at this instance's /render_iframe/<id> path under host_address.
elif paragraph["type"] == "IFRAME":
iframe_template = jinja_env.from_string('<div class="mt-7"><iframe class="lazy" data-src="{{ host_address }}/render_iframe/{{ iframe_id }}" allowfullscreen="" frameborder="0" scrolling="no"></iframe></div>')
iframe_template_rendered = await iframe_template.render_async(host_address=self.host_address, iframe_id=paragraph["iframe"]["mediaResource"]["id"])
out_paragraphs.append(iframe_template_rendered)
else:
# Unknown paragraph types are logged and produce no output.
logger.error(f"Unknown {paragraph['type']}: {paragraph}")
# Advance to the next paragraph.
current_pos += 1
return out_paragraphs, title, subtitle
async def render_as_html(self, template_folder: str = './templates'):
    """Render the post to HTML by delegating to ``_render_as_html``.

    Args:
        template_folder: Directory handed to the private renderer as the template root.

    Returns:
        Whatever ``_render_as_html`` returns.

    Any exception raised by the private renderer reaches the caller unchanged.
    """
    # Wrapping failures in MediumParserException was considered here but is left disabled.
    return await self._render_as_html(template_folder)
async def generate_metadata(self, as_dict: bool = False) -> tuple:
    """Extract the display metadata of the loaded post from ``self.post_data``.

    Args:
        as_dict: When true, return a dict keyed by field name (plus a leading
            ``post_id`` entry) instead of the positional tuple.

    Returns:
        By default the 12-tuple ``(title, subtitle, description, url, creator,
        collection, reading_time, free_access, updated_at, first_published_at,
        preview_image_id, tags)``. With ``as_dict=True`` a dict with the same
        fields is returned, despite the ``tuple`` annotation.
    """
    post = self.post_data["data"]["post"]

    title = RLStringHelper(post["title"]).get_text()  # quote_html=False
    subtitle = RLStringHelper(post["previewContent"]["subtitle"]).get_text()
    # The description is the subtitle cut down to at most 100 characters.
    shortened_subtitle = textwrap.shorten(subtitle, width=100, placeholder="...")
    description = RLStringHelper(shortened_subtitle).get_text()
    preview_image_id = post["previewImage"]["id"]
    creator = post["creator"]
    collection = post["collection"]
    url = post["mediumUrl"]
    reading_time = math.ceil(post["readingTime"])
    free_access = "No" if post["isLocked"] else "Yes"
    updated_at = convert_datetime_to_human_readable(post["updatedAt"])
    first_published_at = convert_datetime_to_human_readable(post["firstPublishedAt"])
    tags = post["tags"]

    field_names = (
        "title", "subtitle", "description", "url", "creator", "collection",
        "reading_time", "free_access", "updated_at", "first_published_at",
        "preview_image_id", "tags",
    )
    field_values = (
        title, subtitle, description, url, creator, collection,
        reading_time, free_access, updated_at, first_published_at,
        preview_image_id, tags,
    )
    if as_dict:
        return {"post_id": self.post_id, **dict(zip(field_names, field_values))}
    return field_values
async def _render_as_html(self, template_folder: str = './templates') -> 'HtmlResult':
    """Render the post body with ``post.html`` and build the page title.

    Queries the post first when ``self.post_data`` is still empty.

    Args:
        template_folder: Directory from which ``post.html`` is loaded.

    Returns:
        ``HtmlResult(page_title, description, url, rendered_post_html)``.
    """
    if not self.post_data:
        logger.warning(f'No post data found for post ID: {self.post_id}. Querying...')
        await self.query()

    file_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_folder), enable_async=True)
    post_template = file_env.get_template('post.html')

    (
        title, subtitle, description, url, creator, collection,
        reading_time, free_access, updated_at, first_published_at,
        preview_image_id, tags,
    ) = await self.generate_metadata()

    # The content renderer may hand back an adjusted title and subtitle, so both are rebound here.
    post = self.post_data["data"]["post"]
    content, title, subtitle = await self._parse_and_render_content_html_post(
        post["content"], title, subtitle, preview_image_id, post["highlights"], tags
    )

    # The page title only mentions the publication when the post belongs to one.
    collection_suffix = " | in {{ collection.name }}" if collection else ""
    page_title_template = jinja_env.from_string("{{ title }} | by {{ creator.name }}" + collection_suffix)
    page_title = await page_title_template.render_async(title=title, creator=creator, collection=collection)

    context = dict(
        subtitle=subtitle,
        title=title,
        url=url,
        creator=creator,
        collection=collection,
        readingTime=reading_time,
        freeAccess=free_access,
        updatedAt=updated_at,
        firstPublishedAt=first_published_at,
        previewImageId=preview_image_id,
        content=content,
        tags=tags,
    )
    rendered_post = await post_template.render_async(context)
    return HtmlResult(page_title, description, url, rendered_post)
async def render_as_markdown(self) -> str:
    """Placeholder for Markdown output; always raises ``NotImplementedError``."""
    message = "Markdown rendering is not implemented. Please use HTML rendering instead"
    raise NotImplementedError(message)

View file

@ -0,0 +1,31 @@
import sqlite3
import asyncio
import pickle
from cache_db import SQLiteCacheBackend
db_path = "../medium_cache.sqlite"
async def main():
    """Copy every row of the legacy ``responses`` table into the new cache database.

    Each legacy row holds a key (column 0) and a pickled response object
    (column 1). The object's ``text()`` coroutine is awaited and the result is
    pushed into ``SQLiteCacheBackend`` under the same key. zstd is enabled on
    the new database only after all rows were copied successfully.

    Both databases are closed even when a row fails to migrate.
    """
    conn = sqlite3.connect(db_path)
    db_cache = None
    try:
        db_cache = SQLiteCacheBackend("medium_db_cache.sqlite")
        db_cache.init_db()

        c = conn.cursor()
        try:
            c.execute("SELECT * FROM responses")
            for result in c.fetchall():
                # SECURITY: pickle.loads can execute arbitrary code. Only run this
                # migration against a cache file that this project wrote itself.
                value_raw = pickle.loads(result[1])
                db_cache.push(result[0], await value_raw.text())
        finally:
            # Close the legacy connection before touching compression, as before.
            c.close()
            conn.close()

        db_cache.enable_zstd()
    finally:
        # No-op if already closed above; covers failures before the cursor existed.
        conn.close()
        if db_cache is not None:
            db_cache.close()
asyncio.run(main())

View file

@ -0,0 +1,26 @@
class MediumParserException(Exception):
    """Root of the parser's exception hierarchy; catch it to handle any parser error."""


class PageLoadingError(MediumParserException):
    """An HTTP request for a page failed (used to wrap the underlying client error)."""


class NotValidMediumURL(MediumParserException):
    """The URL belongs to a domain that is known not to be Medium."""


class InvalidURL(MediumParserException):
    """Presumably signals a malformed URL; the raise sites are outside this module."""


class InvalidMediumPostURL(MediumParserException):
    """Presumably signals a URL that does not point at a Medium post; raise sites are outside this module."""


class InvalidMediumPostID(MediumParserException):
    """Presumably signals a post id that failed validation; raise sites are outside this module."""


class MediumPostQueryError(MediumParserException):
    """Presumably signals a failed post query; raise sites are outside this module."""

File diff suppressed because one or more lines are too long

View file

View file

@ -0,0 +1,9 @@
from dataclasses import dataclass
@dataclass
class HtmlResult:
    """Value object produced by the HTML renderer."""

    # Rendered page title (post title plus author and, if any, publication).
    title: str
    # Short plain-text description of the post.
    description: str
    # URL of the original post.
    url: str
    # Rendered HTML of the post body.
    data: str

View file

@ -0,0 +1,45 @@
from datetime import datetime
def convert_datetime_to_human_readable(unix_time: int):
    """Format a millisecond Unix timestamp as ``"<Month> <day>, <year>"``.

    Args:
        unix_time: Milliseconds since the Unix epoch (not a ``datetime`` object).

    Returns:
        A string such as ``"January 15, 2024"``. The date is computed in the
        process's local time zone; month names are always English because they
        come from the fixed table below rather than from the locale.
    """
    english_months = (
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    )
    moment = datetime.fromtimestamp(unix_time / 1000)
    return f"{english_months[moment.month - 1]} {moment.day}, {moment.year}"
def get_unix_ms() -> int:
    """Return the current time as whole milliseconds since the Unix epoch."""
    seconds_since_epoch = datetime.now().timestamp()
    return int(seconds_since_epoch * 1000)

240
core/medium_parser/utils.py Normal file
View file

@ -0,0 +1,240 @@
import hashlib
import secrets
import difflib
import urllib.parse
from aiohttp_retry import RetryClient
from datetime import datetime
from loguru import logger
from functools import lru_cache
from urllib.parse import urlparse, parse_qs
import aiohttp
import string
from . import retry_options, exceptions
import tld
from bs4 import BeautifulSoup
# Characters allowed in a Medium post id: ASCII letters and digits.
VALID_ID_CHARS = set(string.ascii_letters + string.digits)
# Exact hostnames (compared against the URL's netloc) accepted as Medium-hosted without a network check.
KNOWN_MEDIUM_NETLOC = ("javascript.plainenglish.io", "python.plainenglish.io", "levelup.gitconnected.com")
# First-level domains (as returned by get_fld) accepted as Medium-hosted without a network check.
KNOWN_MEDIUM_DOMAINS = ("medium.com", "towardsdatascience.com", "eand.co", "betterprogramming.pub", "curiouse.co", "betterhumans.pub", "uxdesign.cc")
# First-level domains rejected outright by is_valid_medium_url.
NOT_MEDIUM_DOMAINS = ("github.com", "yandex.ru", "yandex.kz", "youtube.com", "nytimes.com", "wsj.com", "reddit.com", "elpais.com", "forbes.com", "bloomberg.com")
def is_valid_url(url):
    """Return True when ``url`` has a resolvable first-level domain, a scheme and a netloc."""
    if not get_fld(url):
        return False
    parts = urlparse(url)
    return bool(parts.scheme) and bool(parts.netloc)
def getting_percontage_of_match(string: str, matched_string: str) -> int:
    """Return the similarity of two strings as a percentage.

    The value is ``difflib.SequenceMatcher(...).ratio() * 100``, i.e. a float in
    ``[0, 100]`` despite the ``int`` annotation. If either argument is ``None``
    the integer ``0`` is returned.
    """
    if string is not None and matched_string is not None:
        matcher = difflib.SequenceMatcher(None, string, matched_string)
        return matcher.ratio() * 100
    return 0
def generate_random_sha256_hash():
    """Return the SHA-256 hex digest of freshly generated random bytes.

    The input comes from ``secrets.token_bytes()`` with its default length, so
    every call yields a different 64-character lowercase hexadecimal string.
    """
    return hashlib.sha256(secrets.token_bytes()).hexdigest()
def get_unix_ms() -> int:
    """Return the current time as whole milliseconds since the Unix epoch."""
    seconds_since_epoch = datetime.now().timestamp()
    return int(seconds_since_epoch * 1000)
def unquerify_url(url):
    """
    Drop the query string from a URL and strip one trailing slash.

    Args:
        url: The URL to sanitize.

    Returns:
        The URL without its query parameters. Other components, including any
        fragment, are kept. A single trailing "/" on the result is removed.
    """
    parts = urllib.parse.urlparse(url)
    # Blanking an already-empty query is a no-op, so no branch is needed.
    without_query = urllib.parse.urlunparse(parts._replace(query=''))
    return without_query.removesuffix("/")
def sanitize_url(url):
    """Strip a trailing ``/page/2`` and then a single trailing ``/`` from ``url``."""
    return url.removesuffix("/page/2").removesuffix("/")
def is_valid_medium_post_id_hexadecimal(hex_string: str) -> bool:
    """Check whether ``hex_string`` has the shape of a Medium post id.

    Despite the name, any character in ``VALID_ID_CHARS`` is accepted, not only
    hexadecimal digits, and case is not checked. The length must be between
    8 and 12 characters inclusive.
    """
    if not all(char in VALID_ID_CHARS for char in hex_string):
        return False
    return 8 <= len(hex_string) <= 12
async def resolve_medium_short_link_v1(short_url_id: str, timeout: int = 5) -> str:
    """Resolve a ``link.medium.com`` short id to a Medium post id.

    Requests ``https://rsci.app.link/<short_url_id>`` without following
    redirects, reads the ``Location`` header of the response and passes that
    URL to ``get_medium_post_id_by_url``.

    Args:
        short_url_id: The path part of the short link.
        timeout: Timeout handed to the HTTP request.

    Returns:
        Whatever ``get_medium_post_id_by_url`` returns for the redirect target.

    Raises:
        KeyError: If the response carries no ``Location`` header.
    """
    browser_headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"}
    short_link = f"https://rsci.app.link/{short_url_id}"
    async with aiohttp.ClientSession() as session:
        client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
        response = await client.get(short_link, timeout=timeout, headers=browser_headers, allow_redirects=False)
        redirect_target = response.headers["Location"]
        return await get_medium_post_id_by_url(redirect_target)
def _single_query_value(query: str, key: str):
    """Return the value of ``key`` in ``query`` if it occurs exactly once, else ``None``."""
    values = parse_qs(query).get(key)
    if values and len(values) == 1:
        return values[0]
    return None


def _unwrap_redirect_url(parsed_url):
    """Recognise redirect/cache wrapper URLs and extract the URL they wrap.

    Returns ``(matched, target)``. ``matched`` says whether ``parsed_url`` is one
    of the known wrappers; ``target`` is the wrapped URL, or ``None`` when the
    expected query parameter is missing or repeated.
    """
    netloc, path, query = parsed_url.netloc, parsed_url.path, parsed_url.query
    if netloc == "l.facebook.com" and path.startswith("/l.php"):
        return True, _single_query_value(query, "u")
    if netloc == "webcache.googleusercontent.com" and path.startswith("/search"):
        cached = _single_query_value(query, "q")
        if cached is None:
            return True, None
        return True, cached.removeprefix("cache:")
    if netloc == "www.google.com" and path.startswith("/url"):
        # Google uses either "url" or "q" for the destination; "url" wins when both are usable.
        return True, _single_query_value(query, "url") or _single_query_value(query, "q")
    if netloc == "12ft.io":
        return True, _single_query_value(query, "q")
    if path.startswith("/m/global-identity-2"):
        return True, _single_query_value(query, "redirectUrl")
    return False, None


async def get_medium_post_id_by_url(url: str, timeout: int = 5) -> str:
    """Extract the Medium post id from a post URL or from a URL that wraps one.

    Handled forms, in order of precedence:
      * ``/p/<id>`` paths,
      * Facebook, Google cache, Google redirect, 12ft.io and Medium
        ``global-identity-2`` wrappers (the wrapped URL is resolved recursively),
      * ``link.medium.com`` short links (resolved over HTTP),
      * regular post URLs, where the id is the text after the last ``-`` of the
        last path segment.

    Args:
        url: URL to inspect.
        timeout: HTTP timeout; it is now forwarded through recursive calls so it
            also applies to short links found inside a wrapper URL.

    Returns:
        The post id string, or ``False`` when no valid id could be extracted
        (the ``str`` annotation does not cover that case).
    """
    parsed_url = urlparse(url)
    if parsed_url.path.startswith("/p/"):
        post_id = parsed_url.path.rsplit("/p/")[1]
    else:
        is_wrapper, target_url = _unwrap_redirect_url(parsed_url)
        if is_wrapper:
            if target_url is None:
                return False
            return await get_medium_post_id_by_url(target_url, timeout)
        if parsed_url.netloc == "link.medium.com":
            short_url_id = parsed_url.path.removeprefix("/")
            return await resolve_medium_short_link_v1(short_url_id, timeout)
        last_segment = parsed_url.path.split("/")[-1]
        post_id = last_segment.split("-")[-1]

    if not is_valid_medium_post_id_hexadecimal(post_id):
        return False
    return post_id
async def get_medium_post_id_by_url_old(url: str, timeout: int = 5) -> str:
    """Legacy resolver: download the page and read the post id from its meta tags.

    The page must declare ``og:type`` = ``article`` and carry an
    ``al:android:url`` meta tag; the last path segment of that tag's URL is
    returned.

    Args:
        url: Page URL to download.
        timeout: Timeout handed to the HTTP request.

    Returns:
        The last path segment of the ``al:android:url`` value, or ``False`` when
        the page has no ``<head>`` or lacks the expected meta tags.
    """
    async with aiohttp.ClientSession() as session:
        retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
        request = await retry_client.get(url, timeout=timeout)
        response = await request.text()

    soup = BeautifulSoup(response, "html.parser")
    # Non-HTML or truncated responses have no <head>; treat them as "not a post"
    # instead of failing with AttributeError, matching is_valid_medium_url.
    if not soup.head:
        return False

    type_meta_tag = soup.head.find("meta", property="og:type")
    if not type_meta_tag or type_meta_tag.get("content") != "article":
        return False

    url_meta_tag = soup.head.find("meta", property="al:android:url")
    if not url_meta_tag or not url_meta_tag.get("content"):
        return False

    parsed_url = urlparse(url_meta_tag["content"])
    path = parsed_url.path.strip("/")
    return path.split("/")[-1]
@lru_cache(maxsize=200)
def get_fld(url: str):
    """Return the first-level domain of ``url`` via ``tld.get_fld``, or ``None`` on any failure.

    Failures are logged at TRACE level. Results, including ``None``, are
    memoised for up to 200 distinct URLs.
    """
    try:
        return tld.get_fld(url)
    except Exception as ex:
        logger.trace(ex)
        return None
async def is_valid_medium_url(url: str, timeout: int = 5) -> bool:
    """
    Decide whether ``url`` points at a Medium-hosted site.

    Stage one works on the domain only: known redirect/cache hosts and known
    Medium domains are accepted, known non-Medium domains are rejected.
    Stage two downloads the page and accepts it only when its ``og:site_name``
    meta tag equals ``Medium``.

    Raises:
        exceptions.NotValidMediumURL: The domain is listed in ``NOT_MEDIUM_DOMAINS``.
        exceptions.PageLoadingError: The stage-two GET request failed.
    """
    # Stage one: decide from the domain alone where possible.
    domain = get_fld(url)
    netloc = urlparse(url).netloc

    # Wrapper hosts are let through here; the wrapped URL is examined later.
    if domain in ("12ft.io", "google.com", "facebook.com", "googleusercontent.com"):
        return True
    if domain in NOT_MEDIUM_DOMAINS:
        raise exceptions.NotValidMediumURL("100% not valid Medium URL")
    if domain in KNOWN_MEDIUM_DOMAINS or netloc in KNOWN_MEDIUM_NETLOC:
        return True
    logger.warning(f"url '{url}' wasn't detected in known medium domains")

    # Stage two: inspect the page itself.
    async with aiohttp.ClientSession() as session:
        client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
        try:
            page = await client.get(url, timeout=timeout)
        except Exception as ex:
            raise exceptions.PageLoadingError(ex) from ex
        html = await page.text()
        soup = BeautifulSoup(html, "html.parser")
        if not soup.head:
            return False
        site_name = soup.head.find("meta", property="og:site_name")
        return bool(site_name) and site_name.get("content") == "Medium"

View file

@ -0,0 +1,3 @@
djlint==1.32.1
ruff==0.0.261
black==23.7.0

8
core/requirements.txt Normal file
View file

@ -0,0 +1,8 @@
loguru==0.6.0
aiohttp==3.8.5
aiohttp-retry==2.8.3
tld==0.13
bs4==0.0.1
Jinja2==3.1.2
beautifulsoup4==4.12.2
# git+https://github.com/phiresky/sqlite-zstd.git#egg=sqlite_zstd&subdirectory=python

25
core/setup.py Normal file
View file

@ -0,0 +1,25 @@
from setuptools import setup, find_packages
# Function to read the contents of the requirements file
def read_requirements():
    """Return the requirement specifiers listed in ``requirements.txt``.

    The file is looked up in the current working directory. Blank lines,
    full-line ``#`` comments and trailing `` # ...`` comments are dropped so
    that only real requirement strings reach ``install_requires``.
    """
    requirements = []
    with open('requirements.txt', 'r') as req:
        for raw_line in req:
            # An inline comment must be preceded by whitespace; "#egg=" URL fragments are kept.
            line = raw_line.split(" #", 1)[0].strip()
            if line and not line.startswith("#"):
                requirements.append(line)
    return requirements
# Read the README up front so the file handle is closed deterministically and
# decoding does not depend on the platform's default encoding.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='medium_parser',
    version='0.1.0',
    author='Freedium community',
    author_email='admin@freedium.cfd',
    description='A parser for Medium posts',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://codeberg.org/Freedium-cfd/web',
    packages=find_packages(),
    install_requires=read_requirements(),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # The package calls str.removesuffix/removeprefix, which exist only from Python 3.9 on.
    python_requires='>=3.9',
)

79
core/templates/post.html Normal file
View file

@ -0,0 +1,79 @@
<div class="container w-full md:max-w-3xl mx-auto pt-20 break-words">
<div class="w-full px-4 md:px-6 text-xl text-gray-800 leading-normal" style="font-family:Georgia,serif">
<div class="font-sans">
<p class="text-base md:text-sm text-green-500 font-bold pb-3">
<a href="{{ url }}" class="text-sm md:text-sm text-green-500 font-bold no-underline hover:underline ">&lt; Go to the original</a>
</p>
{% if previewImageId %}
<img alt="Preview image"
style="max-height: 65vh;
width: auto;
margin: auto"
loading="eager"
role="presentation"
src="https://miro.medium.com/v2/resize:fit:700/{{ previewImageId }}">
{% endif %}
<h1 class="font-bold font-sans break-normal text-gray-900 pt-6 pb-2 text-3xl md:text-4xl">{{ title }}</h1>
{% if subtitle %}<h2 class="font-medium font-sans break-normal text-gray-600 pt-1 pb-3 text-1xl md:text-1xl">{{ subtitle }}</h2>{% endif %}
</div>
<div class="bg-gray-100 border border-gray-300 m-2">
<div class="flex items-center space-x-4 p-4">
<div class="flex-shrink-0">
<a href="https://medium.com/@{{ creator.username }}" target="_blank" title="{{ creator.bio }}" class="block relative">
<img src="https://miro.medium.com/v2/resize:fill:88:88/{{ creator.imageId }}"
alt="{{ creator.name }}"
class="rounded-full h-11 w-11 no-lightense">
<div class="absolute bottom-0 right-0 h-3 w-3 border-2 border-white bg-green-500 rounded-full"></div>
</a>
</div>
<div class="flex-grow">
<a href="https://medium.com/@{{ creator.username }}"
target="_blank"
title="{{ creator.bio }}"
class="block font-semibold text-gray-900">{{ creator.name }}</a>
<button class="text-sm text-white bg-green-500 px-3 py-1 rounded-lg mt-1">
<a href="https://medium.com/@{{ creator.username }}"
target="_blank"
title="{{ creator.bio }}"
class="block text-sm text-white">Follow</a>
</button>
</div>
</div>
<div class="px-4 pb-2">
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500">
{% if collection %}
<a href="https://medium.com/{{ collection.slug }}"
title="{{ collection.shortDescription }}"
target="_blank"
class="flex items-center space-x-1">
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ collection.avatar.id }}"
alt="{{ collection.name }}"
class="h-4 w-4 rounded-full no-lightense">
<p>{{ collection.name }}</p>
</a>
<span>·</span>
{% endif %}
<span class="text-gray-500">~{{ readingTime }} min read</span>
<span class="md:inline">·</span>
<span class="text-gray-500">{{ firstPublishedAt }} (Updated: {{ updatedAt }})</span>
<span class="md:inline">·</span>
<span class="text-yellow-500">Free: {{ freeAccess }}</span>
</div>
</div>
</div>
{% for paragraph in content %}{{ paragraph }}{% endfor %}
<div class="flex flex-wrap gap-2 mt-5">
{% for tag in tags %}<a title="{{ tag.displayTitle }}" target="_blank" href="https://medium.com/tag/{{ tag.normalizedTagSlug }}"><span class="text-green-500 bg-green-100 px-2 py-1 rounded-full text-xs">#{{ tag.normalizedTagSlug }}</span></a>{% endfor %}
</div>
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
</div>
</div>
<style>
code {
/*font-size: 75%;*/
background-color: #e3e2e2;
}
pre {
font-size: 75%;
background-color: #e3e2e2;
}
</style>

View file

@ -0,0 +1,112 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
<title>{{ page_title }}</title>
{% if creator %}
<meta name="author" content="{{ creator.name }}" />
{% endif %}
<meta name="description" content="{{ page_description or 'Your paywall breakthrough for medium.com!' }}" />
<meta name="keywords" content="medium, paywall, medium.com, paywall breakthrough" />
<link rel="stylesheet" href="https://unpkg.com/tailwindcss@2.2.19/dist/tailwind.min.css"/>
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#00aba9">
<meta name="msapplication-TileColor" content="#00aba9">
<meta name="theme-color" content="#ffffff">
<script src="https://cdn.jsdelivr.net/npm/lightense-images@1.0.17/dist/lightense.min.js"></script>
</head>
<body class="bg-gray-100 font-sans leading-normal tracking-normal">
<nav id="header" class="fixed w-full z-10 top-0">
{% if enable_ads_header %}
<div class="w-full bg-yellow-400 text-center py-1 px-4"><p class="text-yellow-900">Place your advertisement here! Contact us at advertise@freedium.com</p></div>
{% endif %}
<div id="progress" class="h-1 z-20 top-0" style="background:linear-gradient(to right, #4dc0b5 var(--scroll), transparent 0);"></div>
<div class="w-full md:max-w-4xl mx-auto flex flex-wrap items-center justify-between mt-0 py-3">
<div class="pl-4">
<a class="text-green-500 text-base no-underline hover:no-underline font-extrabold text-xl" href="/" onclick="navigateToOrigin()">
Freedium βeta
</a>
</div>
<div class="block lg:hidden pr-4">
<button id="nav-toggle" class="flex items-center px-3 py-2 border rounded text-gray-500 border-gray-600 hover:text-gray-900 hover:border-green-500 appearance-none focus:outline-none">
<svg class="fill-current h-3 w-3" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
<title>Menu</title>
<path d="M0 3h20v2H0V3zm0 6h20v2H0V9zm0 6h20v2H0v-2z" />
</svg>
</button>
</div>
<div class="w-full flex-grow lg:flex lg:items-center lg:w-auto hidden lg:block mt-2 lg:mt-0 bg-gray-100 z-20" id="nav-content">
<ul class="list-reset lg:flex justify-end flex-1 items-center">
<!--
<li class="mr-3">
<a class="inline-block py-2 px-4 text-gray-900 font-bold no-underline" href="#">Active</a>
</li>
-->
<li class="mr-3">
<a class="inline-block text-gray-600 no-underline hover:text-gray-900 hover:text-underline py-2 px-4" href="https://medium.com/">Medium.com</a>
</li>
<!--
<li class="mr-3">
<a class="inline-block text-gray-600 no-underline hover:text-gray-900 hover:text-underline py-2 px-4" href="#">link</a>
</li>
-->
</ul>
</div>
</div>
</nav>
{% if enable_ads_header %}
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
{% endif %}
{{ body_template }}
<script>
// Send the browser to the site root (scheme + host + port of the current page).
function navigateToOrigin() {
  const { origin } = window.location;
  window.location.href = origin;
}
</script>
<script>
// Page chrome behaviour: reading-progress bar, header restyling on scroll,
// mobile menu toggle and image zoom initialisation.
const h = document.documentElement, b = document.body;
// Property names kept in variables so the scroll maths below stays short.
const st = 'scrollTop';
const sh = 'scrollHeight';
const progress = document.getElementById('progress');
const header = document.getElementById('header');
const navcontent = document.getElementById('nav-content');
document.addEventListener('scroll', function () {
/* Refresh scroll % width */
// Falls back to <body> when the value read from <html> is 0/undefined.
const scroll = (h[st] || b[st]) / ((h[sh] || b[sh]) - h.clientHeight) * 100;
// The #progress element's gradient reads this CSS custom property.
progress.style.setProperty('--scroll', scroll + '%');
/* Apply classes for slide in bar */
const shouldAddClass = window.scrollY > 10;
header.classList.toggle('bg-white', shouldAddClass);
header.classList.toggle('shadow', shouldAddClass);
navcontent.classList.toggle('bg-gray-100', !shouldAddClass);
navcontent.classList.toggle('bg-white', shouldAddClass);
});
// Show/hide the navigation list when the menu button is clicked.
document.getElementById('nav-toggle').onclick = function() {
document.getElementById("nav-content").classList.toggle("hidden");
}
// After load, enable Lightense on every image not marked "no-lightense".
window.addEventListener('load', function () {
Lightense('img:not(.no-lightense)');
}, false);
</script>
</body>
</html>

View file

@ -0,0 +1,44 @@
import asyncio
import json
import sys
import jinja2
from loguru import logger
from medium_parser.core import MediumParser
jinja2_env = jinja2.Environment(
loader=jinja2.FileSystemLoader("./"),
)
async def safe_main():
    """Run ``main()`` and log, rather than propagate, any ``Exception`` it raises."""
    try:
        await main()
    except Exception as error:
        logger.exception(error)
async def main():
    """Example run: query one hard-coded post, dump the raw response and render it to HTML.

    Writes ``query_result.json`` and ``medium.html`` into the current directory,
    then exits the process.
    """
    # Replace loguru's default sink with a TRACE-level stderr sink.
    logger.remove()
    # logger.add(sys.stderr, level="INFO")
    logger.add(sys.stderr, level="TRACE")

    # parser = await MediumParser.from_url("")
    parser = MediumParser("3d8e0ba02d10", 8, "localhost")
    raw_response = await parser.query(use_cache=False)
    with open("query_result.json", "w") as dump_file:
        json.dump(raw_response, dump_file, indent=2)

    rendered_post = await parser.render_as_html()
    with open("medium.html", "w") as html_file:
        base_template = jinja2_env.get_template("example_base_template.html")
        html_file.write(base_template.render(body_template=rendered_post.data))

    print("See medium.html for the result. Press CTRL-C to exit.")
    sys.exit()
if __name__ == "__main__":
asyncio.run(safe_main())

BIN
data/original.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

BIN
data/vector-1028x1028.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

83
data/vector-1028x1028.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 65 KiB

82
data/vector-32x32.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 65 KiB

2
pyproject.toml Normal file
View file

@ -0,0 +1,2 @@
[tool.black]
line-length = 220

3
requirements-dev.txt Normal file
View file

@ -0,0 +1,3 @@
djlint==1.32.1
ruff==0.0.261
black==23.7.0

2
requirements-fast.txt Executable file
View file

@ -0,0 +1,2 @@
orjson==3.9.2
uvloop==0.17.0

12
requirements.txt Normal file
View file

@ -0,0 +1,12 @@
pickledb==0.9.2
html5lib==1.1
fastapi-limiter==0.1.5
sentry-sdk[fastapi]==1.29.2
loguru==0.6.0 # due to: https://github.com/Delgan/loguru/issues/916
uvicorn==0.20.0
Jinja2==3.1.2
fastapi==0.91.0
starlette==0.24.0
gunicorn==21.2.0
redis[hiredis]==4.6.0
xkcdpass==1.19.3

160
rl_string_helper/.gitignore vendored Normal file
View file

@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View file

@ -0,0 +1,3 @@
# rl-string-helper
`RLStringHelper` is designed specifically for use with the Medium.com parser as a string markup helper. The basic idea is to apply multiple markups and multiple replacements to the same character positions. It also aligns character positions with UTF-16 code-unit offsets. See the tests for more information.

View file

@ -0,0 +1 @@
loguru==0.6.0

View file

@ -0,0 +1,2 @@
from .string_helper import RLStringHelper, parse_markups, split_overlapping_ranges
from .utils import quote_html, quote_symbol

View file

@ -0,0 +1,39 @@
import asyncio
import time
from functools import wraps
from loguru import logger
def trace(func):
    """Decorator that logs calls, results and timing of ``func`` at TRACE level.

    Works for plain functions and coroutine functions. The wrapped result is
    returned unchanged; for ``str``/``bytes`` results only the first 42
    characters appear in the final summary line.
    """
    if asyncio.iscoroutinefunction(func):
        logger.trace(f"{func.__name__!r} function is a coroutine")

        @wraps(func)
        async def wrapper(*args, **kwargs):
            started_at = time.time()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            outcome = await func(*args, **kwargs)
            logger.trace(f"Result: {outcome}")
            logger.trace(f"Result type: {type(outcome)}")
            elapsed = time.time() - started_at
            if type(outcome).__name__ in ("str", "bytes"):
                preview = f"{outcome[:42]}..."
            else:
                preview = outcome
            logger.trace(f"{func.__name__!r}() returned {preview!r} in {elapsed:.2} seconds")
            return outcome

        return wrapper

    logger.trace(f"{func.__name__!r} is not a coroutine")

    @wraps(func)
    def wrapper(*args, **kwargs):
        started_at = time.time()
        logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
        outcome = func(*args, **kwargs)
        logger.trace(f"Result: {outcome}")
        logger.trace(f"Result type: {type(outcome)}")
        elapsed = time.time() - started_at
        if type(outcome).__name__ in ("str", "bytes"):
            preview = f"{outcome[:42]}..."
        else:
            preview = outcome
        logger.trace(f"{func.__name__!r}() returned {preview!r} in {elapsed:.2} seconds")
        return outcome

    return wrapper

View file

@ -0,0 +1,524 @@
from loguru import logger
from .logger_trace import trace
from .utils import quote_html, quote_symbol
from jinja2 import Environment, DebugUndefined, Template
jinja_env = Environment(undefined=DebugUndefined)
# TODO: doc!
class StringAsignmentMix:
    """String wrapper that supports in-place insertion, removal and item assignment.

    The text is held as a list of pieces (``string_list``); ``string`` is a
    cached join of that list which is refreshed whenever the object is
    measured, encoded or converted to ``str``.
    """

    __slots__ = ("string", "string_list")

    def __init__(self, string: str):
        """Start from a ``str`` or from another ``StringAsignmentMix``.

        Raises:
            ValueError: If ``string`` is of any other type.
        """
        if isinstance(string, StringAsignmentMix):
            text = string.string
        elif isinstance(string, str):
            text = string
        else:
            raise ValueError(f"Incorrect string type: {type(string)}")
        self.string = text
        self.string_list = list(text)

    def __render_string(self):
        # Refresh the cached plain string from the piece list.
        self.string = "".join(self.string_list)

    def __len__(self):
        """Length of the joined text (not the number of pieces)."""
        self.__render_string()
        return len(self.string)

    def pop(self, key):
        """Remove the piece at index ``key``; returns ``self``, not the removed piece."""
        self.string_list.pop(key)
        return self

    def encode(self, encoding: str):
        """Encode the joined text with ``encoding``."""
        self.__render_string()
        return self.string.encode(encoding)

    def insert(self, key: int, value):
        """Insert ``value`` as a new piece before index ``key``; returns ``self``."""
        self.string_list.insert(key, value)
        return self

    def __setitem__(self, key, value):
        logger.trace(f"Calling __setitem__ with {key=}, {value=}")
        self.string_list[key] = value
        return self

    def __getitem__(self, key):
        logger.trace(f"Calling __getitem__ with {key=}")
        selected = self.string_list[key]
        return "".join(selected)

    def __str__(self):
        self.__render_string()
        return self.string

    def __repr__(self):
        # Same as __str__: the bare text without quotes.
        return self.__str__()
# TODO: more clarified description
"""
In JavaScript, the `length` property of a String returns the number of UTF-16 code units (16-bit values) in the string, not the number of characters.
In UTF-16, each Unicode character is encoded as one or two code units. This means that for characters outside the Basic Multilingual Plane, such as emoji, some mathematical symbols, or rare Chinese characters,
the value returned by `length` does not match the actual number of Unicode characters in the string.
Python strings are indexed by Unicode code point, so every character counts as one regardless of how many UTF-16 code units it needs. The workaround here pads the string so that positions line up with UTF-16 code-unit offsets and removes the padding afterwards. See the pre_utf_16_bang and post_utf_16_bang functions.
"""
# TODO: doc! Who will read this noodles lol?
# TODO: check cases when UTF-16 character can be more that 2 bytes
class RLStringHelper:
__slots__ = ("string", "templates", "replaces", "quote_html_type", "quote_replaces")
def __init__(self, string: str, quote_html_type: list[str] = ["full"]):
    """Wrap ``string`` for later markup and replacement processing.

    Args:
        string: Source text; it is passed through ``quote_symbol`` before being stored.
        quote_html_type: Quoting modes to remember on the instance. NOTE(review):
            the default list object is shared by every instance that relies on
            the default, so it should not be mutated.
    """
    self.quote_html_type = quote_html_type
    self.templates = []
    self.replaces = []
    self.quote_replaces = []
    quoted_text = quote_symbol(string)
    self.string = StringAsignmentMix(quoted_text)
@trace
def pre_utf_16_bang(self, string: str, string_pos_matrix: list, _default_bang_char: str = "R"):
    """Pad ``string`` so that its indices line up with UTF-16 code-unit offsets.

    After every character that needs two UTF-16 code units, one filler
    character is inserted, and ``string_pos_matrix`` is updated accordingly via
    ``_paste_char``.

    Args:
        string: Text to pad. It must support ``insert`` (see ``_paste_char``),
            so presumably a ``StringAsignmentMix`` rather than a plain ``str``.
        string_pos_matrix: Position list that is adjusted in step with the text.
        _default_bang_char: Filler character to insert.

    Returns:
        ``(string, string_pos_matrix, utf_16_bang_list)`` where the last item
        holds one ``(position, filler_length, position)`` tuple per inserted filler.

    Raises:
        ValueError: If a character is neither one nor two UTF-16 code units long.
    """
    utf_16_bang_list = []
    string_len_utf_16 = len(string.encode("utf-16-le")) // 2
    # Fast path: same length in code units means nothing needs padding.
    if string_len_utf_16 == len(string):
        logger.trace("String is doesn't contain multibyte characters")
        return string, string_pos_matrix, utf_16_bang_list

    i = 0
    while len(string) - 1 > i:
        new_i = string_pos_matrix[i]
        char = string[new_i]
        char_len = len(char.encode("utf-16-le")) // 2
        if char_len == 2:
            char_len_dif = char_len - 1
            logger.trace(char_len_dif)
            logger.trace(f"'{char}' char is two bytes")
            # logger.trace(f"'{char}' char is multibyte")
            char_present = _default_bang_char * char_len_dif
            logger.trace(f"{char_present=}")
            string, string_pos_matrix = self._paste_char(string, string_pos_matrix, new_i + 1, char_present)
            # Step over the filler that was just inserted.
            i += 1
            utf_16_bang_list.append((i, char_len_dif, i))
        elif char_len == 1:
            logger.trace(f"'{char}' char is single byte")
        else:
            # Previously the exception was built but never raised.
            raise ValueError(f"Invalid char: {char}")
        i += 1

    logger.trace(utf_16_bang_list)
    logger.trace(string_pos_matrix)
    logger.trace(len(string))
    return string, string_pos_matrix, utf_16_bang_list
def _paste_char(self, string: str, string_pos_matrix: list, pos: int, char: str):
char_len = len(char)
string_pos_matrix.insert(pos, string_pos_matrix[pos])
for matrix_i, matrix in enumerate(string_pos_matrix[pos + 1:], pos + 1):
string_pos_matrix[matrix_i] += char_len
string.insert(pos, char)
return string, string_pos_matrix
def _delete_char(self, string: str, string_pos_matrix: list, pos: int, char_len: int, old_pos: int):
string.pop(pos)
string_pos_matrix.pop(old_pos)
for matrix_i, matrix in enumerate(string_pos_matrix[pos:], pos):
if isinstance(string_pos_matrix[matrix_i], int):
string_pos_matrix[matrix_i] -= char_len
elif isinstance(string_pos_matrix[matrix_i], tuple):
string_pos_matrix[matrix_i] = (string_pos_matrix[matrix_i][0] - char_len, string_pos_matrix[matrix_i][1] - char_len)
return string, string_pos_matrix
@trace
def post_utf_16_bang(self, string: str, string_pos_matrix: list, utf_16_bang_list: list, _default_bang_char: str = "R"):
    """Strip the filler characters recorded in ``utf_16_bang_list`` from ``string``.

    Entries are processed in list order; the recorded positions are corrected
    by the total length already removed. ``_default_bang_char`` is accepted
    but not used here.

    Returns:
        ``(string, string_pos_matrix)`` with ``string`` as a ``StringAsignmentMix``.
    """
    string = StringAsignmentMix(string)
    removed_total = 0
    for entry in utf_16_bang_list:
        bang_pos, char_len, old_pos = entry
        string, string_pos_matrix = self._delete_char(
            string,
            string_pos_matrix,
            bang_pos - removed_total,
            char_len,
            old_pos - removed_total,
        )
        removed_total += char_len
    logger.trace(utf_16_bang_list)
    logger.trace(string_pos_matrix)
    return string, string_pos_matrix
@trace
def set_template(self, start: int, end: int, template: str):
    """Queue ``template`` to be applied to the range ``[start, end)`` at render time.

    A value that is not already a ``Template`` is compiled with ``jinja_env``.
    """
    compiled = template if isinstance(template, Template) else jinja_env.from_string(template)
    self.templates.append(((start, end), compiled))
    logger.trace(self.templates)
@trace
def set_replace(self, start: int, end: int, replace_with: str):
    """Queue the replacement of the range ``[start, end)`` with ``replace_with``."""
    self.replaces.append(((start, end), replace_with))
    logger.trace(self.replaces)
def _render_templates(self, string: str, string_pos_matrix: list, utf_16_bang_list: list):
    """Wrap the queued ranges of ``string`` in their templates.

    Templates are applied from the most recently registered to the oldest, so
    a later template ends up outside an earlier one covering the same range.
    After each application ``string_pos_matrix`` and ``utf_16_bang_list`` are
    shifted by the length of the text the template adds before and after
    ``{{ text }}``.

    Args:
        string: Text to render into. NOTE(review): annotated ``str`` but
            ``__str__`` passes the ``StringAsignmentMix`` from ``pre_utf_16_bang``.
        string_pos_matrix: Maps original indices to current positions; updated in place.
        utf_16_bang_list: Filler records from ``pre_utf_16_bang``; updated in place.

    Returns:
        ``(updated_text, string_pos_matrix, utf_16_bang_list)``.

    Raises:
        ValueError: If a rendered template contains no ``{`` / ``}`` placeholder marker.
    """
    if not self.templates:
        return string, string_pos_matrix, utf_16_bang_list

    # Work on a reversed COPY. The original called self.templates.reverse(),
    # which flipped the stored list on every render, so the nesting order of
    # overlapping templates alternated between successive str() calls.
    templates = list(reversed(self.templates))
    older_text = string
    updated_text = string
    logger.trace(string_pos_matrix)

    @trace
    def _get_prefix_len(template_raw: Template, inner_char: str = "{"):
        # Number of characters the template emits before the placeholder.
        template = template_raw.render()
        prefix_len = template.find(inner_char)
        if prefix_len == -1:
            raise ValueError(f"Invalid template: {template}")
        return prefix_len

    @trace
    def _get_suffix_len(template_raw: Template, outer_char: str = "}"):
        # Number of characters the template emits after the placeholder.
        template = template_raw.render()
        last_marker = template.rfind(outer_char)
        if last_marker == -1:
            raise ValueError(f"Invalid template: {template}")
        return len(template) - 1 - last_marker

    @trace
    def update_nested_positions(start, end, prefix_len, suffix_len):
        logger.trace(len(self.string) == len(string_pos_matrix))
        logger.trace(f"{len(self.string)=}")
        # Everything after the wrapped range moves by prefix + suffix ...
        for i in range(end, len(string_pos_matrix)):
            logger.trace(f"{i=}")
            logger.trace(f"{string_pos_matrix[i]=}")
            string_pos_matrix[i] = string_pos_matrix[i] + suffix_len + prefix_len
        # ... while the wrapped range itself only moves by the prefix.
        for i in range(start, end):
            string_pos_matrix[i] = string_pos_matrix[i] + prefix_len
        for n in range(len(utf_16_bang_list)):
            utf_16_bang = utf_16_bang_list[n]
            if utf_16_bang[2] > end:
                utf_16_bang_list[n] = (utf_16_bang[0] + prefix_len + suffix_len, utf_16_bang[1], utf_16_bang[2])
            elif utf_16_bang[2] > start:
                utf_16_bang_list[n] = (utf_16_bang[0] + prefix_len, utf_16_bang[1], utf_16_bang[2])
        logger.trace(string_pos_matrix)
        logger.trace(utf_16_bang_list)

    logger.trace(string_pos_matrix)

    # Loop-invariant: compile the splice template once instead of per range.
    updated_text_template = jinja_env.from_string("{{ updated_text[:new_start] }}{{ context_text }}{{updated_text[new_end:]}}")

    for (start, end), template in templates:
        logger.trace(older_text == updated_text)
        logger.trace(f"{updated_text}")
        logger.trace(f"{start=}, {end=}, {template=}")

        if start >= len(string_pos_matrix):
            logger.warning("Start position is out of range. Ignore...")
            continue
        elif end - 1 >= len(string_pos_matrix):
            logger.warning("End position is out of range. Using workaround.")
            # Clamp the end of the range to the end of the text.
            end = len(string_pos_matrix)

        if start == end:
            logger.warning("Start and end positions are the same")
            continue

        logger.trace(f"{len(string_pos_matrix)=}")
        new_start, new_end = (
            string_pos_matrix[start],
            string_pos_matrix[end - 1] + 1,
        )

        if new_end < new_start:
            logger.error(f"Invalid negative range: {new_start=} {new_end=}. Ignore.....")
            # we had to ignore this error since we need to release new version
            # raise ValueError(f"Invalid negative range: {new_start=} {new_end=}")
            continue

        logger.trace(f"{new_start=}, {new_end=}")
        logger.trace(updated_text[new_start:new_end])

        older_text = updated_text
        logger.trace(f"{older_text=}")
        context_text = template.render(text=older_text[new_start:new_end])
        logger.trace(context_text)

        updated_text = updated_text_template.render(updated_text=updated_text, context_text=context_text, new_start=new_start, new_end=new_end)
        logger.trace(updated_text)

        prefix_len = _get_prefix_len(template)
        suffix_len = _get_suffix_len(template)
        update_nested_positions(start, end, prefix_len, suffix_len)

    logger.trace(string_pos_matrix)
    return updated_text, string_pos_matrix, utf_16_bang_list
@trace
def _render_replaces(self, string: str, string_pos_matrix: list, utf_16_bang_list: list):
    """Apply the queued replacements (``self.replaces`` then ``self.quote_replaces``).

    Each replacement is addressed by original indices; ``string_pos_matrix``
    translates them to current positions. Entries of the matrix covered by a
    length-changing replacement become ``(start, end)`` tuples.

    Returns:
        ``(string, string_pos_matrix, utf_16_bang_list)`` with ``string`` as a
        ``StringAsignmentMix`` when at least one replacement was queued.
    """
    if not (self.replaces or self.quote_replaces):
        return string, string_pos_matrix, utf_16_bang_list

    string = StringAsignmentMix(string)
    pending = self.replaces + self.quote_replaces

    @trace
    def update_positions(start: int, end: int, replace_len: int, new_start: int, new_end: int):
        pos_len = len(range(start, end))
        logger.trace(pos_len)
        pos_len_diff = replace_len - pos_len
        logger.trace(pos_len_diff)

        # Shift everything located after the replaced range.
        for offset, entry in enumerate(string_pos_matrix[end:]):
            idx = end + offset
            if isinstance(entry, int):
                string_pos_matrix[idx] += pos_len_diff
            elif isinstance(entry, tuple):
                current = string_pos_matrix[idx]
                string_pos_matrix[idx] = (
                    current[0] + pos_len_diff,
                    current[1] + pos_len_diff,
                )

        # A replacement of a different length turns the covered entries into ranges.
        if pos_len_diff != 0:
            for i in range(start, end):
                current = string_pos_matrix[i]
                if isinstance(current, int):
                    string_pos_matrix[i] = (current, current + replace_len)
                elif isinstance(current, tuple):
                    string_pos_matrix[i] = (current[0] + replace_len, current[1] + replace_len)

        for n, bang in enumerate(utf_16_bang_list):
            if bang[0] > end:
                utf_16_bang_list[n] = (bang[0] + pos_len_diff, bang[1], bang[2])
        logger.trace(string_pos_matrix)

    def _candidates(bound):
        # A tuple entry stands for every position in its inclusive range.
        if isinstance(bound, tuple):
            return list(range(bound[0], bound[1] + 1))
        return [bound]

    for (start, end), replace_with in pending:
        new_start = string_pos_matrix[start]
        new_end = string_pos_matrix[end - 1]
        if isinstance(new_end, int):
            new_end += 1

        if isinstance(new_start, tuple) or isinstance(new_end, tuple):
            new_range = _candidates(new_start) + _candidates(new_end)
            logger.trace(new_range)
            new_start, new_end = min(new_range), max(new_range)

        logger.trace(f"{new_start=}, {new_end=}")
        logger.trace(string[new_start:new_end])
        string[new_start:new_end] = replace_with
        logger.trace(string)
        update_positions(start, end, len(replace_with), new_start, new_end)

    logger.trace(string_pos_matrix)
    return string, string_pos_matrix, utf_16_bang_list
@trace
def __str__(self):
    """Render the text: pad for UTF-16, apply templates and replacements, then unpad.

    When ``quote_html_type`` is truthy, ``self.quote_replaces`` is rebuilt from
    ``quote_html`` on every call. If nothing is queued at all, the stored
    string is returned unchanged.
    """
    string = StringAsignmentMix(self.string)
    string_pos_matrix = list(range(len(string)))
    updated_text, string_pos_matrix, utf_16_bang_list = self.pre_utf_16_bang(string, string_pos_matrix)

    if self.quote_html_type:
        self.quote_replaces = []
        self.quote_replaces.extend(quote_html(str(updated_text), self.quote_html_type))

    if not (self.templates or self.replaces or self.quote_replaces):
        logger.debug("No templates, no replaces, no quote_replaces")
        return str(self.string)

    state = self._render_templates(updated_text, string_pos_matrix, utf_16_bang_list)
    updated_text, string_pos_matrix, utf_16_bang_list = state
    state = self._render_replaces(updated_text, string_pos_matrix, utf_16_bang_list)
    updated_text, string_pos_matrix, utf_16_bang_list = state
    updated_text, string_pos_matrix = self.post_utf_16_bang(updated_text, string_pos_matrix, utf_16_bang_list)
    return str(updated_text)
def get_text(self):
    """Return the rendered text; equivalent to ``str(helper)``."""
    rendered = self.__str__()
    return rendered
def split_overlapping_ranges(markups):
    """Run ``split_overlapping_range_position`` repeatedly until the markup count stops changing.

    At most ``len(markups) * 7`` passes are made. The list returned is the one
    from the pass *before* the first pass that left the length unchanged.
    """
    previous = markups
    passes_left = len(markups) * 7
    while passes_left > 0:
        passes_left -= 1
        markups = split_overlapping_range_position(markups)
        if previous and len(previous) == len(markups):
            break
        previous = markups
    return previous
def split_overlapping_range_position(positions):
    """Make one pass over ``positions`` splitting or merging overlapping ranges.

    ``positions`` is sorted in place by ``"start"``. Each item is compared with
    the last item already in the result:

    * no overlap: the item is appended as is;
    * overlap, same ``"type"``: the last item's ``"end"`` is extended in place;
    * overlap, different type, item ends before the last one: the last item is
      cut at the item's start, then the item, a copy of the last item's markup
      over the item's range, and the remainder of the last item are appended;
    * overlap, different type, otherwise: the last item is cut at the item's
      start, then the item and the last item's markup from the item's start to
      the last item's end are appended.
    """
    if not positions:
        return []

    def _piece(start, end, markup):
        # New range carrying the type/template of ``markup``.
        return {"start": start, "end": end, "type": markup["type"], "template": markup["template"]}

    # Sort the positions by start
    positions.sort(key=lambda x: x["start"])
    logger.trace(positions)

    # The first position seeds the result list
    result = [positions[0]]
    logger.trace(result)

    for pos in positions[1:]:
        logger.trace(pos)
        last = result[-1]

        overlaps = pos["start"] < last["end"]
        if not overlaps:
            logger.trace(4)
            result.append(pos)
            logger.trace(result)
            continue

        logger.trace(0)
        different_type = pos["type"] != last["type"]

        if different_type and pos["end"] < last["end"]:
            logger.trace(1)
            result[-1] = _piece(last["start"], pos["start"], last)
            logger.trace(result)
            result.append(_piece(pos["start"], pos["end"], pos))
            logger.trace(result)
            result.append(_piece(pos["start"], pos["end"], last))
            logger.trace(result)
            result.append(_piece(pos["end"], last["end"], last))
            logger.trace(result)
        elif different_type:
            logger.trace(2)
            result[-1] = _piece(last["start"], pos["start"], last)
            logger.trace(result)
            result.append(_piece(pos["start"], pos["end"], pos))
            logger.trace(result)
            result.append(_piece(pos["start"], last["end"], last))
            logger.trace(result)
        else:
            logger.trace(3)
            # Same markup type: grow the previous range instead of adding one
            result[-1]["end"] = max(last["end"], pos["end"])
            logger.trace(result)

    return result
def raw_render(**kwargs):
    """Return the keyword arguments with every ``str`` value wrapped in a Jinja ``raw`` block.

    Non-string values are passed through unchanged.
    """
    return {
        name: (f"{{% raw %}}{val}{{% endraw %}}" if isinstance(val, str) else val)
        for name, val in kwargs.items()
    }
def parse_markups(markups: list):
    """Attach a compiled Jinja template to every supported markup.

    Supported ``"type"`` values are ``"A"`` (with ``"anchorType"`` ``"LINK"`` or
    ``"USER"``), ``"STRONG"``, ``"EM"`` and ``"CODE"``. Unsupported markups are
    logged and skipped. Each accepted markup dict is mutated: its
    ``"template"`` key is set to the compiled template.

    Returns:
        The list of accepted markup dicts, in input order.
    """
    markups_out = []
    for markup in markups:
        logger.trace(markup)
        if markup["type"] == "A":
            if markup["anchorType"] == "LINK":
                template = jinja_env.from_string('<a class="text-base" style="text-decoration: underline;" rel="{{rel}}" title="{{title}}" href="{{href}}" target="_blank">{{text}}</a>')
                template = template.render(raw_render(rel=markup.get("rel", ""), title=markup.get("title", ""), href=markup["href"]))
            elif markup["anchorType"] == "USER":
                template = jinja_env.from_string('<a class="text-base" style="text-decoration: underline;" href="https://medium.com/u/{{userId}}">{{text}}</a>')
                # Wrap the value in a raw block like the LINK branch does: the
                # rendered string is compiled as a template again below, so an
                # unwrapped value containing Jinja syntax would be evaluated.
                template = template.render(raw_render(userId=markup["userId"]))
            else:
                logger.error(f"Can't proccess 'anchorType': {markup['anchorType']}")
                continue
        elif markup["type"] == "STRONG":
            template = "<strong>{{text}}</strong>"
        elif markup["type"] == "EM":
            template = "<em>{{text}}</em>"
        elif markup["type"] == "CODE":
            template = "<code class='p-1 dark:bg-gray-600'>{{text}}</code>"
        else:
            logger.error(f"Unknown markup type: {markup}")
            continue

        template = jinja_env.from_string(template)
        markup["template"] = template
        markups_out.append(markup)
    return markups_out

View file

@ -0,0 +1,41 @@
import re
# "&", "<" and ">" that are not immediately followed by the tail of one of the
# entities produced here (so already-escaped text is not escaped twice).
MINIMAL_QUOTE_PATTERN = re.compile(r"""([&<>])(?!(amp|lt|gt|quot|#39);)""")
MINIMAL_QUOTE_REPLACE_WITH = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
}

NORMAL_QUOTE_PATTERN = re.compile("|".join(map(re.escape, ['"', "'"])))
NORMAL_QUOTE_REPLACE_WITH = {
    '"': "&quot;",  # should be escaped in attributes
    # The terminating ";" is required: without it a following digit is read
    # as part of the reference ("'5" would become "&#395").
    "'": "&#39;",  # should be escaped in attributes
}

EXTRA_QUOTE_PATTERN = re.compile("|".join(map(re.escape, ["\n", "\t"])))  # ' '
EXTRA_QUOTE_REPLACE_WITH = {"\n": "<br />", "\t": "&emsp;"}  # " ": " &nbsp;"

# Typographic quotes mapped to their ASCII equivalents. Written as escapes so
# the keys survive copy/paste and re-encoding of this file.
QUOTE_SYMBOL = {
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
}
def quote_symbol(text: str) -> str:
    """Return ``text`` with every key of ``QUOTE_SYMBOL`` replaced by its mapped value.

    Replacements are applied one after another in the mapping's order.
    """
    normalized = text
    for symbol in QUOTE_SYMBOL:
        normalized = normalized.replace(symbol, QUOTE_SYMBOL[symbol])
    return normalized
# https://stackoverflow.com/questions/1061697/whats-the-easiest-way-to-escape-html-in-python
# XXX: disabling extra quoting as workaround
def quote_html(html: str, quote_types: list[str]) -> list[tuple[int, str]]:
    """Yield ``((start, end), replacement)`` pairs for characters of ``html`` that need escaping.

    This is a generator (despite the ``list`` return annotation). Matches are
    produced group by group, not sorted by position:

    1. ``& < >`` when ``quote_types`` contains "minimal", "full" or "extra";
    2. ``"`` and ``'`` when it contains "normal", "full" or "extra";
    3. newline / tab, guarded by the chained membership test below.
    """
    if any(kind in quote_types for kind in ('minimal', 'full', 'extra')):
        yield from (
            (found.span(), MINIMAL_QUOTE_REPLACE_WITH[found.group(1)])
            for found in MINIMAL_QUOTE_PATTERN.finditer(html)
        )

    if any(kind in quote_types for kind in ('normal', 'full', 'extra')):
        yield from (
            (found.span(), NORMAL_QUOTE_REPLACE_WITH[found.group(0)])
            for found in NORMAL_QUOTE_PATTERN.finditer(html)
        )

    # NOTE(review): this is a chained comparison, i.e.
    # ``'extra' in quote_types and quote_types in quote_types``; for a list
    # argument the second half is False, so this branch does not run. That
    # looks like the "disabling extra quoting" workaround mentioned above the
    # function - confirm before simplifying.
    if 'extra' in quote_types in quote_types:
        for found in EXTRA_QUOTE_PATTERN.finditer(html):
            begin, stop = span = found.span()
            yield span, EXTRA_QUOTE_REPLACE_WITH[html[begin:stop]]

25
rl_string_helper/setup.py Normal file
View file

@ -0,0 +1,25 @@
from setuptools import setup, find_packages
# Function to read the contents of the requirements file
def read_requirements():
    """Return the lines of ``requirements.txt`` (relative to the current directory) as a list."""
    with open('requirements.txt', 'r') as requirements_file:
        contents = requirements_file.read()
    return contents.splitlines()
# Read the long description through a context manager so the file handle is
# closed deterministically; decode explicitly so the build does not depend on
# the locale of the machine running it.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='rl_string_helper',
    version='0.1.0',
    author='Freedium community',
    author_email='admin@freedium.cfd',
    description='Helper for Medium parser backend',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://codeberg.org/Freedium-cfd/web',
    packages=find_packages(),
    install_requires=read_requirements(),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # The package sources use f"{name=}" (3.8+) and evaluate built-in generic
    # annotations such as list[str] at definition time (3.9+).
    python_requires='>=3.9',
)

View file

View file

@ -0,0 +1,127 @@
import sys
from loguru import logger
from rl_string_helper import RLStringHelper, quote_html, parse_markups
class TestRLStringHelper:
    """Tests for ``quote_html``, ``parse_markups`` and ``RLStringHelper`` rendering."""

    def setup_method(self):
        # Send every log record, down to TRACE, to stdout for each test.
        logger.remove()
        logger.add(sys.stdout, level="TRACE")

    def test_html_quote(self):
        # quote_html takes the list of quote types as a required argument and
        # yields the "& < >" matches first, followed by the quote characters.
        quoted_string_1 = [i for i in quote_html("<Hello world>", ["full"])]
        assert quoted_string_1 == [((0, 1), '&lt;'), ((12, 13), '&gt;')]

        # Test with standard HTML characters
        html = '<div class="test">Hello & World</div>'
        result = list(quote_html(html, ["full"]))
        expected = [((0, 1), '&lt;'), ((17, 18), '&gt;'), ((24, 25), '&amp;'), ((31, 32), '&lt;'), ((36, 37), '&gt;'), ((11, 12), '&quot;'), ((16, 17), '&quot;')]
        assert result == expected

        # Test with extra characters: newline/tab quoting is currently
        # disabled inside quote_html, so no '<br />' replacement is produced.
        html = '<div class="test">\nHello & World</div>'
        result = list(quote_html(html, ["extra"]))
        expected = [((0, 1), '&lt;'), ((17, 18), '&gt;'), ((25, 26), '&amp;'), ((32, 33), '&lt;'), ((37, 38), '&gt;'), ((11, 12), '&quot;'), ((16, 17), '&quot;')]
        assert result == expected

        # Test with quote characters
        html = '<div class="test">Hello & \'World\'</div>'
        result = list(quote_html(html, ["full"]))
        expected_spans = [(0, 1), (17, 18), (24, 25), (33, 34), (38, 39), (11, 12), (16, 17), (26, 27), (32, 33)]
        assert [span for span, _ in result] == expected_spans
        assert [replacement for _, replacement in result[:7]] == ['&lt;', '&gt;', '&amp;', '&lt;', '&gt;', '&quot;', '&quot;']
        # Only the prefix is pinned so the check is independent of whether the
        # apostrophe entity carries its terminating ";".
        assert all(replacement.startswith('&#39') for _, replacement in result[7:])

    def test_basic_template(self):
        helper = RLStringHelper("Hello world")
        helper.set_template(0, 5, "<a>{{text}}</a>")
        assert str(helper) == "<a>Hello</a> world"

        helper.set_template(6, 11, "<b>{{text}}</b>")
        assert str(helper) == "<a>Hello</a> <b>world</b>"

        helper.set_template(0, 11, "<i>{{text}}</i>")
        assert str(helper) == "<i><a>Hello</a> <b>world</b></i>"

    def test_basic_replace(self):
        # Replace A to B - ONE to ONE char
        helper = RLStringHelper("ABC")
        helper.set_replace(0, 1, "B")
        assert str(helper) == "BBC"

        # Replace first B to AA - ONE to TWO chars
        helper.set_replace(0, 1, "AA")
        assert str(helper) == "AABC"

        # Replace C to D - ONE to ONE char
        helper.set_replace(2, 3, "D")
        assert str(helper) == "AABD"

        # Replace BD to R - TWO to ONE char
        helper.set_replace(1, 3, "R")
        assert str(helper) == "AAR"

        # Replace AA to CD
        helper.set_replace(0, 2, "CD")
        assert str(helper) == "CD"

    def test_multibyte_replace(self):
        helper = RLStringHelper("TESERT - 📊 - ABC")
        helper.set_replace(0, 6, "B")
        assert helper.get_text() == "B - 📊 - ABC"

        helper = RLStringHelper("Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.")
        helper.set_template(0, 200, "<kr>{{text}}</kr>")
        helper.set_template(0, 200, "<kz>{{text}}</kz>")
        assert helper.get_text() == "<kz><kr>Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.</kr></kz>"

        helper = RLStringHelper("TESERT ALMACOM - 📊 - ABC")
        helper.set_replace(0, 14, "B")
        assert helper.get_text() == "B - 📊 - ABC"

        helper = RLStringHelper("hello - 📊 - ABC")
        helper.set_template(0, 5, "<a>{{text}}</a>")
        assert helper.get_text() == "<a>hello</a> - 📊 - ABC"

        helper = RLStringHelper("ABC 📊 - How are you?")
        helper.set_template(4, 6, "<a>{{text}}</a>")
        assert str(helper) == "ABC <a>📊</a> - How are you?"

        helper = RLStringHelper("We have a 📊, a 📊 and a 📊.")
        helper.set_template(0, 30, "<e>{{text}}</e>")
        assert helper.get_text() == "<e>We have a 📊, a 📊 and a 📊.</e>"

    def test_romano(self):
        issue_text = "Whilst academic research papers have highlighted performance issues with the prophet since 2017, the propagation of package popularity through the data science community has been fueled by 𝙗𝙤𝙩𝙝 𝙚𝙭𝙘𝙚𝙨𝙨𝙞𝙫𝙚 𝙘𝙡𝙖𝙞𝙢𝙨 𝙛𝙧𝙤𝙢 𝙩𝙝𝙚 𝙤𝙧𝙞𝙜𝙞𝙣𝙖𝙡 𝙙𝙚𝙫𝙚𝙡𝙤𝙥𝙢𝙚𝙣𝙩 𝙩𝙚𝙖𝙢 𝙗𝙪𝙩 𝙢𝙤𝙧𝙚 𝙞𝙢𝙥𝙤𝙧𝙩𝙖𝙣𝙩𝙡𝙮 𝙗𝙮 𝙢𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 𝙤𝙛 𝙩𝙝𝙚 𝙣𝙤𝙣-𝙥𝙚𝙧𝙛𝙤𝙧𝙢𝙞𝙣𝙜 𝙥𝙖𝙘𝙠𝙖𝙜𝙚 𝙫𝙞𝙖 𝙖𝙧𝙩𝙞𝙘𝙡𝙚𝙨 𝙤𝙣 𝙈𝙚𝙙𝙞𝙪𝙢 𝙖𝙣𝙙 𝙨𝙤𝙘𝙞𝙖𝙡 𝙢𝙚𝙙𝙞𝙖."
        helper = RLStringHelper(issue_text)
        assert helper.get_text() == issue_text

    def test_markup_parser(self):
        href_markup = {
            "__typename": 'Markup',
            "anchorType": 'LINK',
            "end": 12,
            "href": 'https://readwise.io/bookreview/{{book_id',
            "name": None,
            "rel": 'nofollow',
            "start": 0,
            "title": '',
            "type": 'A',
            "userId": None
        }
        helper = RLStringHelper("Hello world")
        markups = parse_markups([href_markup])
        for markup in markups:
            helper.set_template(markup["start"], markup["end"], markup["template"])

        # The LINK template emitted by parse_markups carries class="text-base".
        assert helper.get_text() == '<a class="text-base" style="text-decoration: underline;" rel="nofollow" title="" href="https://readwise.io/bookreview/{{book_id" target="_blank">Hello world</a>'

    def test_medium_all(self):
        helper = RLStringHelper("ABC Hello world")
        helper.set_replace(0, 1, "B")
        assert str(helper) == "BBC Hello world"

        helper.set_template(4, 9, "<a>{{text}}</a>")
        assert str(helper) == "BBC <a>Hello</a> world"

        helper.set_template(10, 15, "<b>{{text}}</b>")
        assert str(helper) == "BBC <a>Hello</a> <b>world</b>"

11
ruff.toml Normal file
View file

@ -0,0 +1,11 @@
# Ruff linter configuration.
# NOTE(review): parts of this repository use f"{name=}" and list[str]
# annotations, which are newer than Python 3.7 - confirm this target.
target-version = "py37"
# Rule families that are enabled.
select = [
"E", # pycodestyle
"F", # pyflakes
"UP", # pyupgrade
"I", # isort
]
# Maximum line length accepted by the linter.
line-length = 120
# F401 (unused import) is ignored in files named __init__.py.
per-file-ignores = {"__init__.py" = ["F401"]}

8
scripts/build.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/bash
# Compile the "server" package into a standalone build with Nuitka.
#
# One-time prerequisites (run manually):
# pip install nuitka==1.8
# sudo apt install patchelf ccache -y
# sudo /usr/sbin/update-ccache-symlinks
# export PATH="/usr/lib/ccache:$PATH"
python3 -m nuitka \
    --standalone \
    --nofollow-import-to=pytest \
    --python-flag=nosite,-O,isolated \
    --plugin-enable=anti-bloat,implicit-imports,data-files,pylint-warnings \
    --warn-implicit-exceptions \
    --warn-unusual-code \
    --prefer-source-code \
    --include-package=uvicorn.workers \
    --verbose \
    --show-modules \
    --show-memory \
    --show-progress \
    --show-scons \
    server # --low-memory

37
scripts/check_health.py Normal file
View file

@ -0,0 +1,37 @@
import asyncio
import aiohttp
import os
from aiogram import Bot
from loguru import logger
# Telegram bot token; the script refuses to start without it.
BOT_TOKEN = os.getenv("BOT_TOKEN")

if not BOT_TOKEN:
    raise ValueError("No bot token!")

bot = Bot(BOT_TOKEN)

# Chat that receives the alert messages.
ADMIN_CHAT_ID = "1621425349"
# Pause between two checks, in seconds.
SLEEP_TIME = 15 * 60


async def main():
    """Poll https://freedium.cfd forever and alert the admin chat when it is not healthy.

    A check counts as failed when the request raises or the HTTP status is
    not 200. The loop never returns on its own.
    """
    while True:
        logger.debug("Checking health of freedium.cfd")

        # Bound before the request so the check below never sees an unset
        # name (previously the ``finally`` clause could hit a NameError when
        # the first request was interrupted by a non-``Exception`` error).
        response_status = None
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get("https://freedium.cfd", timeout=3) as response:
                    response_status = response.status
        except Exception as ex:
            logger.exception(ex)
            response_status = "ERROR"

        if response_status != 200:
            try:
                await bot.send_message(ADMIN_CHAT_ID, "EMERGENCY! SITE IS DOWN!!!")
            except Exception as ex:
                # A Telegram outage must not stop the monitor itself.
                logger.exception(ex)

        logger.debug("Sleeping ...")
        await asyncio.sleep(SLEEP_TIME)


if __name__ == "__main__":
    asyncio.run(main())

2
scripts/dev_clean.sh Executable file
View file

@ -0,0 +1,2 @@
# Apply ruff's automatic fixes to the whole tree, then reformat it with black.
ruff check ./ --fix
black .

3
scripts/disable_redis.sh Executable file
View file

@ -0,0 +1,3 @@
# Show the current state of the redis service, stop it, and keep it from
# being started automatically.
sudo systemctl status redis
sudo systemctl stop redis
sudo systemctl disable redis

View file

@ -0,0 +1,54 @@
from os import listdir
from os.path import isfile, join
from jinja2 import Template
# Rendered Caddy rule snippets, in output order.
OUTPUT_RULES = []

# Regular files located directly inside ./static (sub-directories are skipped).
static_files = []
for entry in listdir("./static"):
    if isfile(join("./static", entry)):
        static_files.append(entry)

# One rule per static file: serve it at the site root under its own name.
static_file_template = """
handle_path /{{ file }} {
root * ./static/{{ file }}
file_server
}
"""
static_file_template_jinja = Template(static_file_template)

OUTPUT_RULES.extend(static_file_template_jinja.render(file=name) for name in static_files)

# Paths that always get a 403 response.
ACCESS_DENIED_PATHS = ["onboarding/*", "wp-*", ".env", "api*", "apple-touch-icon-precomposed.png", "rss.xml", ".git/*", "apple-touch-icon-120x120.png", "apple-touch-icon-120x120-precomposed.png", "apple-touch-icon-152x152.png", "apple-touch-icon-152x152-precomposed.png", ".well-known/*"]

access_denied_paths_template = """
handle_path /{{ file }} {
respond "Access denied" 403
}
"""
access_denied_paths_template_jinja = Template(access_denied_paths_template)

OUTPUT_RULES.extend(access_denied_paths_template_jinja.render(file=path) for path in ACCESS_DENIED_PATHS)

HUMAN_OUTPUT_RULES = "\n".join(OUTPUT_RULES)

# with open("scripts/output_rules.txt", "w") as file:
# file.write(HUMAN_OUTPUT_RULES)

# Source template file -> generated Caddyfile.
caddy_file_templates = {
    "CaddyfileDevTemplate": "CaddyfileDev",
    "CaddyfileProdTemplate": "CaddyfileProd",
}

for template_path in caddy_file_templates:
    output_path = caddy_file_templates[template_path]
    with open(template_path) as source:
        rendered = Template(source.read()).render(template=HUMAN_OUTPUT_RULES)
    with open(output_path, "w") as target:
        target.write(rendered)

View file

@ -0,0 +1,51 @@
from aiohttp_client_cache import CachedSession, SQLiteBackend
import asyncio
from loguru import logger
import json
from jinja2 import Template
from progress.bar import Bar
import datetime
# Sitemap entries collected by main().
MEDIUM_URLS = []


async def main():
    """Build ``static/sitemap.xml`` from the responses stored in ``medium_cache.sqlite``.

    Every cached response body is parsed as JSON; entries without a post (or
    without a ``mediumUrl``) are logged and skipped. All entries share the
    date of the run as ``lastmod``.
    """
    # Same value for every entry, so compute it once.
    lastmod_date = datetime.datetime.now().strftime('%Y-%m-%d')

    async with CachedSession(cache=SQLiteBackend('medium_cache.sqlite')) as session:
        responses = [resp async for resp in session.cache.responses.values()]
        bar = Bar('Processing...', max=len(responses))
        for resp in responses:
            body = json.loads(resp._body)
            # Tolerate payloads lacking "data"/"post" (for example error
            # responses) instead of aborting the whole run with a KeyError.
            post = (body.get("data") or {}).get("post")
            url = post.get("mediumUrl") if post is not None else None
            if url is None:
                logger.error("Ignoring non valid Medium post data")
                bar.next()
                continue

            MEDIUM_URLS.append({"url": url, "lastmod": lastmod_date, "changefreq": "monthly", "priority": "1.0"})
            bar.next()
        bar.finish()

        # The URL is entity-escaped ("|e"): the sitemap protocol requires
        # characters such as "&" to be escaped, and a plain jinja2 Template
        # does not autoescape.
        sitemap_template = '''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for page in pages %}
<url>
<loc>{{page.url|e}}</loc>
<lastmod>{{page.lastmod}}</lastmod>
<changefreq>{{page.changefreq}}</changefreq>
<priority>{{page.priority}}</priority>
</url>
{% endfor %}
</urlset>'''

        template = Template(sitemap_template)
        sitemap_output = template.render(pages=MEDIUM_URLS)

        with open("static/sitemap.xml", 'w') as f:
            f.write(sitemap_output)

        logger.info("Done")


asyncio.run(main())

148
scripts/output_rules.txt Normal file
View file

@ -0,0 +1,148 @@
handle_path /site.webmanifest {
root * ./static/site.webmanifest
file_server
}
handle_path /favicon-32x32.png {
root * ./static/favicon-32x32.png
file_server
}
handle_path /robots.txt {
root * ./static/robots.txt
file_server
}
handle_path /ads.txt {
root * ./static/ads.txt
file_server
}
handle_path /humans.txt {
root * ./static/humans.txt
file_server
}
handle_path /mstile-150x150.png {
root * ./static/mstile-150x150.png
file_server
}
handle_path /mstile-310x310.png {
root * ./static/mstile-310x310.png
file_server
}
handle_path /sitemap.xml {
root * ./static/sitemap.xml
file_server
}
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
file_server
}
handle_path /mstile-70x70.png {
root * ./static/mstile-70x70.png
file_server
}
handle_path /android-chrome-192x192.png {
root * ./static/android-chrome-192x192.png
file_server
}
handle_path /mstile-310x150.png {
root * ./static/mstile-310x150.png
file_server
}
handle_path /safari-pinned-tab.svg {
root * ./static/safari-pinned-tab.svg
file_server
}
handle_path /android-chrome-512x512.png {
root * ./static/android-chrome-512x512.png
file_server
}
handle_path /favicon-16x16.png {
root * ./static/favicon-16x16.png
file_server
}
handle_path /favicon.ico {
root * ./static/favicon.ico
file_server
}
handle_path /browserconfig.xml {
root * ./static/browserconfig.xml
file_server
}
handle_path /mstile-144x144.png {
root * ./static/mstile-144x144.png
file_server
}
handle_path /security.txt {
root * ./static/security.txt
file_server
}
handle_path /apple-touch-icon.png {
root * ./static/apple-touch-icon.png
file_server
}
handle_path /onboarding/* {
respond "Access denied" 403
}
handle_path /wp-* {
respond "Access denied" 403
}
handle_path /.env {
respond "Access denied" 403
}
handle_path /api* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-precomposed.png {
respond "Access denied" 403
}
handle_path /rss.xml {
respond "Access denied" 403
}
handle_path /.git/* {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-120x120-precomposed.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152.png {
respond "Access denied" 403
}
handle_path /apple-touch-icon-152x152-precomposed.png {
respond "Access denied" 403
}
handle_path /.well-known/* {
respond "Access denied" 403
}

4
scripts/register_caddy.sh Executable file
View file

@ -0,0 +1,4 @@
#!/bin/bash
# Allow the bundled caddy binary to bind privileged ports (< 1024) without
# running as root.

# Second field of lscpu's "Architecture:" line, e.g. x86_64.
arch=$(lscpu | grep Architecture | awk '{print $2}')

# Quoted so a working directory containing spaces does not split the path.
sudo setcap cap_net_bind_service=+ep "$(pwd)/bin/${arch}/caddy"

85
scripts/start_dev.sh Executable file
View file

@ -0,0 +1,85 @@
#!/bin/bash
# Same script as start_prod, but adopted to dev environment
# Print whether the environment variable whose NAME is given as $1 is set,
# including its value when it is.
check_env_var() {
    local var_name="$1"
    local var_value="${!var_name}"
    if [[ -n "$var_value" ]]; then
        echo "$var_name var is set to '$var_value'"
    else
        echo "$var_name var is blank"
    fi
}
# Report the Telegram settings used by sendMessageTelegram.
check_env_var "TELEGRAM_ADMIN_ID"
check_env_var "TELEGRAM_BOT_TOKEN"
# CPU architecture as reported by lscpu; selects the caddy binary under ./bin.
arch=$(lscpu | grep Architecture | awk {'print $2'})
echo $arch
# Wipe every redis database before starting.
redis-cli flushall
# Start caddy with the dev config in the background and remember its PID.
./bin/$arch/caddy run --config CaddyfileDev &
CADDY_PID=$!
# Start the backend with asyncio debug mode enabled and remember its PID.
PYTHONASYNCIODEBUG=1 python3 -m server server &
SERVER_PID=$!
# Stop both background processes when this script exits.
function onexit {
    printf '%s\n' "onexit"
    kill $CADDY_PID
    kill $SERVER_PID
}
trap onexit EXIT
# Echo the message ($1) and send it to the admin chat through the Telegram
# Bot API. The text is placed into the JSON body as is.
sendMessageTelegram(){
    echo ${1}
    local message=${1}
    local payload="{\"chat_id\":\"$TELEGRAM_ADMIN_ID\",\"text\":\"$message\"}"
    local endpoint="https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage"
    curl -X POST -H 'Content-Type: application/json' -d "$payload" "$endpoint"
}
# Watchdog loop: restart caddy / the backend when their process is gone or
# when their HTTP endpoint does not answer.
while true
do
    sleep 15

    # "kill -0" sends no signal; it only reports whether the PID exists.
    # (Grepping the output of "ps -A" for the number also matched other PIDs
    # and process names that merely contain it.)
    if ! kill -0 "$CADDY_PID" 2>/dev/null; then
        # sendMessageTelegram "Restarting caddy, since it's down"
        # "caddy run" stays in the foreground of the background job, so $! is
        # the PID of caddy itself. "caddy start" detaches and returns at once,
        # which left a dead PID behind and triggered a restart on every pass.
        ./bin/$arch/caddy run --config CaddyfileDev &
        CADDY_PID=$!
    fi

    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        # sendMessageTelegram "Restarting server, since it's down"
        PYTHONASYNCIODEBUG=1 python3 -m server server &
        # Was "%!" (a literal string), so the new PID was never recorded.
        SERVER_PID=$!
    fi

    sleep 35

    backend_service_url="http://localhost:7080"
    # curl prints 000 when no HTTP response was received at all.
    backend_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$backend_service_url")
    if [ "$backend_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting backend, since it's down"
        kill $SERVER_PID
        PYTHONASYNCIODEBUG=1 python3 -m server server &
        SERVER_PID=$!
    fi

    reverse_service_url="http://localhost:6752"
    reverse_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$reverse_service_url")
    if [ "$reverse_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting reverse, since it's down"
        kill $CADDY_PID
        ./bin/$arch/caddy run --config CaddyfileDev &
        CADDY_PID=$!
    fi

    sleep 65
done

7
scripts/start_maintance.sh Executable file
View file

@ -0,0 +1,7 @@
#!/bin/bash
arch=$(lscpu | grep Architecture | awk {'print $2'})
echo $arch
redis-cli flushall
./bin/$arch/caddy run --config ./CaddyfileMaintance

74
scripts/start_prod.sh Executable file
View file

@ -0,0 +1,74 @@
#!/bin/bash
if [ -z "$TELEGRAM_ADMIN_ID" ]; then echo "TELEGRAM_ADMIN_ID var is blank"; else echo "TELEGRAM_ADMIN_ID var is set to '$TELEGRAM_ADMIN_ID'"; fi
# Only report whether the bot token is present: printing the secret itself
# would leak it into the production console and log files.
if [ -z "$TELEGRAM_BOT_TOKEN" ]; then echo "TELEGRAM_BOT_TOKEN var is blank"; else echo "TELEGRAM_BOT_TOKEN var is set"; fi

# CPU architecture as reported by lscpu; selects the caddy binary under ./bin.
arch=$(lscpu | grep Architecture | awk '{print $2}')

# Wipe every redis database before starting.
redis-cli flushall

# Start caddy with the production config in the background and remember its PID.
./bin/$arch/caddy run --config CaddyfileProd &
CADDY_PID=$!

# Start the backend in the background and remember its PID.
python3 -m server server &
SERVER_PID=$!
# Stop both background processes after a 25 second grace period.
# The trap that would call this on exit is commented out below.
onexit() {
    printf '%s\n' "onexit"
    sleep 25
    kill $CADDY_PID
    kill $SERVER_PID
}
# trap onexit EXIT
# Echo the message ($1) and send it to the admin chat through the Telegram
# Bot API. The text is placed into the JSON body as is.
sendMessageTelegram(){
    echo ${1}
    local message=${1}
    local payload="{\"chat_id\":\"$TELEGRAM_ADMIN_ID\",\"text\":\"$message\"}"
    local endpoint="https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage"
    curl -X POST -H 'Content-Type: application/json' -d "$payload" "$endpoint"
}
# Watchdog loop: restart caddy / the backend when their process is gone or
# when their HTTP endpoint does not answer as expected.
while true
do
    sleep 5

    # "kill -0" sends no signal; it only reports whether the PID exists.
    # (Grepping the output of "ps -A" for the number also matched other PIDs
    # and process names that merely contain it, hiding a dead process.)
    if ! kill -0 "$CADDY_PID" 2>/dev/null; then
        sendMessageTelegram "Restarting caddy, since it's down"
        ./bin/$arch/caddy run --config CaddyfileProd &
        CADDY_PID=$!
    fi

    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        sendMessageTelegram "Restarting server, since it's down"
        python3 -m server server &
        SERVER_PID=$!
    fi

    sleep 5

    backend_service_url="http://localhost:7080"
    # curl prints 000 when no HTTP response was received at all.
    backend_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$backend_service_url")
    if [ "$backend_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting backend, since it's down"
        kill $SERVER_PID
        python3 -m server server &
        SERVER_PID=$!
    fi

    reverse_service_url="http://localhost"
    reverse_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$reverse_service_url")
    # NOTE(review): any status below 308 counts as "down" here - presumably
    # plain HTTP is expected to answer with a 308 redirect; confirm.
    if [ "$reverse_status_code" -lt 308 ]; then
        sendMessageTelegram "Restarting reverse, since it's down"
        kill $CADDY_PID
        ./bin/$arch/caddy run --config CaddyfileProd &
        CADDY_PID=$!
    fi

    sleep 65
done

43
server/__init__.py Normal file
View file

@ -0,0 +1,43 @@
import datetime as dt
import pickledb
import logging
from contextvars import ContextVar
from typing import Optional
from jinja2 import Environment, DebugUndefined, FileSystemLoader
import redis.asyncio as redis
from xkcdpass import xkcd_password as xp
from server import config
from server.utils.loguru_handler import InterceptHandler
# Async Redis client for database 0 of the server on localhost:6379.
redis_storage = redis.Redis(host="localhost", port=6379, db=0)
# String-template environments: an async one, and a synchronous one that uses
# DebugUndefined for undefined variables.
jinja_env = Environment(enable_async=True)
jinja_safe_env = Environment(undefined=DebugUndefined)
# The same pair of environments, loading templates from ./server/templates.
template_env = Environment(loader=FileSystemLoader("./server/templates"), enable_async=True)
template_safe_env = Environment(loader=FileSystemLoader("./server/templates"), undefined=DebugUndefined)
base_template = template_env.get_template("base.html")
# NOTE(review): render() is called here on a template from an environment
# created with enable_async=True - confirm it returns the rendered string.
url_line_template = template_env.get_template("url_line.html").render()
main_template_raw = template_safe_env.get_template("main.html")
postleter_template = template_env.get_template("postleter.html")
error_template_raw = template_safe_env.get_template("error.html")
# Two-stage rendering: main.html / error.html are first rendered with only
# url_line supplied, and the resulting text is compiled again with the async
# environment. Presumably DebugUndefined keeps the remaining placeholders in
# the text for that second stage - TODO confirm.
main_template_raw_rendered = main_template_raw.render(url_line=url_line_template)
main_template = jinja_env.from_string(main_template_raw_rendered)
error_template_raw_rendered = error_template_raw.render(url_line=url_line_template)
error_template = jinja_env.from_string(error_template_raw_rendered)
# Replace any existing root logging configuration (force=True) with the
# InterceptHandler at level 0.
logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
# Context-local values with placeholder defaults.
url_correlation: ContextVar[Optional[str]] = ContextVar("url_correlation", default="UNKNOWN_URL")
transponder_code_correlation: ContextVar[Optional[str]] = ContextVar("transponder_code_correlation", default="unknown transponder location... Beep!")
# Database loaded from ban_post_list.db.
# NOTE(review): the second argument is presumably pickledb's auto-dump flag - confirm.
ban_db = pickledb.load('ban_post_list.db', True)
# Wall-clock time (HH-MM-SS) at which this module was imported.
START_TIME = dt.datetime.now().strftime("%H-%M-%S")
# NOTE(review): the path ends in "legac" - possibly a truncated "legacy"; verify the file exists.
WORDS_LIST_FILE = "xkcdpass/static/legac"
# Word list restricted to words of 5 to 8 characters.
xkcd_passwd = xp.generate_wordlist(wordfile=WORDS_LIST_FILE, min_length=5, max_length=8)

3
server/__main__.py Normal file
View file

@ -0,0 +1,3 @@
from server.cli import cli
# Entry point for `python3 -m server`: hand control to the argparse-based CLI.
cli()

28
server/cli.py Normal file
View file

@ -0,0 +1,28 @@
from argparse import ArgumentParser
from loguru import logger
def cli():
    """Parse the command line and dispatch to the selected sub-command.

    The only sub-command is ``server``, which accepts an optional ``--port``
    (7080 both as default and when the flag is given without a value).
    """
    root_parser = ArgumentParser(prog="python3 -m server", description="Freedium server CLI")
    commands = root_parser.add_subparsers(dest="cmd", required=True)

    server_parser = commands.add_parser("server")
    server_parser.add_argument("--port", nargs="?", type=int, const=7080, help="Port number", default=7080)

    opts = root_parser.parse_args()
    logger.trace(opts)

    if opts.cmd != "server":
        return
    server_cmd(server_parser, opts)
def server_cmd(cmd, opts):
    """Run the ``server`` sub-command.

    Args:
        cmd: the sub-command's ArgumentParser; used to report a usage error.
        opts: parsed arguments; ``opts.port`` is the port to listen on.
    """
    # Imports stay local so the worker module is only loaded once the port
    # check has passed.
    from server.utils.utils import is_port_in_use

    port = opts.port
    if is_port_in_use(port):
        cmd.error(f"Port {opts.port} is in use or permission denied")

    from server.worker import execute_server_worker

    execute_server_worker(host="0.0.0.0", port=port)

19
server/config.py Normal file
View file

@ -0,0 +1,19 @@
from starlette.config import Config
# Application settings. Values are looked up through starlette's Config,
# which is pointed at the local ".env" file.
config = Config(".env")

# Public base URL of this instance; injected into rendered pages.
HOST_ADDRESS = config("HOST_ADDRESS", default="https://freedium.cfd")
# Cookies passed to MediumParser as `auth_cookies`.
MEDIUM_AUTH_COOKIES = config("MEDIUM_AUTH_COOKIES", default=None)

TELEGRAM_ADMIN_ID = config("TELEGRAM_ADMIN_ID", cast=int, default=0)
# Shared secret for admin-only endpoints (required, no default).
# Handlers read it as `config.ADMIN_SECRET_KEY`; previously only the
# doubled-prefix name existed, so that lookup raised AttributeError.
ADMIN_SECRET_KEY = config("ADMIN_SECRET_KEY")
# Backward-compatible alias for the original (misspelled) name.
ADMIN_ADMIN_SECRET_KEY = ADMIN_SECRET_KEY
TELEGRAM_BOT_TOKEN = config("TELEGRAM_BOT_TOKEN", default=None)

LOG_LEVEL_NAME = config("LOG_LEVEL_NAME", default="INFO")
MORE_LOGS = config("MORE_LOGS", cast=bool, default=False)

DISABLE_EXTERNAL_DOCS = config("DISABLE_EXTERNAL_DOCS", cast=bool, default=True)
DISABLE_RATE_LIMITER = config("DISABLE_RATE_LIMITER", cast=bool, default=True)

# Timeouts, in seconds.
TIMEOUT = config("TIMEOUT", cast=int, default=20)
REQUEST_TIMEOUT = config("REQUEST_TIMEOUT", cast=int, default=40)
WORKER_TIMEOUT = config("WORKER_TIMEOUT", cast=int, default=60)

SENTRY_SDK_DSN = config("SENTRY_SDK_DSN", default=None)
ENABLE_ADS_BANNER = config("ENABLE_ADS_BANNER", cast=bool, default=False)

# Lifetime of cached rendered posts in Redis, in seconds (default: one day).
CACHE_LIFE_TIME = config("CACHE_LIFE_TIME", cast=int, default=60 * 60 * 24)

View file

@ -0,0 +1 @@

18
server/exceptions/main.py Normal file
View file

@ -0,0 +1,18 @@
import sentry_sdk
from server.utils.error import generate_error
from server.utils.logger_trace import trace
@trace
async def handle_500_error(request, exc):
    """Report an unhandled exception to Sentry and answer with the generic error page.

    Args:
        request: the incoming request (unused).
        exc: the exception that reached the handler.
    """
    # Re-raise so Sentry captures the exception from an active ``except`` block.
    try:
        raise exc
    except Exception as captured:
        sentry_sdk.capture_exception(captured)
    error_response = await generate_error()
    return error_response
def register_main_error_handler(app):
    """Attach ``handle_500_error`` to ``app`` as the handler for HTTP 500."""
    status_code, handler = 500, handle_500_error
    app.add_exception_handler(status_code, handler)

View file

@ -0,0 +1 @@
from server.handlers import main

62
server/handlers/main.py Normal file
View file

@ -0,0 +1,62 @@
from html5lib.html5parser import parse
from html5lib import serialize
from fastapi.responses import HTMLResponse
from server import base_template, main_template, config
from fastapi import Request
from server.handlers.post import render_medium_post_link, render_postleter
from server.handlers.reverse_proxy import miro_proxy, iframe_proxy
from server.handlers.misc import report_problem, delete_from_cache
from server.utils.logger_trace import trace
@trace
async def route_processing(path: str, request: Request):
    """Catch-all GET/HEAD handler that dispatches on the request path.

    Routing, after the leading "/" is removed:
      * empty path                       -> main page
      * ``render-no-cache/no-redis/...`` -> render the post, skipping Redis
      * ``render-no-cache/...``          -> render the post with use_cache=False
      * ``@miro/...``                    -> proxy an image from miro.medium.com
      * ``render_iframe/...``            -> proxy a Medium embed iframe
      * anything else                    -> render as a Medium post id / URL

    Args:
        path: value of the ``{path:path}`` route parameter; only used to
            detect the root page.
        request: the incoming request; its URL path and raw query string are
            used to rebuild the full target.
    """
    if not path:
        return await main_page()

    # Rebuild the target from the raw request so the query string is kept.
    path = request.url.path
    query_string = request.scope.get("query_string")
    if query_string:
        path = path + "?" + query_string.decode()
    path = path.removeprefix("/")

    if path.startswith("render-no-cache/"):
        path = path.removeprefix("render-no-cache/")
        # The remainder no longer starts with "/", so the previous check for
        # "/no-redis/" never matched "/render-no-cache/no-redis/...". The old
        # spelling is still accepted in case anything relied on a double slash.
        for no_redis_prefix in ("no-redis/", "/no-redis/"):
            if path.startswith(no_redis_prefix):
                path = path.removeprefix(no_redis_prefix)
                return await render_medium_post_link(path, True, False)
        return await render_medium_post_link(path, False)
    elif path.startswith("@miro/"):
        miro_data = path.removeprefix("@miro/")
        return await miro_proxy(miro_data)
    elif path.startswith("render_iframe/"):
        iframe_id = path.removeprefix("render_iframe/")
        return await iframe_proxy(iframe_id)

    return await render_medium_post_link(path)
@trace
async def main_page():
    """Render the landing page: the post list inside main.html inside base.html.

    The final markup is round-tripped through html5lib (parse + serialize)
    before being returned as an HTMLResponse.
    """
    postleter_html = await render_postleter(as_html=True)
    body_html = await main_template.render_async(postleter=postleter_html)
    page_html = await base_template.render_async(
        body_template=body_html,
        HOST_ADDRESS=config.HOST_ADDRESS,
    )
    document = parse(page_html)
    return HTMLResponse(serialize(document, encoding='utf-8'))
def register_main_router(app):
    """Register the POST admin/report endpoints and the catch-all page route on ``app``."""
    # Fixed POST endpoints are registered before the catch-all route.
    post_endpoints = (
        ("/delete-from-cache", delete_from_cache),
        ("/report-problem", report_problem),
    )
    for route_path, endpoint in post_endpoints:
        app.add_api_route(path=route_path, endpoint=endpoint, methods=["POST"])

    catch_all_options = {
        "path": "/{path:path}",
        "endpoint": route_processing,
        "methods": ["GET", "HEAD"],
        "response_model": str,
        "tags": ["pages"],
        "summary": None,
        "description": None,
    }
    app.add_api_route(**catch_all_options)

38
server/handlers/misc.py Normal file
View file

@ -0,0 +1,38 @@
from loguru import logger
from pydantic import BaseModel
from fastapi.responses import JSONResponse
from server import config, ban_db
from server.utils.notify import send_message
from server.utils.logger_trace import trace
class ReportProblem(BaseModel):
    """JSON body of ``POST /report-problem``."""

    # Page the report refers to; the frontend sends window.location.href.
    page: str
    # Free-text description entered by the user.
    description: str
class DeleteFromCache(BaseModel):
    """JSON body of ``POST /delete-from-cache``."""

    # Key of the post to evict; passed to MediumParser and stored in ban_db.
    key: str
    # Secret supplied by the caller; must match the configured admin secret.
    ADMIN_SECRET_KEY: str
@trace
async def report_problem(problem: ReportProblem):
    """Forward a user-submitted problem report through ``send_message`` and acknowledge it."""
    report_text = f"New problem report: \n{problem.description}\n\n{problem.page}"
    await send_message(report_text)
    return JSONResponse({"message": "OK"}, status_code=200)
@trace
async def delete_from_cache(key_data: DeleteFromCache):
    """Evict a post from the parser cache and record its key in the ban list.

    Args:
        key_data: request body with the post key and the admin secret.

    Returns:
        JSONResponse with status 200 on success, 403 when the secret does not
        match, or 500 when the cache deletion raised.
    """
    # MediumParser was referenced without being imported in this module, so
    # the NameError was swallowed by the ``except`` below and the endpoint
    # always answered 500. Imported locally to keep the fix self-contained.
    from medium_parser.core import MediumParser

    # server.config binds the secret as ADMIN_ADMIN_SECRET_KEY; accept either
    # spelling so this lookup cannot raise AttributeError.
    expected_secret = getattr(config, "ADMIN_SECRET_KEY", None)
    if expected_secret is None:
        expected_secret = config.ADMIN_ADMIN_SECRET_KEY

    if key_data.ADMIN_SECRET_KEY != expected_secret:
        return JSONResponse({"message": f"Wrong secret key: {key_data.ADMIN_SECRET_KEY}"}, status_code=403)

    try:
        post = MediumParser(key_data.key, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
        await post.delete_from_cache()
    except Exception as ex:
        logger.exception(ex)
        return JSONResponse({"message": f"Couldn't delete from cache: {ex}"}, status_code=500)
    else:
        # Only mark the key as banned once the deletion succeeded.
        ban_db.set(key_data.key, 1)
        return JSONResponse({"message": "OK"}, status_code=200)

104
server/handlers/post.py Normal file
View file

@ -0,0 +1,104 @@
import sentry_sdk
import pickle
from fastapi.responses import HTMLResponse
from html5lib.html5parser import parse
from html5lib import serialize
from loguru import logger
from server.utils.error import generate_error
from server.utils.logger_trace import trace
from server.utils.notify import send_message
from server.utils.cache import aio_redis_cache
from server.utils.utils import correct_url, safe_check_redis_connection
from server import base_template, config, url_correlation, redis_storage, postleter_template
from medium_parser import medium_parser_exceptions
from medium_parser import cache as medium_cache
from medium_parser.core import MediumParser
from medium_parser.utils import is_valid_medium_post_id_hexadecimal
@trace
@aio_redis_cache(10 * 60)
async def render_postleter(limit: int = 60, as_html: bool = False):
    """Render the list of random cached posts shown on the main page.

    Args:
        limit: number of entries requested from ``medium_cache.random``.
        as_html: when true, return the rendered markup as a string instead of
            wrapping it in an HTMLResponse.

    Posts that fail to load are logged and left out of the list.
    """
    # Materialise the ids first; each row's first column is the post id.
    post_ids = [row[0] for row in medium_cache.random(limit)]

    posts_metadata = []
    for post_id in post_ids:
        try:
            parser = MediumParser(post_id, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
            await parser.query()
            posts_metadata.append(await parser.generate_metadata(as_dict=True))
        except Exception as ex:
            logger.error(f"Couldn't render post_id for postleter: {post_id}, ex: {ex}")
            # await send_message(f"Couldn't render post_id for postleter: {post_id}, ex: {ex}")

    rendered = await postleter_template.render_async(post_list=posts_metadata)
    return rendered if as_html else HTMLResponse(rendered)
@trace
async def render_medium_post_link(path: str, use_cache: bool = True, use_redis: bool = True):
    """Render a Medium post, given its id or URL, as a full HTML page.

    Args:
        path: a hexadecimal Medium post id, or a URL (normalised with
            ``correct_url``).
        use_cache: forwarded to ``MediumParser.query``; also required for
            reading a previously rendered post from Redis.
        use_redis: when false, Redis is neither read nor written.

    Returns:
        HTMLResponse with the page, or the response from ``generate_error``
        when the post cannot be identified, loaded or rendered.
    """
    redis_available = await safe_check_redis_connection(redis_storage)

    try:
        parser_options = {
            "timeout": config.TIMEOUT,
            "host_address": config.HOST_ADDRESS,
            "auth_cookies": config.MEDIUM_AUTH_COOKIES,
        }
        if is_valid_medium_post_id_hexadecimal(path):
            medium_parser = MediumParser(path, **parser_options)
        else:
            medium_parser = await MediumParser.from_url(correct_url(path), **parser_options)
        medium_post_id = medium_parser.post_id

        may_read_redis = redis_available and use_cache and use_redis
        redis_result = await redis_storage.get(medium_post_id) if may_read_redis else None

        if redis_result:
            # NOTE(review): pickle.loads trusts whatever is stored in Redis.
            rendered_medium_post = pickle.loads(redis_result)
        else:
            await medium_parser.query(use_cache=use_cache)
            rendered_medium_post = await medium_parser.render_as_html("server/templates")
    except medium_parser_exceptions.InvalidURL as ex:
        logger.exception(ex)
        sentry_sdk.capture_exception(ex)
        return await generate_error(
            "Unable to identify the Medium article URL.",
            status_code=404,
        )
    except (
        medium_parser_exceptions.InvalidMediumPostURL,
        medium_parser_exceptions.MediumPostQueryError,
        medium_parser_exceptions.PageLoadingError,
    ) as ex:
        logger.exception(ex)
        sentry_sdk.capture_exception(ex)
        return await generate_error(
            "Unable to identify the link as a Medium.com article page. Please check the URL for any typing errors.",
            status_code=404,
        )
    except medium_parser_exceptions.InvalidMediumPostID as ex:
        logger.exception(ex)
        sentry_sdk.capture_exception(ex)
        return await generate_error("Unable to identify the Medium article ID.", status_code=500)
    except medium_parser_exceptions.NotValidMediumURL as ex:
        # Expected for arbitrary non-Medium paths: no logging, quiet error page.
        return await generate_error("You sure that this is a valid Medium.com URL?", status_code=404, quiet=True)
    except Exception as ex:
        logger.exception(ex)
        sentry_sdk.capture_exception(ex)
        return await generate_error(status_code=500)
    else:
        base_context = {
            "enable_ads_header": config.ENABLE_ADS_BANNER,
            "body_template": rendered_medium_post.data,
            "title": rendered_medium_post.title,
            "description": rendered_medium_post.description,
        }
        page_html = await base_template.render_async(base_context, HOST_ADDRESS=config.HOST_ADDRESS)
        page_bytes = serialize(parse(page_html), encoding='utf-8')

        # NOTE(review): nesting reconstructed from a view with stripped
        # indentation - the success notification is assumed to be sent only
        # for freshly rendered posts; confirm against the repository.
        if not redis_result:
            if not redis_available:
                await send_message("ERROR: Redis is not available. Please check your configuration.")
            elif use_redis:
                await redis_storage.setex(medium_post_id, config.CACHE_LIFE_TIME, pickle.dumps(rendered_medium_post))
            await send_message(f"✅ Successfully rendered post: {url_correlation.get()}", True, "GOOD")

        return HTMLResponse(page_bytes)

View file

@ -0,0 +1,34 @@
import aiohttp
from fastapi import Response
from server import config
from server.utils.logger_trace import trace
# Response headers attached to every proxied iframe document.
IFRAME_HEADERS = {"Access-Control-Allow-Origin": "*", "X-Frame-Options": "SAMEORIGIN"}


@trace
async def iframe_proxy(iframe_id):
    """Fetch a Medium embed document and serve it from this host.

    Args:
        iframe_id: identifier appended to ``https://medium.com/media/``.

    The ``document.domain = document.domain`` statement in the fetched page is
    replaced with a console log before the HTML is returned.
    """
    # How Medium embeds works: https://stackoverflow.com/questions/56594766/medium-embed-ly-notifyresize-does-not-work-on-safari
    browser_headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"}
    embed_url = f"https://medium.com/media/{iframe_id}"

    async with aiohttp.ClientSession() as client:
        upstream = await client.get(embed_url, timeout=config.TIMEOUT, headers=browser_headers)
        html = (await upstream.text()).replace(
            "document.domain = document.domain",
            "console.log('[FREEDIUM] iframe workaround')",
        )

    return Response(content=html, media_type="text/html", headers=IFRAME_HEADERS)
@trace
async def miro_proxy(miro_data: str):
    """Proxy a resource from ``miro.medium.com``.

    Args:
        miro_data: path (and query string) appended to
            ``https://miro.medium.com/``.

    Returns:
        Response carrying the upstream body, status code and content type.
    """
    async with aiohttp.ClientSession() as client:
        request = await client.get(
            f"https://miro.medium.com/{miro_data}",
            timeout=config.TIMEOUT,
            headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"},
        )
        request_content = await request.read()
        # A missing Content-Type header used to raise KeyError here.
        content_type = request.headers.get("Content-Type", "application/octet-stream")
        # Forward the upstream status so error bodies are not served as 200.
        upstream_status = request.status

    return Response(content=request_content, status_code=upstream_status, media_type=content_type)

77
server/main.py Normal file
View file

@ -0,0 +1,77 @@
from math import ceil
import sentry_sdk
import asyncio
from contextlib import suppress
from fastapi.exceptions import HTTPException
from fastapi import FastAPI, Depends, APIRouter
from loguru import logger
from fastapi_limiter import FastAPILimiter
from fastapi_limiter.depends import RateLimiter
from server import config, redis_storage
from server.exceptions.main import register_main_error_handler
from server.handlers.main import register_main_router
from server.middlewares import register_middlewares
from server.utils.utils import safe_check_redis_connection
# Application identity, shown in the generated API metadata.
NAME = "Freedium"
VERSION = "1.0"

APP_TITLE = f"{NAME}'s REST API"
APP_VERSION = VERSION

# Keyword arguments for the FastAPI constructor.
FASTAPI_APPLICATION_CONFIG = {"title": APP_TITLE, "version": APP_VERSION}

# Setting the three URLs to None switches off the OpenAPI schema and both doc UIs.
if config.DISABLE_EXTERNAL_DOCS:
    FASTAPI_APPLICATION_CONFIG.update(openapi_url=None, docs_url=None, redoc_url=None)

# Sentry is only initialised when a DSN is configured.
if config.SENTRY_SDK_DSN:
    sentry_sdk.init(dsn=config.SENTRY_SDK_DSN, traces_sample_rate=1.0)
async def limiter_callback(request, response, pexpire: int):
    """Reject a rate-limited request with HTTP 429.

    Args:
        request: the throttled request (unused).
        response: the pending response (unused).
        pexpire: milliseconds until the limit resets; rounded up to whole
            seconds for the ``Retry-After`` header.

    Raises:
        HTTPException: always, with status 429.
    """
    retry_after_seconds = ceil(pexpire / 1000)
    detail = {"error": "Too many requests. Probably you use Freedium to train own AI moodel, hmm? :/"}
    raise HTTPException(429, detail, headers={"Retry-After": str(retry_after_seconds)})
async def limiter_identifier(request):
    """Return the key (client IP) used to bucket requests for rate limiting.

    Resolution order:
      1. first entry of the ``X-Forwarded-For`` header, whitespace stripped;
      2. the ``ip`` header;
      3. the socket peer address (``request.client.host``) when available;
      4. ``"127.0.0.1"`` as the last resort.

    Args:
        request: object exposing ``headers`` (mapping with ``get``) and,
            optionally, ``client.host``.

    Returns:
        The identifier as a string.
    """
    # NOTE(review): both headers are client-controlled unless the reverse
    # proxy overwrites them - confirm the proxy configuration.
    forwarded_for = request.headers.get("X-Forwarded-For")
    if forwarded_for:
        # "client, proxy1, proxy2": the first hop is the client. Stripping
        # keeps " 1.2.3.4" and "1.2.3.4" in the same bucket.
        first_hop = forwarded_for.split(",")[0].strip()
        if first_hop:
            return first_hop

    original_ip = request.headers.get("ip")
    if original_ip:
        return str(original_ip)

    # Prefer the real peer address over lumping every header-less client
    # into one shared "127.0.0.1" bucket.
    client = getattr(request, "client", None)
    peer_host = getattr(client, "host", None)
    if peer_host:
        return str(peer_host)

    return "127.0.0.1"
# The ASGI application.
app = FastAPI(**FASTAPI_APPLICATION_CONFIG)

# All routes hang off this router; unless rate limiting is disabled, each of
# them depends on a limiter allowing 5 requests per 2 seconds per identifier.
router = (
    APIRouter()
    if config.DISABLE_RATE_LIMITER
    else APIRouter(dependencies=[Depends(RateLimiter(times=5, seconds=2, identifier=limiter_identifier, callback=limiter_callback))])
)
@app.on_event("startup")
async def startup():
    """Initialise the rate limiter when it is enabled and Redis is reachable."""
    if config.DISABLE_RATE_LIMITER:
        return
    redis_is_up = await safe_check_redis_connection(redis_storage)
    if redis_is_up:
        await FastAPILimiter.init(redis_storage)
@app.on_event("shutdown")
async def shutdown():
    """Close the Redis connection and, when Sentry is configured, flush its pending events."""
    logger.debug("Close Redis connection")
    await redis_storage.close()

    sentry_enabled = bool(config.SENTRY_SDK_DSN)
    if not sentry_enabled:
        return
    logger.debug("Flush Sentry messages")
    sentry_sdk.flush()
# Wire everything together: routes go onto the router, the 500 handler and
# the middlewares onto the app, and finally the router is mounted on the app.
register_main_router(router)
register_main_error_handler(app)
register_middlewares(app)
# Must come after register_main_router so the registered routes are included.
app.include_router(router)

View file

@ -0,0 +1,15 @@
from starlette.middleware.cors import CORSMiddleware
from server.middlewares.logger import LoggerMiddleware
def register_middlewares(app):
    """Install the request logger and a fully permissive CORS policy on ``app``."""
    app.add_middleware(LoggerMiddleware)

    # Every origin, method and header is allowed, with credentials enabled.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)

View file

@ -0,0 +1,92 @@
import asyncio
import time
from typing import Awaitable, Callable
from loguru import logger
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response, StreamingResponse
from starlette.types import Message
from server import transponder_code_correlation, url_correlation, xkcd_passwd, xp, config
from server.utils.error import generate_error
from server.utils.utils import string_to_number_ascii
async def set_body(request: Request, body: bytes):
    """Replace the request's receive callable with one that replays ``body``.

    Args:
        request: request whose ``_receive`` attribute is overwritten.
        body: bytes delivered by the replacement receive callable.
    """
    replayed_message = {"type": "http.request", "body": body}

    async def _replay() -> Message:
        return replayed_message

    request._receive = _replay
async def get_body(request: Request) -> bytes:
    """Read the request body and re-arm the request so it can be read again.

    Returns:
        The raw body bytes.
    """
    payload = await request.body()
    await set_body(request, payload)
    return payload
class LoggerMiddleware(BaseHTTPMiddleware):
    """Middleware that tags every request with a readable ID and logs it.

    For each request it generates a three-word ID, stores the request URL and
    a numeric "transponder code" in context variables, logs the request and
    response at debug level, enforces ``config.REQUEST_TIMEOUT`` on the
    downstream call, and adds ``X-Request-ID`` / ``X-Process-Time`` headers
    to the response.
    """

    async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[StreamingResponse]]) -> Response:  # type: ignore
        """Process one request.

        Args:
            request: the incoming request.
            call_next: callable that runs the rest of the application.

        Returns:
            The downstream response, or the generic error response when the
            downstream call raised or exceeded the timeout.
        """
        start_time = time.time()

        generated_id = xp.generate_xkcdpassword(xkcd_passwd, delimiter="-", numwords=3)
        transponder_code = string_to_number_ascii(generated_id)

        transponder_code_correlation.set(transponder_code)
        # The context variable is declared as Optional[str]; store the URL as
        # text rather than as a URL object.
        url_correlation.set(str(request.url))

        with logger.contextualize(id=generated_id):
            logger.trace(f"Current ID '{generated_id}' transponder code is '{transponder_code}'")
            logger.trace(request.__dict__)

            await request.body()

            logger.debug(f"< HTTP/{request['http_version']} {request.method} {request.url}")
            # request.client may be None (no peer address available), which
            # used to raise AttributeError before the request was handled.
            client_host = request.client.host if request.client else "unknown"
            logger.debug(f"< IP host origin: {client_host}")

            logger.debug("< Params:")
            for name, value in request.path_params.items():
                logger.debug(f"\t< {name}: {value}")

            logger.debug("< Headers:")
            for name, value in request.headers.items():
                value = self._sanitize_header_value(name, value)
                logger.debug(f"\t< {name}: {value}")

            if hasattr(request, "cookies") and request.cookies:
                logger.debug("< Coockies:")
                for name, value in request.cookies.items():
                    logger.debug(f"\t< {name}: {value}")

            # Workaround for stupid Starlette bug: https://github.com/tiangolo/fastapi/issues/394
            await get_body(request)

            try:
                response = await asyncio.wait_for(call_next(request), timeout=config.REQUEST_TIMEOUT)
            except Exception as ex:
                # Includes the timeout raised by wait_for: log it and fall
                # back to the generic error response.
                logger.exception(ex)
                response = await generate_error()

            logger.trace(response.__dict__)

            response.headers["X-Request-ID"] = generated_id
            # response.headers["Access-Control-Expose-Headers"] = "X-Request-ID, Origin, X-Requested-With, Content-Type, Accept"

            logger.debug(f"> HTTP/{request['http_version']} {response.status_code}")

            logger.debug("> Headers:")
            for name, value in response.headers.items():
                value = self._sanitize_header_value(name, value)
                logger.debug(f"\t> {name}: {value}")

            if hasattr(response, "cookies"):
                logger.debug("> Coockies:")
                for name, value in response.cookies.items():
                    logger.debug(f"\t> {name}: {value}")

            process_time = time.time() - start_time
            response.headers["X-Process-Time"] = str(process_time)

            return response

    def _sanitize_header_value(self, name, value):
        """Mask the ``Authorization`` header: keep its first 25 characters, then ``******``.

        Any other header value is returned unchanged.
        """
        if name.lower() == "authorization":
            value = f"{value[:25]}******"
        return value

365
server/templates/base.html Normal file
View file

@ -0,0 +1,365 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
<title>{{ title or "Breaking Medium paywall!" }} - Freedium</title>
{% if creator %}<meta name="author" content="{{ creator.name }}" />{% endif %}
<meta name="description" content="{{ description or 'Your paywall breakthrough for Medium!' }}" />
<meta name="keywords" content="medium, paywall, medium.com, paywall breakthrough" />
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdn.tailwindcss.com?plugins=forms,typography,aspect-ratio"></script>
<link href="https://glyph.medium.com/css/unbound.css" rel="stylesheet">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#00aba9">
<meta name="msapplication-TileColor" content="#00aba9">
<meta name="theme-color" content="#ffffff">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/highlight.min.js"></script>
<link rel="stylesheet" href="https://unpkg.com/@highlightjs/cdn-assets@11.8.0/styles/atom-one-dark.min.css">
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@17.8.4/dist/lazyload.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/lightense-images@1.0.17/dist/lightense.min.js"></script>
<script>
if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
document.documentElement.classList.add('dark');
//document.getElementById('darkIcon').classList.remove('hidden');
//document.getElementById('lightIcon').classList.add('hidden')
} else {
document.documentElement.classList.remove('dark')
//document.getElementById('lightIcon').classList.remove('hidden');
//document.getElementById('darkIcon').classList.add('hidden');
}
</script>
<style>
.shadow-lf {
box-shadow: inset 3px 0 0 0 rgb(209 207 239 / var(--tw-bg-opacity));
}
</style>
<style>
.notification-container {
display: none;
position: fixed;
top: 20px;
padding: 2%;
z-index: 1000;
}
.notification-card {
background-color: #fff;
border: 1px solid #ccc;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
padding: 10px 20px;
border-radius: 5px;
text-align: center;
}
</style>
<script>
window._resizeIframe = function (iframeData)
{
iframeData.iframe.height = iframeData.height
_resizeIframeWidth()
}
function _resizeIframeWidth(){ var element = document.querySelector(".main-content");
var width = element.offsetWidth;
iframes = document.getElementsByTagName("iframe");
for (var i = 0; i < iframes.length; i++) {
iframes[i].width = width
}
window.onresize = _resizeIframeWidth
}
</script>
</head>
<div class="fixed bottom-4 left-4" style="z-index: 999999;">
<button id="openProblemModal"
class="m-1.5 flex items-center bg-red-500 text-white py-2 px-4 rounded-full shadow-lg hover:bg-red-600 focus:outline-none focus:ring-2 focus:ring-blue-500">
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. --><style>svg{fill:#ffffff}</style>
<path d="M256 32c14.2 0 27.3 7.5 34.5 19.8l216 368c7.3 12.4 7.3 27.7 .2 40.1S486.3 480 472 480H40c-14.3 0-27.6-7.7-34.7-20.1s-7-27.8 .2-40.1l216-368C228.7 39.5 241.8 32 256 32zm0 128c-13.3 0-24 10.7-24 24V296c0 13.3 10.7 24 24 24s24-10.7 24-24V184c0-13.3-10.7-24-24-24zm32 224a32 32 0 1 0 -64 0 32 32 0 1 0 64 0z" />
</svg>
</button>
<button id="darkModeToggle"
class="m-1.5 flex items-center bg-blue-500 text-white py-2 px-4 rounded-full shadow-lg hover:bg-blue-600 focus:outline-none">
<svg id="darkIcon" xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 384 512">
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
<path d="M223.5 32C100 32 0 132.3 0 256S100 480 223.5 480c60.6 0 115.5-24.2 155.8-63.4c5-4.9 6.3-12.5 3.1-18.7s-10.1-9.7-17-8.5c-9.8 1.7-19.8 2.6-30.1 2.6c-96.9 0-175.5-78.8-175.5-176c0-65.8 36-123.1 89.3-153.3c6.1-3.5 9.2-10.5 7.7-17.3s-7.3-11.9-14.3-12.5c-6.3-.5-12.6-.8-19-.8z" />
</svg>
<!-- SVG icon for light mode (e.g., a sun) -->
<svg class="hidden" id="lightIcon" xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
<path d="M361.5 1.2c5 2.1 8.6 6.6 9.6 11.9L391 121l107.9 19.8c5.3 1 9.8 4.6 11.9 9.6s1.5 10.7-1.6 15.2L446.9 256l62.3 90.3c3.1 4.5 3.7 10.2 1.6 15.2s-6.6 8.6-11.9 9.6L391 391 371.1 498.9c-1 5.3-4.6 9.8-9.6 11.9s-10.7 1.5-15.2-1.6L256 446.9l-90.3 62.3c-4.5 3.1-10.2 3.7-15.2 1.6s-8.6-6.6-9.6-11.9L121 391 13.1 371.1c-5.3-1-9.8-4.6-11.9-9.6s-1.5-10.7 1.6-15.2L65.1 256 2.8 165.7c-3.1-4.5-3.7-10.2-1.6-15.2s6.6-8.6 11.9-9.6L121 121 140.9 13.1c1-5.3 4.6-9.8 9.6-11.9s10.7-1.5 15.2 1.6L256 65.1 346.3 2.8c4.5-3.1 10.2-3.7 15.2-1.6zM160 256a96 96 0 1 1 192 0 96 96 0 1 1 -192 0zm224 0a128 128 0 1 0 -256 0 128 128 0 1 0 256 0z" />
</svg>
</button>
</div>
<nav id="header" class="fixed w-full z-9 top-0 dark:bg-gray-800 dark:text-white bg-white shadow">
<div class="notification-container">
<div class="notification-card dark:bg-gray-800 bg-white">
<p class="text-2xl pb-5 text-black dark:text-white">Achtung !!!</p>
<p class="pb-3 text-black dark:text-white">Sooo, it was going to take a while, but now we have it. Our whole Github organization is not public for now. Reddit community, that was beginning all of that also gone - reddit.com/r/paywall/comments/15jsr6z/bypass_mediumcom_paywall</br></br>We have moved to Codeberg - codeberg.org/Freedium-cfd</br></br>Medium, thank you >.</p>
<a href="https://patreon.com/Freedium" target="_blank" title="Patreon">
<button class="bg-red-400 mx-1 text-white hover:bg-red-500 font-semibold py-1 px-2 rounded mt-2">
Patreon
</button>
</a>
<a href="https://www.buymeacoffee.com/zhymabekroman" target="_blank" title="Buy me a coffee">
<button class="bg-orange-500 hover:bg-blue-700 mx-1 text-white font-semibold py-1 px-2 rounded mt-2">
Buy me a coffee
</button>
</a>
<button class="bg-gray-300 mx-1 hover:bg-gray-400 text-gray-800 font-semibold py-1 px-2 rounded mt-2 close-button">
Close
</button>
<a href="https://codeberg.org/Freedium-cfd/web" target="_blank" title="Codeberg">
<button class="bg-gray-700 hover:bg-gray-600 mx-1 text-white font-semibold py-1 px-2 rounded mt-2">
Source code - Codeberg
</button>
</a>
</div>
</div>
{% if enable_ads_header %}
<div class="w-full bg-yellow-400 text-center py-1 px-4"><p class="text-yellow-900">Place your advertisement here! Contact us at advertise@freedium.com</p></div>
{% endif %}
<div id="progress" class="h-1 z-20 top-0" style="background:linear-gradient(to right, #4dc0b5 var(--scroll), transparent 0)"></div>
<div class="w-full md:max-w-4xl mx-auto flex flex-wrap items-center justify-between mt-0 py-3">
<div class="pl-4">
<a class="text-green-500 text-base no-underline hover:no-underline font-extrabold text-xl"
href="/"
onclick="navigateToOrigin()">Freedium</a>
</div>
<div class="block lg:hidden pr-4">
<button id="nav-toggle"
class="flex items-center px-3 py-2 border rounded text-gray-500 dark:text-white border-gray-600 hover:text-gray-900 dark:hover:text-white hover:border-green-500 appearance-none focus:outline-none">
<svg class="fill-current h-3 w-3" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
<title>Menu</title>
<path d="M0 3h20v2H0V3zm0 6h20v2H0V9zm0 6h20v2H0v-2z" />
</svg>
</button>
</div>
<div class="w-full flex-grow lg:flex lg:items-center lg:w-auto hidden lg:block mt-2 lg:mt-0 dark:bg-gray-800 bg-white"
id="nav-content">
<ul class="list-reset lg:flex justify-end flex-1 items-center">
<li class="mr-3">
<a class="inline-block text-gray-600 dark:text-white no-underline hover:text-gray-900 dark:hover:text-white hover:text-underline py-2 px-4"
href="https://medium.com/">Medium.com</a>
</li>
<li class="mr-3">
<a class="inline-block text-gray-600 dark:text-white no-underline hover:text-gray-900 dark:hover:text-white hover:text-underline py-2 px-4"
href="https://codeberg.org/Freedium-cfd/web">Source code - Codeberg</a>
</li>
</ul>
</div>
</div>
</nav>
<body class="dark:bg-gray-800 bg-white">{{ body_template }}</body>
<div id="problemModal"
class="modal hidden fixed inset-0 w-full h-full flex items-center justify-center overflow-y-auto z-10 bg-black bg-opacity-50">
<div class="modal-container w-11/12 md:max-w-xl mx-auto rounded shadow-lg max-h-screen">
<div class="modal-content bg-white dark:bg-gray-800 dark:text-white my-8 py-4 text-left px-6">
<h1 class="text-3xl font-bold">Reporting a Problem</h1>
<div class="mt-3">
<p>Sometimes we have problems displaying some Medium posts.</br></br></p>
<p>If you have a problem that some images aren't loading - try using VPN. Probably you have problem with access to Medium CDN (or fucking Cloudflare's bot detection algorithms are blocking you).</p>
</div>
<form action="#" method="POST" class="mt-4" id="problem-form">
<div class="mb-4">
<label for="problem-description" class="block text-gray-700 dark:text-white font-bold mb-2">Problem Description</label>
<textarea id="problem-description"
name="problem-description"
placeholder="Describe your problem here..."
class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
rows="4"
required></textarea>
</div>
<div>
<button type="submit"
class="m-2 bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">Submit</button>
<button type="button"
class="m-2 modal-close bg-gray-500 hover:bg-gray-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">Cancel</button>
</div>
</form>
</div>
</div>
</div>
<script>
tailwind.config = {
darkMode: 'class',
}
function navigateToOrigin() {
window.location.href = window.location.origin;
}
if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
// document.documentElement.classList.add('dark');
document.getElementById('darkIcon').classList.remove('hidden');
document.getElementById('lightIcon').classList.add('hidden')
} else {
// document.documentElement.classList.remove('dark')
document.getElementById('lightIcon').classList.remove('hidden');
document.getElementById('darkIcon').classList.add('hidden');
}
document.getElementById('darkModeToggle').addEventListener('click', function() {
if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
document.documentElement.classList.remove('dark');
document.getElementById('darkIcon').classList.add('hidden');
document.getElementById('lightIcon').classList.remove('hidden')
document.documentElement.style.cssText = "--lightense-backdrop: white;";
localStorage.setItem("theme", "light")
} else {
document.documentElement.classList.add('dark')
document.getElementById('lightIcon').classList.add('hidden');
document.getElementById('darkIcon').classList.remove('hidden');
document.documentElement.style.cssText = "--lightense-backdrop: black;";
localStorage.setItem("theme", "dark")
}
});
</script>
<script>
const openModalButton = document.getElementById('openProblemModal');
const closeModalButton = document.querySelector('.modal-close');
const modal = document.getElementById('problemModal');
const problemDescriptionInput = document.getElementById('problem-description');
const submitButton = document.querySelector('form button');
const body = document.body;
openModalButton.addEventListener('click', () => {
body.classList.add('overflow-hidden'); // Prevent scrolling on the body
modal.classList.remove('hidden');
});
closeModalButton.addEventListener('click', () => {
body.classList.remove('overflow-hidden'); // Re-enable scrolling on the body
modal.classList.add('hidden');
});
modal.addEventListener('click', (e) => {
if (e.target === modal) {
modal.classList.add('hidden');
body.classList.remove('overflow-hidden');
}
});
function navigateNoCache() {
window.location.href = `/render-no-cache${window.location.pathname}`;
}
/**
 * Submit handler for the "report a problem" form.
 *
 * POSTs `{ description, page }` as JSON to `/report-problem`. The submit
 * button is disabled while the request is in flight and re-enabled only on
 * failure, so the user can retry. On success the modal is closed and the
 * body scroll lock applied when the modal was opened is released.
 *
 * @param {Event} event - the form's submit event (default action is cancelled).
 */
const submitForm = async (event) => {
  event.preventDefault();
  console.log('Form submiting is started!');
  submitButton.disabled = true;

  // Get the problem description from the input field
  const problemDescription = problemDescriptionInput.value;
  const currentPage = window.location.href;

  try {
    // Send a POST request to the "report-problem" API endpoint
    const response = await fetch('/report-problem', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ description: problemDescription, page: currentPage }),
    });

    if (response.ok) {
      console.log('Problem report submitted successfully.');
      modal.classList.add('hidden'); // Close the modal
      // Closing the modal must also undo the scroll lock set when it was opened;
      // otherwise the page stays unscrollable after a successful report.
      body.classList.remove('overflow-hidden');
    } else {
      // Non-2xx response: let the user try again
      console.error('Failed to submit problem report.');
      submitButton.disabled = false;
    }
  } catch (error) {
    // Network errors or other exceptions: let the user try again
    console.error('An error occurred:', error);
    submitButton.disabled = false;
  }
};
document.getElementById('problem-form').onsubmit = submitForm;
</script>
<script>
// Reading-progress bar, mobile nav toggle and image zoom initialisation.
const h = document.documentElement, b = document.body;
const st = 'scrollTop';
const sh = 'scrollHeight';
const progress = document.getElementById('progress');
const header = document.getElementById('header');
const navcontent = document.getElementById('nav-content');

document.addEventListener('scroll', function () {
  /* Refresh scroll % width */
  const scrollable = (h[sh] || b[sh]) - h.clientHeight;
  // When the document is not taller than the viewport the divisor is 0 and the
  // original expression produced "NaN%"; report 0% instead.
  const scroll = scrollable > 0 ? (h[st] || b[st]) / scrollable * 100 : 0;
  progress.style.setProperty('--scroll', scroll + '%');
});

// Show/hide the navigation content when the toggle is clicked.
document.getElementById('nav-toggle').onclick = function () {
  document.getElementById("nav-content").classList.toggle("hidden");
};

// Once everything has loaded, enable Lightense zoom on every image not opted out.
window.addEventListener('load', function () {
  Lightense('img:not(.no-lightense)');
}, false);
</script>
</body>
</html>
<script>
// Lazy image loader. Successfully loaded images get Lightense zoom; images that
// fail to load from miro.medium.com are retried through this host's `/@miro/` route.
var lazyLoadInstance = new LazyLoad({
  callback_loaded(element) {
    Lightense(element);
  },
  callback_error(img) {
    console.log(img);
    if (!img.hasAttribute("data-src")) {
      return;
    }
    const originalSrc = img.attributes["data-src"].value;
    if (originalSrc.startsWith("https://miro.medium.com/v2/")) {
      const proxiedSrc = originalSrc.replace("https://miro.medium.com/v2/", "{{HOST_ADDRESS}}/@miro/v2/" );
      img.setAttribute("src", proxiedSrc);
    }
  }
});
</script>
<script>
// Navigate to the site root (scheme + host + port of the current page).
function navigateToOrigin() {
window.location.href = window.location.origin;
}
</script>
<script>
// Dismissible notification banner. Once dismissed, a flag is stored in
// localStorage and the banner is not shown again.
document.addEventListener('DOMContentLoaded', () => {
  const STORAGE_KEY = 'showNotification-github-block';
  const notificationContainer = document.querySelector('.notification-container');
  const closeButton = document.querySelector('.close-button');

  // Without the banner markup there is nothing to show or hide; bail out
  // instead of throwing on a null element.
  if (!notificationContainer) {
    return;
  }

  // Show the notification unless it was dismissed before
  function showNotification() {
    if (!localStorage.getItem(STORAGE_KEY)) {
      notificationContainer.style.display = 'block';
    }
  }

  // Hide the notification and remember the dismissal
  function hideNotification() {
    localStorage.setItem(STORAGE_KEY, 'false');
    notificationContainer.style.display = 'none';
  }

  // Close button functionality (the button may be absent from the markup)
  if (closeButton) {
    closeButton.addEventListener('click', () => {
      hideNotification();
    });
  }

  showNotification();
});
</script>

View file

@ -0,0 +1,21 @@
<div class="container w-full md:max-w-3xl mx-auto pt-40"></div>
<div class="container w-full mx-auto pt-20 pb-20 break-words">
<div class="flex flex-col items-center justify-center h-90">
<div class="bg-green-500 text-white text-6xl font-bold p-6 rounded-lg shadow-lg">Oppps!</div>
<div class="flex items-center justify-center mt-12">
<p class="dark:text-gray-200 text-gray-700 text-lg">{{ error_msg }}</p>
</div>
{{ url_line }}
<div class="flex md:max-w-2xl items-center justify-center mt-8">
<p class="dark:text-gray-200 text-gray-700">We are aware of this error. Please try again later if this was an error on our part, we will fix it as soon as possible.</p>
</div>
<div class="flex items-center justify-center mt-28">
<p class="p-5 bg-black text-white" style="font-family: monospace;">Your emergency transponder code: {{ transponder_code }}</p>
</div>
</div>
</div>
<script>
// Pre-fill the link input (when the page contains one) with the path and query
// string the user originally requested, so they can retry it easily.
const urlMedium = document.getElementById('medium-link-input');
if (urlMedium) {
  // The input is only present when the search bar was rendered into the page;
  // skip quietly otherwise instead of throwing on null.
  urlMedium.value = window.location.pathname.slice(1) + window.location.search;
}
</script>

View file

@ -0,0 +1,9 @@
<div class="container w-full md:max-w-3xl mx-auto pt-40"></div>
<div class="container w-full mx-auto pt-20 py-20 break-words">
<div class="flex flex-col items-center justify-center h-60">
<h1 class="md:max-w-3xl text-4xl font-bold text-center text-green-500 mt-8">Freedium: Your paywall breakthrough for Medium!</h1>
{{ url_line }}
</div>
</div>
{{ postleter }}
<div class="mt-8"></div>

118
server/templates/post.html Normal file
View file

@ -0,0 +1,118 @@
<div class="container w-full md:max-w-3xl mx-auto pt-20 break-words text-gray-900 dark:text-gray-200 bg-white dark:bg-gray-800">
<div class="w-full px-4 md:px-6 text-xl text-gray-800 dark:text-gray-100 leading-normal" style="font-family:Georgia,serif;">
<div class="font-sans">
<p class="text-base md:text-sm text-green-500 font-bold pb-3">
<a href="{{ url }}#bypass" class="text-sm md:text-sm text-green-500 font-bold no-underline hover:underline ">&lt; Go to the original</a>
</p>
{% if previewImageId %}
<img alt="Preview image"
style="max-height: 65vh;
width: auto;
margin: auto"
loading="eager"
role="presentation"
src="https://miro.medium.com/v2/resize:fit:700/{{ previewImageId }}">
{% endif %}
<h1 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 pt-6 pb-2 text-3xl md:text-4xl">{{ title }}</h1>
{% if subtitle %}<h2 class="font-medium font-sans break-normal text-gray-600 dark:text-gray-200 pt-1 text-1xl md:text-1xl">{{ subtitle }}</h2>{% endif %}
</div>
<div class="bg-gray-100 dark:bg-gray-600 border border-gray-300 m-2 mt-5">
<div class="flex items-center space-x-4 p-4">
<div class="flex-shrink-0">
<a href="https://medium.com/@{{ creator.username }}" target="_blank" title="{{ creator.bio }}" class="block relative">
<img src="https://miro.medium.com/v2/resize:fill:88:88/{{ creator.imageId or '1*dmbNkD5D-u45r44go_cf0g.png' }}"
alt="{{ creator.name }}"
class="rounded-full h-11 w-11 no-lightense">
<div class="absolute bottom-0 right-0 h-3 w-3 border-2 border-white bg-green-500 rounded-full"></div>
</a>
</div>
<div class="flex-grow">
<a href="https://medium.com/@{{ creator.username }}"
target="_blank"
title="{{ creator.bio }}"
class="block font-semibold text-gray-900 dark:text-white">{{ creator.name }}</a>
<button class="text-sm text-white bg-green-500 px-3 py-1 rounded-lg mt-1 dark:bg-green-700">
<a href="https://medium.com/@{{ creator.username }}"
target="_blank"
title="{{ creator.bio }}"
class="block text-sm text-white">Follow</a>
</button>
</div>
</div>
<div class="px-4 pb-2">
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500 dark:text-white">
{% if collection %}
<a href="https://medium.com/{{ collection.slug }}"
title="{{ collection.shortDescription }}"
target="_blank"
class="flex items-center space-x-1">
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ collection.avatar.id }}"
alt="{{ collection.name }}"
class="h-4 w-4 rounded-full no-lightense">
<p>{{ collection.name }}</p>
</a>
<span>·</span>
{% endif %}
<span class="text-gray-500 dark:text-white">~{{ readingTime }} min read</span>
<span class="md:inline">·</span>
<span class="text-gray-500 dark:text-white">{{ firstPublishedAt }} (Updated: {{ updatedAt }})</span>
<span class="md:inline">·</span>
<span class="text-yellow-500 dark:text-yellow-400">Free: {{ freeAccess }}</span>
</div>
</div>
</div>
<div class="main-content mt-8">
{% for paragraph in content %}{{ paragraph }}{% endfor %}
</div>
<div class="flex flex-wrap gap-2 mt-5">
{% for tag in tags %}<a title="{{ tag.displayTitle }}" target="_blank" href="https://medium.com/tag/{{ tag.normalizedTagSlug }}"><span class="text-green-500 bg-green-100 px-2 py-1 rounded-full text-xs dark:bg-green-800 dark:text-gray-100">#{{ tag.normalizedTagSlug }}</span></a>{% endfor %}
</div>
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
</div>
<style>
.main-content {
letter-spacing: -0.06px;
font-family: source-serif-pro, Georgia, Cambria, "Times New Roman", Times, serif;
}
code {
background-color: #e3e2e2;
}
pre {
font-size: 75%;
background-color: #e3e2e2;
}
p code, ul code, li code {
font-size: 75%;
}
</style>
<script>
// Highlight all code blocks and put a "Copy" button at the top of each one.
document.addEventListener('DOMContentLoaded', (event) => {
  hljs.highlightAll();

  document.querySelectorAll('pre code').forEach((codeEl) => {
    // Capture the plain source text before the container is modified.
    const codeText = codeEl.textContent;
    const container = codeEl.parentElement;

    // Build the button as a DOM node (instead of re-serialising innerHTML) and
    // insert it as the first child, i.e. the same position as before.
    const copyButton = document.createElement('button');
    copyButton.className = 'hljs-copy p-1 bg-gray-300 dark:bg-black';
    copyButton.innerText = 'Copy';
    copyButton.contentCopy = codeText; // kept for anything that reads this property
    container.insertBefore(copyButton, container.firstChild);

    copyButton.addEventListener('click', async () => {
      copyButton.innerText = 'Copying..';
      const manualCopy = () => prompt("Clipboard (Select: ⌘+a > Copy:⌘+c)", codeText);
      try {
        // Feature-detect the Clipboard API: sniffing the user agent for "safari"
        // also matches Chrome, whose UA string contains "Safari".
        if (navigator.clipboard && navigator.clipboard.writeText) {
          await navigator.clipboard.writeText(codeText);
        } else {
          manualCopy();
        }
      } catch (error) {
        // Clipboard write rejected (permissions, insecure context, ...): fall
        // back to the manual prompt.
        manualCopy();
      }
      copyButton.innerText = 'Copied!';
      // Each button resets itself; no shared global that another click could overwrite.
      setTimeout(() => {
        copyButton.innerText = 'Copy';
      }, 1500);
    });
  });
});
</script>
<style>
.hljs-copy {
float: right;
cursor: pointer;
}
</style>

View file

@ -0,0 +1,48 @@
<div class="p-2 grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4 w-full">
{% for post in post_list %}
<div class="p-6 bg-white dark:bg-gray-600 rounded-xl">
<a class="group post_view" post_id="{{ post.post_id }}">
<div class="max-h-72 items-center d-flex overflow-hidden">
{% if post.preview_image_id %}
<img data-src="https://miro.medium.com/v2/resize:fit:700/{{ post.preview_image_id }}"
class="lazy w-full h-auto hover:scale-105 transition transition-all duration-200 ease-in-out">
</div>
{% else %}
<img data-src="https://images.unsplash.com/photo-1636467204130-edf8ee206dce?ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&ixlib=rb-1.2.1&auto=format&fit=crop&w=600&q=80"
class="lazy w-full h-auto hover:scale-105 transition transition-all duration-200 ease-in-out">
</div>
{% endif %}
<h3 class="mt-6 leading-normal text-gray-800 dark:text-gray-100 group-hover:text-purple-400 font-semibold text-xl lg:text-2xl line-clamp-3 transition translation-all duration-200 ease-in-out">{{ post.title }}</h3>
</a>
<div class="mt-6">
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500 dark:text-white">
{% if post.collection %}
<a href="https://medium.com/{{ post.collection.slug }}"
title="{{ post.collection.shortDescription }}"
target="_blank"
class="flex items-center space-x-1">
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ post.collection.avatar.id }}"
alt="{{ post.collection.name }}"
loading="eager"
class="h-4 w-4 rounded-full no-lightense">
<p>{{ post.collection.name }}</p>
</a>
<span>·</span>
{% endif %}
<span class="text-gray-500 dark:text-white">~{{ post.reading_time }} min read</span>
<span class="md:inline dark:text-white">·</span>
<span class="text-gray-500 dark:text-white">{{ post.first_published_at }} (Updated: {{ post.updated_at }})</span>
<span class="md:inline dark:text-white">·</span>
<span class="text-yellow-500 dark:text-white">Free: {{ post.free_access }}</span>
</div>
<p class="mt-6 leading-normal line-clamp-3 text-gray-600 dark:text-gray-200">{{ post.description }}</p>
</div>
<a post_id="{{ post.post_id }}" class="inline-block mt-6 text-purple-500 hover:text-purple-400 post_view">Read More</a>
</div>
{% endfor %}
</div>
<script>
// Point every ".post_view" link at "<origin>/<post_id>" using its post_id attribute.
Array.from(document.getElementsByClassName("post_view")).forEach((postLink) => {
  const postId = postLink.attributes["post_id"].value;
  postLink.href = `${window.location.origin}/${postId}`;
});
</script>

View file

@ -0,0 +1,25 @@
<div class="md:max-w-6xl bg-white dark:bg-gray-600 w-full shadow-md rounded-md p-8 mt-8">
<div class="flex items-center border rounded-md border-gray-300 px-4 py-2">
<svg class="h-5 w-5 text-gray-500 dark:text-gray-100 mr-2"
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 6v6m0 0v6m0-6h6m-6 0H6"></path>
</svg>
<input id="medium-link-input"
type="text"
placeholder="Enter Medium post link"
class="w-full focus:outline-none text-green-500 dark:bg-gray-600 border-gray-300"
onkeydown="if (event.keyCode == 13) document.getElementById('go-button').click()">
<button id="go-button" class="ml-2 bg-green-500 text-white px-4 py-2 rounded-md hover:bg-green-600 focus:outline-none">Go</button>
</div>
</div>
<script>
// "Go" button: open "<origin>/<whatever was typed into the link input>".
const goButton = document.getElementById('go-button');
goButton.addEventListener('click', () => {
  const { value: enteredLink } = document.getElementById('medium-link-input');
  window.location.href = `${window.location.origin}/${enteredLink}`;
});
</script>

44
server/utils/cache.py Normal file
View file

@ -0,0 +1,44 @@
import pickle
from server import redis_storage
from functools import wraps
from loguru import logger
from server.utils.utils import safe_check_redis_connection
def aio_redis_cache(expire_time: int = 60 * 10):  # enable_pickle: bool = False
    """Cache the result of an async function in Redis.

    The cache key is built from the function name, the ``str()`` of each
    positional argument and, when present, the keyword arguments sorted by
    name. Calls that use positional arguments only produce the same key as
    before. Results are stored pickled with a TTL of ``expire_time`` seconds.
    When Redis does not answer the connection check, the wrapped function is
    simply called without caching.

    :param expire_time: lifetime of a cached entry, in seconds.
    :return: a decorator for coroutine functions.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            if not await safe_check_redis_connection(redis_storage):
                logger.error("REDIS is not available!")
                return await func(*args, **kwargs)

            logger.trace(f"{expire_time=}")

            # Serialize the function name and ALL arguments as the Redis key.
            # Keyword arguments must be part of the key, otherwise two calls
            # that differ only in kwargs would share one cached result.
            key_parts = [str(arg) for arg in args]
            key_parts.extend(f"{name}={value}" for name, value in sorted(kwargs.items()))
            key = "{}-{}".format(func.__name__, ",".join(key_parts))
            logger.trace(f"REDIS key: {key}")

            cached = await redis_storage.get(key)
            if cached is not None:
                # NOTE(security): pickle.loads executes arbitrary code if the
                # stored bytes are attacker-controlled; this is only safe while
                # the Redis instance is trusted and not writable by others.
                result_raw = pickle.loads(cached)
                logger.trace("Result found in REDIS")
                return result_raw

            logger.trace("Result not found in REDIS")
            # Cache miss: call the original function and store its pickled result
            result_raw = await func(*args, **kwargs)
            await redis_storage.setex(key, expire_time, pickle.dumps(result_raw))
            return result_raw

        return wrapper

    return decorator

55
server/utils/error.py Normal file
View file

@ -0,0 +1,55 @@
import random
from fastapi.responses import HTMLResponse
from server import (
base_template,
config,
error_template,
transponder_code_correlation,
url_correlation
)
from server.utils.logger_trace import trace
from server.utils.notify import send_message
# ChatGPT prompt: Make this text more Humoristic in one sentenced text, 15 different with emojies as Python list: Sorry to hear that but we have some problem
ERROR_MSG_LIST = [
"Sorry to hear that, but we've got a problem that's bigger than my inability to resist a donut! 🍩",
"Apologies for the inconvenience, but we've hit a snag - it's not as funny as my cat chasing its tail, but it's a problem nonetheless! 🐱",
"Sorry to hear that, but we've encountered a problem - it's not as entertaining as a clown at a circus, but it's there! 🎪",
"Oops! We've stumbled upon a problem, but don't worry, it's not as disastrous as my cooking! 🍳",
"Sorry to hear that, but we've got a problem that's more stubborn than a mule on a Monday morning! 🐴",
"Apologies, but we've run into a problem - it's not as amusing as my grandma's dance moves, but it's a problem! 👵💃",
"Sorry to hear that, but we've got a problem that's more tangled than my headphone wires! 🎧",
"Oops! We've hit a problem, but don't worry, it's not as catastrophic as my last blind date! 💔",
"Sorry to hear that, but we've got a problem that's more elusive than a sock in a washing machine! 🧦",
"Apologies, but we've run into a problem - it's not as hilarious as my attempt at yoga, but it's a problem! 🧘‍♂️",
"Sorry to hear that, but we've got a problem that's more confusing than a chameleon in a bag of Skittles! 🦎🌈",
"Oops! We've encountered a problem, but don't worry, it's not as disastrous as my attempt at karaoke! 🎤",
"Sorry to hear that, but we've got a problem that's more stubborn than a toddler refusing to eat their veggies! 👶🥦",
"Apologies, but we've run into a problem - it's not as amusing as my dog trying to catch its tail, but it's a problem! 🐶",
"Sorry to hear that, but we've got a problem that's more elusive than the end of a rainbow! 🌈"
]
@trace
async def generate_error(error_msg: str = None, title: str = "Error", status_code: int = 500, quiet: bool = False):
    """Render the error page and wrap it in an ``HTMLResponse``.

    :param error_msg: text shown to the user; a random entry of
        ``ERROR_MSG_LIST`` is used when it is empty or ``None``.
    :param title: page title passed to the base template.
    :param status_code: HTTP status code of the response.
    :param quiet: currently has no effect, because the admin notification it
        used to guard is disabled (see the comment in the body).
    :return: ``HTMLResponse`` with the rendered base template.
    """
    message = error_msg or random.choice(ERROR_MSG_LIST)

    # Admin notification, currently disabled:
    # if not quiet:
    #     await send_message(
    #         f"📛 Error while processing url: <code>{url_correlation.get()}</code>, transponder_code: <code>{transponder_code_correlation.get()}</code>, error: <code>{error_msg}</code>"
    #     )

    rendered_error = await error_template.render_async(
        error_msg=message,
        transponder_code=transponder_code_correlation.get(),
    )
    page = await base_template.render_async(
        {
            "enable_ads_header": config.ENABLE_ADS_BANNER,
            "body_template": rendered_error,
            "title": title,
        },
        HOST_ADDRESS=config.HOST_ADDRESS,
    )
    return HTMLResponse(page, status_code=status_code)

117
server/utils/logger.py Normal file
View file

@ -0,0 +1,117 @@
# Source: https://pawamoy.github.io/posts/unify-logging-for-a-gunicorn-uvicorn-app/
# This code taken from comment by GroverChouT
import logging
import os
import sys
from pprint import pprint
from gunicorn.glogging import Logger
from loguru import logger
from loguru._datetime import datetime as loguru_datetime
from server import START_TIME, config
ENQUEUE = True
# Python's logging module is not supporting TRACE level
# https://bugs.python.org/issue31732
# https://betterstack.com/community/guides/logging/how-to-start-logging-with-python/
# logging.addLevelName() takes (level, levelName). With the arguments swapped,
# "TRACE" was never registered as a level *name*, so e.g. Logger.setLevel("TRACE")
# raised ValueError("Unknown level"). Lookups via logging.getLevelName() keep
# returning the same values as before.
logging.addLevelName(5, "TRACE")
# Passed as "backtrace"/"diagnose" to the stdout handler in logger_register().
BACKTRACE = True
DIAGNOSE = True
# Level looked up from the configured level name; used for the stdout/standard-file
# handlers and for the stdlib/gunicorn loggers below.
LOG_LEVEL = logging.getLevelName(config.LOG_LEVEL_NAME)
# Line format shared by every handler; relies on the "id" extra that
# logger_register() defaults to None.
LOG_FORMAT = "[{process.id}] | <green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>[{extra[id]}] - {message}</level>"
# The doubled braces keep a literal "{time:YYYY-MM-DD}" placeholder in the path;
# only START_TIME is interpolated here. Used as the folder of the file sinks.
LOG_FOLDER_PATH = f"server/user_data/logs/{{time:YYYY-MM-DD}}/{START_TIME}"
# Same path with the time placeholder filled in at import time.
# NOTE(review): not referenced in the visible part of this module — confirm it is used elsewhere.
LOG_FOLDER_PATH_FORMATED = LOG_FOLDER_PATH.format(time=loguru_datetime.now())
def logger_register():
    """Configure loguru's handlers for the current process.

    Always installs a stdout handler and a per-process "standart" log file at
    ``LOG_LEVEL``. When ``config.MORE_LOGS`` is truthy, separate TRACE and DEBUG
    files are added as well. Every record gets a default ``id`` extra of ``None``.
    """
    pid = os.getpid()

    def file_handler(sink, level):
        # File handlers share everything except the target path and the level.
        return {
            "sink": sink,
            "level": level,
            "format": LOG_FORMAT,
            "enqueue": ENQUEUE,
        }

    console_handler = {
        "sink": sys.stdout,
        "level": LOG_LEVEL,
        "format": LOG_FORMAT,
        "enqueue": ENQUEUE,
        "backtrace": BACKTRACE,
        "diagnose": DIAGNOSE,
    }

    handlers = [
        console_handler,
        file_handler(f"{LOG_FOLDER_PATH}/standart_{pid}_log_server", LOG_LEVEL),
    ]

    if config.MORE_LOGS:
        handlers.append(file_handler(f"{LOG_FOLDER_PATH}/trace_{pid}_log_server", "TRACE"))
        handlers.append(file_handler(f"{LOG_FOLDER_PATH}/debug_{pid}_log_server", "DEBUG"))

    logger.configure(handlers=handlers, extra={"id": None})
class InterceptHandler(logging.Handler):
    """Stdlib logging handler that forwards every record to loguru."""

    def emit(self, record):
        """Re-emit ``record`` through loguru, attributed to the original caller.

        :param record: the ``logging.LogRecord`` to forward.
        :raises Exception: whatever loguru raises while logging; the raw
            message is dumped to stdout first so it is not lost.
        """
        # Get corresponding Loguru level if it exists
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Find caller from where originated the logged message.
        # Stop when the stack runs out: without the `frame` check, walking past
        # the outermost frame would raise AttributeError on None.
        frame, depth = logging.currentframe(), 2
        while frame and frame.f_code.co_filename == logging.__file__:
            frame = frame.f_back
            depth += 1

        raw_message = record.getMessage()
        try:
            logger.opt(depth=depth, exception=record.exc_info).log(level, raw_message)
        except Exception:
            # Make sure the message is visible even if loguru itself failed
            pprint(raw_message)
            print(raw_message)
            raise
class GunicornLogger(Logger):
    """Gunicorn logger class that routes gunicorn's own logs through loguru."""

    def setup(self, cfg) -> None:
        """Attach the intercept handler to gunicorn's error and access loggers."""
        intercept = InterceptHandler()

        # logging.getLogger("gunicorn.error").handlers = [InterceptHandler()]
        # logging.getLogger("gunicorn.access").handlers = [InterceptHandler()]

        # Same handler and level for both gunicorn loggers, error log first
        for gunicorn_log in (self.error_log, self.access_log):
            gunicorn_log.addHandler(intercept)
            gunicorn_log.setLevel(LOG_LEVEL)

        # Configure logger before gunicorn starts logging
        logger_register()
def configure_logger() -> None:
    """Send all stdlib logging through loguru and register the loguru handlers."""
    root = logging.root
    root.handlers = [InterceptHandler()]
    root.setLevel(LOG_LEVEL)

    # Strip the handlers of every known logger so records bubble up to the root
    for logger_name in root.manager.loggerDict.keys():
        named_logger = logging.getLogger(logger_name)
        named_logger.handlers = []
        named_logger.propagate = True

    # Configure logger (again) if gunicorn is not used
    logger_register()

View file

@ -0,0 +1,39 @@
import asyncio
import time
from functools import wraps
from loguru import logger
def trace(func):
    """Decorator that logs calls, results and duration at TRACE level.

    Works for both coroutine functions and plain functions; the wrapped
    function's return value is passed through unchanged.
    """

    def shorten(value):
        # Strings and bytes are cut to 42 items for the summary line only.
        if type(value).__name__ in ["str", "bytes"]:
            return f"{value[:42]}..."
        return value

    if asyncio.iscoroutinefunction(func):
        logger.trace(f"{func.__name__!r} function is a coroutine")

        @wraps(func)
        async def wrapper(*args, **kwargs):
            start_ts = time.time()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            original_result = await func(*args, **kwargs)
            logger.trace(f"Result: {original_result}")
            logger.trace(f"Result type: {type(original_result)}")
            duration_ts = time.time() - start_ts
            result = shorten(original_result)
            logger.trace(f"{func.__name__!r}() returned {result!r} in {duration_ts:.2} seconds")
            return original_result

        return wrapper

    logger.trace(f"{func.__name__!r} is not a coroutine")

    @wraps(func)
    def wrapper(*args, **kwargs):
        start_ts = time.time()
        logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
        original_result = func(*args, **kwargs)
        logger.trace(f"Result: {original_result}")
        logger.trace(f"Result type: {type(original_result)}")
        duration_ts = time.time() - start_ts
        result = shorten(original_result)
        logger.trace(f"{func.__name__!r}() returned {result!r} in {duration_ts:.2} seconds")
        return original_result

    return wrapper

View file

@ -0,0 +1,30 @@
# Based on: https://stackoverflow.com/a/72735401/13452914
import logging
import sys
from loguru import logger
class InterceptHandler(logging.Handler):
    """
    Logging handler that hands stdlib logging records over to loguru.

    Anything that would otherwise be emitted by the stdlib logging machinery
    is re-logged through loguru instead.
    """

    @logger.catch(default=True, onerror=lambda _: sys.exit(1))
    def emit(self, record):
        # Prefer the loguru level with the same name; fall back to the number.
        try:
            loguru_level = logger.level(record.levelname).name
        except ValueError:
            loguru_level = record.levelno

        # Skip the frames that belong to the logging module itself so the
        # message is attributed to the code that actually logged it.
        caller, hops = sys._getframe(6), 6
        while caller and caller.f_code.co_filename == logging.__file__:
            caller = caller.f_back
            hops += 1

        message = record.getMessage()
        logger.opt(depth=hops, exception=record.exc_info).log(loguru_level, message)

44
server/utils/notify.py Normal file
View file

@ -0,0 +1,44 @@
import asyncio
import aiohttp
from enum import Enum
from loguru import logger
from server import config
class MessageStatus(Enum):
    # Status attached to an admin notification. task_send_message() returns
    # early, without sending anything, for the "GOOD" value.
    ERROR = "ERROR"
    GOOD = "GOOD"
# Strong references to in-flight notification tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_background_tasks = set()


async def send_message(text: str, silent: bool = False, status: MessageStatus = "ERROR") -> None:
    """Schedule an admin notification without waiting for it to be delivered.

    :param text: message text, forwarded to ``task_send_message``.
    :param silent: forwarded to ``task_send_message``.
    :param status: forwarded to ``task_send_message``.
    """
    task = asyncio.create_task(task_send_message(text, silent, status))
    _background_tasks.add(task)
    # Drop the reference again as soon as the task is done
    task.add_done_callback(_background_tasks.discard)
async def task_send_message(text: str, silent: bool = False, status: MessageStatus = "ERROR") -> None:
    """Send ``text`` to the configured Telegram admin chat.

    Nothing is sent when the bot token or admin id is not configured, or when
    ``status`` is "good" (given either as ``MessageStatus.GOOD`` or as the raw
    string ``"GOOD"``). Text longer than 4000 characters is truncated.

    :param text: message body, sent with ``parse_mode`` "HTML".
    :param silent: sent as ``disable_notification``.
    :param status: ``MessageStatus`` member or its string value.
    """
    if not config.TELEGRAM_BOT_TOKEN or not config.TELEGRAM_ADMIN_ID:
        logger.warning("Can't send log messages, because of lack of some informations. Ignore....")
        return

    # Accept both the enum member and its raw value: the defaults in this
    # module are plain strings, while the annotation advertises the enum, and
    # comparing only against the string made MessageStatus.GOOD slip through.
    if status in (MessageStatus.GOOD, MessageStatus.GOOD.value):
        return True  # kept as-is for callers that may look at the result

    if len(text) > 4000:
        logger.warning(f"Message is too long ({len(text)}): {text}")
        # NOTE(review): a hard cut can split an HTML tag, which Telegram may
        # reject with parse_mode=HTML — confirm whether that matters here.
        text = text[:4000]

    url = f"https://api.telegram.org/bot{config.TELEGRAM_BOT_TOKEN}/sendMessage"
    data = {
        "chat_id": config.TELEGRAM_ADMIN_ID,
        "text": text,
        "parse_mode": "HTML",
        "disable_notification": silent
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data=data) as response:
                if response.status == 200:
                    logger.info("Message sent successfully")
                else:
                    logger.warning(f"Failed to send message. Status: {response.status}")
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        # This coroutine normally runs as a fire-and-forget task, so nobody
        # would ever retrieve a raised exception; report it here instead.
        logger.warning(f"Failed to send message: {exc!r}")

49
server/utils/utils.py Normal file
View file

@ -0,0 +1,49 @@
import random
import re
import socket
from server.utils.logger_trace import trace
DEFAULT_PROTOCOL = "https://"
@trace
def correct_url(url: str) -> str:
    """Normalise every ``http:/``, ``http://``, ``https:/`` or ``https://`` in ``url`` to ``DEFAULT_PROTOCOL``.

    Workaround for a Safari bug (kept from the original comment). URLs without
    a protocol are returned unchanged.
    """
    protocol_pattern = r"https?://?"
    return re.sub(protocol_pattern, DEFAULT_PROTOCOL, url)
def string_to_number_ascii(input_str: str, key_number: int = None):
    """Turn a string into a number: sum of the code points of its upper-cased characters times a key.

    :param input_str: text to convert; it is upper-cased first, so the result
        is case-insensitive.
    :param key_number: multiplier. When omitted (``None``) a random integer
        from 0 to 100 inclusive is used. An explicit ``0`` is honoured and
        yields ``0`` (previously it was silently replaced by a random key).
    :return: ``sum(ord(c) for c in input_str.upper()) * key_number``.
    """
    # Test for None explicitly: `not key_number` would also discard a caller-supplied 0.
    if key_number is None:
        key_number = random.randint(0, 100)

    return sum(ord(char) for char in input_str.upper()) * key_number
def is_negative(num: int) -> bool:
    """Return True when ``num`` is strictly below zero."""
    return 0 > num
async def safe_check_redis_connection(connection):
    """Ping ``connection`` and return the ping's result, or ``False`` if it raises.

    :param connection: object exposing an awaitable ``ping()``.
    """
    try:
        return await connection.ping()
    except Exception:
        return False
def is_port_in_use(port: int) -> bool:
    """Return True when a TCP connection to ``localhost:port`` succeeds."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex reports the failure as an error code instead of raising
        status = probe.connect_ex(("localhost", port))
    finally:
        probe.close()
    return status == 0

67
server/worker.py Normal file
View file

@ -0,0 +1,67 @@
import atexit
import multiprocessing
from multiprocessing.util import _exit_function
import gunicorn.app.base
import uvicorn
from loguru import logger
from server import config, ban_db
from server.main import app
from server.utils.logger import GunicornLogger
from server.utils.logger_trace import trace
logger.trace(f"Uvicorn version: {uvicorn.__version__}")
def post_worker_init(worker):
    """Gunicorn ``post_worker_init`` hook: unregister multiprocessing's atexit function.

    :param worker: the worker passed in by gunicorn (unused).
    """
    # Remove the atexit handler set up by the parent process
    # https://github.com/benoitc/gunicorn/issues/1391#issuecomment-467010209
    logger.trace("Removing atexit handler")
    atexit.unregister(_exit_function)
def on_exit():
    """Log the shutdown and call ``ban_db.dump()``.

    Not wired up in this module: the "on_exit" entry in the options of
    execute_server_worker() is commented out.
    """
    logger.debug("GUNICORN: On exit")
    ban_db.dump()
@trace
def number_of_workers():
    """Pick the gunicorn worker count: one per core from 8 cores up, otherwise two per core."""
    cores = multiprocessing.cpu_count()
    # workers = (cores * 2) + 2
    workers = cores if cores >= 8 else cores * 2
    logger.debug(f"Number of workers: {workers}")
    return workers
class GunicornStandaloneApplication(gunicorn.app.base.BaseApplication):
    """Run an already-constructed app under gunicorn from Python code."""

    def __init__(self, app, options=None):
        """
        :param app: the application object handed to the workers.
        :param options: mapping of gunicorn setting names to values; ``None``
            means no overrides.
        """
        self.options = options or {}
        self.application = app
        super().__init__()

    def load_config(self):
        """Apply every option that is a known gunicorn setting and is not ``None``."""
        known_settings = self.cfg.settings
        applicable = {
            name: value
            for name, value in self.options.items()
            if name in known_settings and value is not None
        }
        for name, value in applicable.items():
            self.cfg.set(name.lower(), value)

    def load(self):
        """Return the application object given to the constructor."""
        return self.application
def execute_server_worker(host: str, port: int):
    """Start gunicorn with uvicorn workers serving ``app`` on ``host:port``.

    :param host: interface to bind to.
    :param port: TCP port to bind to.
    """
    bind_address = f"{host}:{port}"
    worker_count = number_of_workers()

    gunicorn_options = dict(
        bind=bind_address,
        workers=worker_count,
        logger_class=GunicornLogger,
        worker_class="uvicorn.workers.UvicornWorker",
        preload_app=True,
        post_worker_init=post_worker_init,
        timeout=config.WORKER_TIMEOUT,
        # on_exit=on_exit,
    )

    server = GunicornStandaloneApplication(app, gunicorn_options)
    server.run()

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
static/apple-touch-icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

9
static/browserconfig.xml Normal file
View file

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<browserconfig>
<msapplication>
<tile>
<square150x150logo src="/mstile-150x150.png"/>
<TileColor>#00aba9</TileColor>
</tile>
</msapplication>
</browserconfig>

BIN
static/favicon-16x16.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
static/favicon-32x32.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
static/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

1
static/humans.txt Normal file
View file

@ -0,0 +1 @@
Me in touch: https://github.com/ZhymabekRoman

BIN
static/mstile-144x144.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7 KiB

BIN
static/mstile-150x150.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

BIN
static/mstile-310x150.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Some files were not shown because too many files have changed in this diff Show more