Moving codebase from GitHub
4
.env_template
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
HOST_ADDRESS = "http://localhost:6752"
|
||||
TIMEOUT=3
|
||||
ADMIN_SECRET_KEY="test"
|
||||
MEDIUM_AUTH_COOKIES="Put your premium subscription account cookies here; the uid and sid properties are required"
|
||||
167
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
user_data/
|
||||
*.dist/
|
||||
*.build/
|
||||
medium_cache.sqlite
|
||||
medium_db_cache.sqlite
|
||||
ban_post_list.db
|
||||
3
.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "server/toolkits/core"]
|
||||
path = server/toolkits/core
|
||||
url = https://github.com/Freedium-cfd/core
|
||||
158
CaddyfileDev
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
:6752 {
|
||||
# header Server "nginx"
|
||||
encode gzip
|
||||
header -Server
|
||||
|
||||
|
||||
handle_path /site.webmanifest {
|
||||
root * ./static/site.webmanifest
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-32x32.png {
|
||||
root * ./static/favicon-32x32.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /robots.txt {
|
||||
root * ./static/robots.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /ads.txt {
|
||||
root * ./static/ads.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /humans.txt {
|
||||
root * ./static/humans.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-150x150.png {
|
||||
root * ./static/mstile-150x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x310.png {
|
||||
root * ./static/mstile-310x310.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /sitemap.xml {
|
||||
root * ./static/sitemap.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
|
||||
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-70x70.png {
|
||||
root * ./static/mstile-70x70.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-192x192.png {
|
||||
root * ./static/android-chrome-192x192.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x150.png {
|
||||
root * ./static/mstile-310x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /safari-pinned-tab.svg {
|
||||
root * ./static/safari-pinned-tab.svg
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-512x512.png {
|
||||
root * ./static/android-chrome-512x512.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-16x16.png {
|
||||
root * ./static/favicon-16x16.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon.ico {
|
||||
root * ./static/favicon.ico
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /browserconfig.xml {
|
||||
root * ./static/browserconfig.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-144x144.png {
|
||||
root * ./static/mstile-144x144.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /security.txt {
|
||||
root * ./static/security.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon.png {
|
||||
root * ./static/apple-touch-icon.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /onboarding/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /wp-* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.env {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /api* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /rss.xml {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.git/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.well-known/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
route /* {
|
||||
reverse_proxy localhost:7080
|
||||
}
|
||||
}
|
||||
11
CaddyfileDevTemplate
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
:6752 {
|
||||
# header Server "nginx"
|
||||
encode gzip
|
||||
header -Server
|
||||
|
||||
{{ template }}
|
||||
|
||||
route /* {
|
||||
reverse_proxy localhost:7080
|
||||
}
|
||||
}
|
||||
31
CaddyfileMaintance
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
|
||||
freedium.cfd {
|
||||
# header Server "nginx"
|
||||
encode gzip
|
||||
header -Server
|
||||
|
||||
route /* {
|
||||
header Content-Type text/html
|
||||
respond <<HTML
|
||||
<!doctype html>
|
||||
<title>Site Maintenance</title>
|
||||
<style>
|
||||
body { text-align: center; padding: 150px; }
|
||||
h1 { font-size: 50px; }
|
||||
body { font: 20px Helvetica, sans-serif; color: #333; }
|
||||
article { display: block; text-align: left; width: 650px; margin: 0 auto; }
|
||||
a { color: #dc8100; text-decoration: none; }
|
||||
a:hover { color: #333; text-decoration: none; }
|
||||
</style>
|
||||
|
||||
<article>
|
||||
<h1>We’ll be back soon!</h1>
|
||||
<div>
|
||||
<p>Sorry for the inconvenience but we’re performing some maintenance at the moment. If you need to you can always <a href="mailto:#admin@freedium.cfd">contact us</a>, otherwise we’ll be back online shortly!</p>
|
||||
<p>— Freedium developers</p>
|
||||
</div>
|
||||
</article>
|
||||
|
||||
HTML 200
|
||||
}
|
||||
}
|
||||
159
CaddyfileProd
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
|
||||
freedium.cfd {
|
||||
# header Server "nginx"
|
||||
encode gzip
|
||||
header -Server
|
||||
|
||||
|
||||
handle_path /site.webmanifest {
|
||||
root * ./static/site.webmanifest
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-32x32.png {
|
||||
root * ./static/favicon-32x32.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /robots.txt {
|
||||
root * ./static/robots.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /ads.txt {
|
||||
root * ./static/ads.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /humans.txt {
|
||||
root * ./static/humans.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-150x150.png {
|
||||
root * ./static/mstile-150x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x310.png {
|
||||
root * ./static/mstile-310x310.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /sitemap.xml {
|
||||
root * ./static/sitemap.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
|
||||
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-70x70.png {
|
||||
root * ./static/mstile-70x70.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-192x192.png {
|
||||
root * ./static/android-chrome-192x192.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x150.png {
|
||||
root * ./static/mstile-310x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /safari-pinned-tab.svg {
|
||||
root * ./static/safari-pinned-tab.svg
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-512x512.png {
|
||||
root * ./static/android-chrome-512x512.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-16x16.png {
|
||||
root * ./static/favicon-16x16.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon.ico {
|
||||
root * ./static/favicon.ico
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /browserconfig.xml {
|
||||
root * ./static/browserconfig.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-144x144.png {
|
||||
root * ./static/mstile-144x144.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /security.txt {
|
||||
root * ./static/security.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon.png {
|
||||
root * ./static/apple-touch-icon.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /onboarding/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /wp-* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.env {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /api* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /rss.xml {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.git/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.well-known/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
route /* {
|
||||
reverse_proxy localhost:7080
|
||||
}
|
||||
}
|
||||
12
CaddyfileProdTemplate
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# https://futurestud.io/tutorials/caddy-reverse-proxy-a-node-js-app
|
||||
freedium.cfd {
|
||||
# header Server "nginx"
|
||||
encode gzip
|
||||
header -Server
|
||||
|
||||
{{ template }}
|
||||
|
||||
route /* {
|
||||
reverse_proxy localhost:7080
|
||||
}
|
||||
}
|
||||
53
README.md
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
<p align="center"><a href="https://iosf.in/" target="_blank"><img src="https://avatars.githubusercontent.com/u/142643505?s=200&v=4" width="20%"></a></p>
|
||||
|
||||
<h1 align="center">Freedium: Your paywall breakthrough for Medium!</h1>
|
||||
|
||||
[Buy me a coffee](https://www.buymeacoffee.com/zhymabekroman)
|
||||
|
||||
## FAQ
|
||||
### What happened to the GitHub organization?
|
||||
Our whole GitHub organization is not public for now. The Reddit community where all of this began is unfortunately also gone, so we have moved to Codeberg.
|
||||
|
||||
### Why did we create Freedium?
|
||||
In mid-June to mid-July 2023, Medium changed their paywall method, and all old paywall bypass methods we had stopped working. So I became obsessed with the idea of creating a service to bypass Medium's paywalled posts. Honestly I am not a big fan of Medium, but I sometimes read articles to improve my knowledge.
|
||||
|
||||
### How does Freedium work?
|
||||
In the first version of Freedium, we reverse-engineered Medium.com's GraphQL endpoints and built our own parser and toolkits to show you unpaywalled Medium posts. Unfortunately, Medium closed this loophole, and nowadays we just pay for subscriptions and share access through Freedium. Sometimes we run into bugs because of the self-written parser, but we are working to make Freedium bug-free.
|
||||
|
||||
### What languages are being used?
|
||||
We use Python with the Jinja template engine, and some JS magic in the frontend :)
|
||||
|
||||
### Wow! I would like to contribute to Freedium. How can I do that?
|
||||
We need volunteers who have Medium subscriptions because we might get banned by Medium. And if you are a developer, you can start with this repository: https://codeberg.org/Freedium-cfd/web
|
||||
|
||||
### Plans, future?
|
||||
Speed up Freedium, and probably create an open source Medium frontend in the next life
|
||||
|
||||
## Tech stack:
|
||||
- FastAPI, Gunicorn (with Uvicorn as worker)
|
||||
- Tailwind CSS v3
|
||||
- Dragonfly (Redis-like key-value database)
|
||||
- Jinja2
|
||||
- Python 3.9+
|
||||
- Caddy
|
||||
- Sentry
|
||||
|
||||
## Local run:
|
||||
Requirements:
|
||||
- Medium subscription
|
||||
- Python 3.9+
|
||||
|
||||
```bash
|
||||
git clone https://codeberg.org/Freedium-cfd/web ./web
|
||||
cd ./web
|
||||
pip install -r requirements.txt
|
||||
# for linux also do: pip install -r requirements-fast.txt
|
||||
pip install ./core
|
||||
pip install ./rl_string_helper
|
||||
|
||||
```
|
||||
|
||||
Now we need to configure our Freedium instance. Copy `.env_template` to a `.env` configuration file and set the values you need.
|
||||
|
||||
If you are on Linux, execute `./script/start_dev.sh` and open 'localhost:6752' in your browser. This will start the Caddy reverse proxy.
|
||||
If you are on another OS or want to test without the reverse proxy, you can start the server with the command `python3 -m server server` and access it at 'localhost:7080'.
|
||||
2
bin/versions.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
dragonfly==1.7.1
|
||||
caddy==2.7.6
|
||||
BIN
bin/x86_64/caddy
Executable file
BIN
bin/x86_64/dragonfly
Executable file
164
core/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
medium_cache.sqlite
|
||||
query_result.json
|
||||
medium.html
|
||||
3
core/.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "medium_parser/toolkits/rl_string_helper"]
|
||||
path = medium_parser/toolkits/rl_string_helper
|
||||
url = https://github.com/Freedium-cfd/rl-string-helper
|
||||
8
core/README.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Medium.com GraphQL API parser
|
||||
This repo contains a parser for the Medium GraphQL API.
|
||||
|
||||
## Export format:
|
||||
- HTML (with Tailwind CSS)
|
||||
|
||||
## TODO:
|
||||
- Add Markdown export support
|
||||
15
core/medium_parser/__init__.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
|
||||
from aiohttp_retry import ExponentialRetry
|
||||
|
||||
import jinja2
|
||||
from .cache_db import SQLiteCacheBackend
|
||||
|
||||
cache = SQLiteCacheBackend('medium_db_cache.sqlite')
|
||||
cache.init_db()
|
||||
|
||||
retry_options = ExponentialRetry(attempts=3)
|
||||
|
||||
from . import exceptions as exceptions
|
||||
from . import exceptions as medium_parser_exceptions
|
||||
|
||||
jinja_env = jinja2.Environment(enable_async=True)
|
||||
89
core/medium_parser/cache_db.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
from typing import Union
|
||||
import sqlite3
|
||||
import json
|
||||
from warnings import warn
|
||||
try:
|
||||
import sqlite_zstd
|
||||
except ImportError:
|
||||
warn("Can't use zstd compression. Please install 'sqlite_zstd' package")
|
||||
sqlite_zstd = None
|
||||
|
||||
class CacheResponse:
    """Wrapper around a raw string value read from the cache.

    The wrapped text is kept verbatim in ``data``; ``json()`` decodes it on
    demand, and both ``str()`` and ``repr()`` return the raw text unchanged.
    """

    # A real one-element tuple: ``('data')`` is just a parenthesised string.
    __slots__ = ('data',)

    def __init__(self, data: str):
        self.data = data

    def json(self):
        """Decode ``data`` as JSON and return the resulting Python object.

        Raises whatever ``json.loads`` raises when ``data`` is not valid JSON.
        """
        return json.loads(self.data)

    def __repr__(self):
        return self.data

    def __str__(self):
        return self.data
|
||||
|
||||
class SQLiteCacheBackend:
    """Key/value cache stored in a single SQLite table named ``cache``.

    The table has two TEXT columns, ``key`` (primary key) and ``value``.
    One connection and one cursor are created per instance and reused by
    every method. Each statement runs inside ``with self.connection`` so the
    connection's context manager commits or rolls back around it.
    """

    __slots__ = ('connection', 'cursor')

    def __init__(self, database: str):
        """Open ``database`` and apply the connection-level PRAGMAs.

        When the optional ``sqlite_zstd`` module was imported successfully,
        its extension is loaded into the connection. Extension loading is
        enabled only for that call and switched off again afterwards.
        """
        self.connection = sqlite3.connect(database)
        self.connection.execute("PRAGMA foreign_keys = ON")  # Needed for working with foreign keys in db
        self.connection.execute("PRAGMA journal_mode=WAL")
        self.connection.execute("PRAGMA auto_vacuum=full")
        self.cursor = self.connection.cursor()

        if sqlite_zstd is not None:
            # Only open the extension-loading window when there is something
            # to load, and close it again even if loading fails.
            self.connection.enable_load_extension(True)
            try:
                sqlite_zstd.load(self.connection)
            finally:
                self.connection.enable_load_extension(False)

    def all(self):
        """Return every ``(key, value)`` row of the cache table."""
        with self.connection:
            return self.cursor.execute("SELECT * FROM cache").fetchall()

    def all_length(self) -> int:
        """Return the number of rows in the cache table."""
        with self.connection:
            return self.cursor.execute("SELECT COUNT(*) FROM cache").fetchone()[0]

    def random(self, size: int):
        """Return up to ``size`` rows picked in random order."""
        with self.connection:
            return self.cursor.execute("SELECT * FROM cache ORDER BY RANDOM() LIMIT :0", {'0': size}).fetchall()

    def enable_zstd(self):
        """Turn on zstd compression for the ``value`` column of ``cache``.

        Calls the extension's ``zstd_enable_transparent`` function, then runs
        ``zstd_incremental_maintenance`` and a ``vacuum``.

        Raises:
            ValueError: if the ``sqlite_zstd`` package is not installed.
        """
        if sqlite_zstd is None:
            raise ValueError("Can't use zstd compression. Please install 'sqlite_zstd' package")

        with self.connection:
            self.cursor.execute("SELECT zstd_enable_transparent('{\"table\": \"cache\", \"column\": \"value\", \"compression_level\": 9, \"dict_chooser\": \"''a''\"}')")
            try:
                self.connection.execute("PRAGMA auto_vacuum=full")
            except Exception as error:
                # Best effort: a failing PRAGMA must not abort the maintenance below.
                print(error)
            self.cursor.execute("SELECT zstd_incremental_maintenance(null, 1);")
            self.cursor.execute("vacuum;")

    def init_db(self):
        """Create the ``cache`` table if it does not exist yet."""
        with self.connection:
            self.cursor.execute("CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value TEXT)")

    def pull(self, key: str) -> 'Union[CacheResponse, None]':
        """Return the cached value for ``key`` wrapped in ``CacheResponse``.

        Returns ``None`` when there is no row for ``key``.
        """
        with self.connection:
            row = self.cursor.execute("SELECT value FROM cache WHERE key = :0", {'0': key}).fetchone()
        if row:
            return CacheResponse(row[0])
        return None

    def push(self, key: str, value: Union[str, dict]) -> None:
        """Insert or replace the value stored under ``key``.

        A ``dict`` is serialised with ``json.dumps`` first; a ``str`` is
        stored as is.

        Raises:
            ValueError: if ``value`` is neither a ``dict`` nor a ``str``.
        """
        if isinstance(value, dict):
            value = json.dumps(value)
        elif not isinstance(value, str):
            raise ValueError(f"value argument should be only string type not {type(value).__name__}")
        with self.connection:
            self.cursor.execute("INSERT OR REPLACE INTO cache VALUES (:0, :1)", {'0': key, '1': value})

    def delete(self, key: str) -> None:
        """Remove the row stored under ``key``, if any."""
        with self.connection:
            self.cursor.execute("DELETE FROM cache WHERE key = :0", {'0': key})

    def close(self) -> None:
        """Close the underlying connection; safe to call more than once."""
        # ``connection`` is an unset slot if ``__init__`` failed before
        # assigning it, so look it up defensively.
        connection = getattr(self, 'connection', None)
        if connection is not None:
            connection.close()

    def __del__(self) -> None:
        self.close()
|
||||
434
core/medium_parser/core.py
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
import os
|
||||
import math
|
||||
import textwrap
|
||||
import urllib.parse
|
||||
|
||||
import jinja2
|
||||
import tld
|
||||
from loguru import logger
|
||||
|
||||
from rl_string_helper import (RLStringHelper, parse_markups,
|
||||
split_overlapping_ranges)
|
||||
|
||||
from . import cache, jinja_env
|
||||
from .exceptions import (InvalidMediumPostID, InvalidMediumPostURL, InvalidURL,
|
||||
MediumParserException, MediumPostQueryError)
|
||||
from .medium_api import query_post_by_id
|
||||
from .models.html_result import HtmlResult
|
||||
from .time import convert_datetime_to_human_readable
|
||||
from .utils import (get_medium_post_id_by_url, getting_percontage_of_match,
|
||||
is_valid_medium_post_id_hexadecimal, is_valid_medium_url,
|
||||
is_valid_url, sanitize_url)
|
||||
|
||||
|
||||
class MediumParser:
|
||||
__slots__ = ('__post_id', 'post_data', 'jinja', 'timeout', 'host_address', 'auth_cookies')
|
||||
|
||||
def __init__(self, post_id: str, timeout: int, host_address: str, auth_cookies: str = None):
    """Store the parser settings for a single Medium post.

    ``post_id`` is assigned through the ``post_id`` property, so its setter
    validates it. ``post_data`` starts as ``None``.
    """
    self.timeout, self.host_address = timeout, host_address
    # Goes through the property setter, which validates the ID.
    self.post_id = post_id
    self.post_data, self.auth_cookies = None, auth_cookies
|
||||
|
||||
@classmethod
async def from_url(cls, url: str, timeout: int, host_address: str, auth_cookies: str = None) -> 'MediumParser':
    """Build a parser from a post URL instead of a post ID.

    The URL is sanitized, checked, and resolved to a post ID with
    ``get_medium_post_id_by_url``.

    Args:
        url: URL of the post.
        timeout: forwarded to the URL helpers and to the new parser.
        host_address: forwarded to the new parser.
        auth_cookies: optional cookies forwarded to the new parser, so a
            parser created from a URL can be configured like one created
            from an ID.

    Raises:
        InvalidURL: if ``url`` passes ``is_valid_url`` but the sanitized URL
            fails ``is_valid_medium_url``.
        InvalidMediumPostURL: if no post ID could be resolved for the URL.
    """
    sanitized_url = sanitize_url(url)
    # NOTE(review): input that fails is_valid_url is not rejected here and
    # falls through to the ID lookup below - confirm this is intended.
    if is_valid_url(url) and not await is_valid_medium_url(sanitized_url, timeout):
        raise InvalidURL(f'Invalid medium URL: {sanitized_url}')

    post_id = await get_medium_post_id_by_url(sanitized_url, timeout)
    if not post_id:
        raise InvalidMediumPostURL(f'Could not find medium post ID for URL: {sanitized_url}')

    return cls(post_id, timeout, host_address, auth_cookies=auth_cookies)
|
||||
|
||||
@property
def post_id(self):
    """The Medium post ID this parser works on."""
    return self.__post_id

@post_id.setter
def post_id(self, value):
    """Store ``value`` as the post ID after validating it.

    Raises:
        InvalidMediumPostID: if ``value`` fails
            ``is_valid_medium_post_id_hexadecimal``.
    """
    if not is_valid_medium_post_id_hexadecimal(value):
        raise InvalidMediumPostID(f'Invalid medium post ID: {value}')

    self.__post_id = value
|
||||
|
||||
async def delete_from_cache(self, post_id: str = None):
    """Delete a post from the cache and return ``True``.

    When ``post_id`` is empty or omitted, this parser's own post ID is used.
    """
    target_id = post_id or self.post_id
    cache.delete(target_id)
    return True
|
||||
|
||||
async def get_post_data_from_cache(self):
    """Return the cached payload for this post decoded from JSON.

    Returns ``None`` when the cache has no entry for the post ID.
    """
    logger.debug("Using cache backend")
    cached = cache.pull(self.post_id)
    if not cached:
        return None

    logger.debug("post query was found on cache")
    return cached.json()
|
||||
|
||||
async def get_post_data_from_api(self):
    """Fetch this post through ``query_post_by_id``.

    Any exception raised by the query is logged and swallowed, and ``None``
    is returned instead, so callers only have to check the result.
    """
    # Also reached on a cache miss, so the message must not claim that the
    # cache backend is disabled.
    logger.debug("Querying post data from Medium API")
    try:
        return await query_post_by_id(self.post_id, self.timeout, self.auth_cookies)
    except Exception as ex:
        logger.debug("Error while querying post by Medium API")
        logger.exception(ex)
        return None
|
||||
|
||||
async def query(self, use_cache: bool = True):
    """Load the post payload, store it on ``self.post_data`` and return it.

    The cache is consulted first when ``use_cache`` is true; on a miss, or
    when the cache is bypassed, the API is queried. A payload fetched from
    the API is written to the cache. A payload that came from the cache is
    not written back, since that would rewrite an identical row on every hit.

    Raises:
        MediumPostQueryError: if no payload was obtained, the payload is not
            a dict, it carries an ``error`` entry, or it has no
            ``data.post`` entry.
    """
    post_data = await self.get_post_data_from_cache() if use_cache else None

    fetched_from_api = not post_data
    if fetched_from_api:
        post_data = await self.get_post_data_from_api()

    if not post_data or not isinstance(post_data, dict) or post_data.get("error") or not post_data.get("data") or not post_data.get("data").get("post"):
        raise MediumPostQueryError(f'Could not query post by ID from API: {self.post_id}')

    if fetched_from_api:
        cache.push(self.post_id, post_data)

    self.post_data = post_data
    return self.post_data
|
||||
|
||||
async def _parse_and_render_content_html_post(self, content: dict, title: str, subtitle: str, preview_image_id: str, highlights: list, tags: list) -> tuple[list, str, str]:
    """Render the paragraphs of a post body into a list of HTML fragments.

    Walks ``content["bodyModel"]["paragraphs"]`` in order and renders each
    paragraph according to its ``type``. Consecutive list items, code lines
    and image-row members are merged into a single fragment. Among the first
    four paragraphs, those that repeat the title, a tag, the subtitle or the
    preview image are dropped.

    Args:
        content: Post content dict holding ``bodyModel.paragraphs``.
        title: Post title, used to detect a duplicated title paragraph.
        subtitle: Post subtitle; may be replaced by the matching paragraph's
            text, or reset to ``None`` when it ends with an ellipsis and a
            long leading paragraph is found.
        preview_image_id: ID of the preview image, skipped if repeated.
        highlights: Highlight dicts (``paragraphs``, ``startOffset``,
            ``endOffset``) applied to the matching paragraphs.
        tags: Tag dicts; their ``displayTitle`` values are used to detect
            tag paragraphs.

    Returns:
        ``(out_paragraphs, title, subtitle)`` where ``out_paragraphs`` is the
        list of rendered HTML strings.
    """
    paragraphs = content["bodyModel"]["paragraphs"]
    tags_list = [tag["displayTitle"] for tag in tags]
    out_paragraphs = []
    current_pos = 0

    def parse_paragraph_text(text: str, markups: list, is_code: bool = False) -> "RLStringHelper":
        """Build a formatter for ``text`` with every markup applied as a template."""
        if is_code:
            quote_html_type = ["minimal"]
        else:
            quote_html_type = ["full"]
        text_formater = RLStringHelper(text, quote_html_type=quote_html_type)

        parsed_markups = parse_markups(markups)
        fixed_markups = split_overlapping_ranges(parsed_markups)

        for markup in fixed_markups:
            text_formater.set_template(markup["start"], markup["end"], markup["template"])

        return text_formater

    while len(paragraphs) > current_pos:
        paragraph = paragraphs[current_pos]
        logger.trace(f"Current paragraph #{current_pos} data: {paragraph}")

        # Only the first four paragraphs are checked for duplicated metadata.
        if current_pos in range(4):
            if paragraph["type"] in ["H3", "H4", "H2"]:
                if getting_percontage_of_match(paragraph["text"], title) > 80:
                    logger.trace("Title was detected, ignore...")
                    current_pos += 1
                    continue
            if paragraph["type"] in ["H4"]:
                if paragraph["text"] in tags_list:
                    logger.trace("Tag was detected, ignore...")
                    current_pos += 1
                    continue
            if paragraph["type"] in ["H4", "P"]:
                is_paragraph_subtitle = getting_percontage_of_match(paragraph["text"], subtitle) > 80
                if is_paragraph_subtitle and not subtitle.endswith("…"):
                    logger.trace("Subtitle was detected, ignore...")
                    subtitle = paragraph["text"]
                    current_pos += 1
                    continue
                elif subtitle and subtitle.endswith("…") and len(paragraph["text"]) > 100:
                    subtitle = None
            elif paragraph["type"] == "IMG":
                if paragraph["metadata"]["id"] == preview_image_id:
                    logger.trace("Preview image was detected, ignore...")
                    current_pos += 1
                    continue

        if paragraph["text"] is None:
            text_formater = None
        else:
            text_formater = parse_paragraph_text(paragraph["text"], paragraph["markups"])

        # Highlights need a formatter; a paragraph without text has none, so
        # skip them instead of failing on ``None.get_text()``.
        if text_formater is not None:
            for highlight in highlights:
                for highlight_paragraph in highlight["paragraphs"]:
                    if highlight_paragraph["name"] == paragraph["name"]:
                        logger.trace("Apply highlight to this paragraph")
                        if highlight_paragraph["text"] != text_formater.get_text():
                            logger.warning("Highlighted text and paragraph text are not the same! Skip...")
                            break
                        quote_markup_template = '<mark style="background-color: rgb(200 227 200);">{{ text }}</mark>'
                        text_formater.set_template(
                            highlight["startOffset"],
                            highlight["endOffset"],
                            quote_markup_template,
                        )
                        break

        if paragraph["type"] == "H2":
            css_class = []
            # Extra top padding only when something was already rendered.
            if out_paragraphs:
                css_class.append("pt-12")
            header_template = jinja_env.from_string('<h2 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h2>')
            header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
            out_paragraphs.append(header_template_rendered)
        elif paragraph["type"] == "H3":
            css_class = []
            if out_paragraphs:
                css_class.append("pt-12")
            header_template = jinja_env.from_string('<h3 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h3>')
            header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
            out_paragraphs.append(header_template_rendered)
        elif paragraph["type"] == "H4":
            css_class = []
            if out_paragraphs:
                css_class.append("pt-8")
            header_template = jinja_env.from_string('<h4 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-l md:text-xl {{ css_class }}">{{ text }}</h4>')
            header_template_rendered = await header_template.render_async(text=text_formater.get_text(), css_class="".join(css_class))
            out_paragraphs.append(header_template_rendered)
        elif paragraph["type"] == "IMG":
            image_template = jinja_env.from_string(
                '<div class="mt-7"><img alt="{{ paragraph.metadata.alt }}" style="margin: auto;" class="pt-5 lazy" role="presentation" data-src="https://miro.medium.com/v2/resize:fit:700/{{ paragraph.metadata.id }}"></div>'
            )
            image_caption_template = jinja_env.from_string(
                "<figcaption class='mt-3 text-sm text-center text-gray-500 dark:text-gray-200'>{{ text }}</figcaption>"
            )
            if paragraph["layout"] == "OUTSET_ROW":
                # Collect this image and every following OUTSET_ROW_CONTINUE
                # paragraph into one horizontal row.
                image_templates_row = []
                img_row_template = jinja_env.from_string('<div class="mx-5"><div class="flex flex-row justify-center">{{ images }}</div></div>')
                image_template_rendered = await image_template.render_async(paragraph=paragraph)
                image_templates_row.append(image_template_rendered)
                _tmp_current_pos = current_pos + 1
                while len(paragraphs) > _tmp_current_pos:
                    _paragraph = paragraphs[_tmp_current_pos]
                    if _paragraph["layout"] == "OUTSET_ROW_CONTINUE":
                        image_template_rendered = await image_template.render_async(paragraph=_paragraph)
                        image_templates_row.append(image_template_rendered)
                    else:
                        break

                    _tmp_current_pos += 1

                img_row_template_rendered = await img_row_template.render_async(images="".join(image_templates_row))
                out_paragraphs.append(img_row_template_rendered)

                # Point at the last consumed paragraph; the shared increment
                # at the bottom of the loop moves past it.
                current_pos = _tmp_current_pos - 1
            else:
                image_template_rendered = await image_template.render_async(paragraph=paragraph)
                out_paragraphs.append(image_template_rendered)
                if paragraph["text"]:
                    out_paragraphs.append(await image_caption_template.render_async(text=text_formater.get_text()))
        elif paragraph["type"] == "P":
            css_class = ["leading-8"]
            paragraph_template = jinja_env.from_string('<p class="{{ css_class }}">{{ text }}</p>')
            # Tighter margin directly after a heading. The ``current_pos > 0``
            # guard stops index -1 from wrapping around to the last paragraph.
            if current_pos > 0 and paragraphs[current_pos - 1]["type"] in ["H4", "H3"]:
                css_class.append("mt-3")
            else:
                css_class.append("mt-7")
            paragraph_template_rendered = await paragraph_template.render_async(text=text_formater.get_text(), css_class=" ".join(css_class))
            out_paragraphs.append(paragraph_template_rendered)
        elif paragraph["type"] == "ULI":
            uli_template = jinja_env.from_string('<ul class="list-disc pl-8 mt-2">{{ li }}</ul>')
            li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
            li_templates = []

            # Merge the run of consecutive ULI paragraphs into one <ul>.
            _tmp_current_pos = current_pos
            while len(paragraphs) > _tmp_current_pos:
                _paragraph = paragraphs[_tmp_current_pos]
                if _paragraph["type"] == "ULI":
                    text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
                    li_template_rendered = await li_template.render_async(text=text_formater.get_text())
                    li_templates.append(li_template_rendered)
                else:
                    break

                _tmp_current_pos += 1

            uli_template_rendered = await uli_template.render_async(li="".join(li_templates))
            out_paragraphs.append(uli_template_rendered)

            current_pos = _tmp_current_pos - 1
        elif paragraph["type"] == "OLI":
            ol_template = jinja_env.from_string('<ol class="list-decimal pl-8 mt-2">{{ li }}</ol>')
            li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
            li_templates = []

            # Merge the run of consecutive OLI paragraphs into one <ol>.
            _tmp_current_pos = current_pos
            while len(paragraphs) > _tmp_current_pos:
                _paragraph = paragraphs[_tmp_current_pos]
                if _paragraph["type"] == "OLI":
                    text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
                    li_template_rendered = await li_template.render_async(text=text_formater.get_text())
                    li_templates.append(li_template_rendered)
                else:
                    break

                _tmp_current_pos += 1

            ol_template_rendered = await ol_template.render_async(li="".join(li_templates))
            out_paragraphs.append(ol_template_rendered)

            current_pos = _tmp_current_pos - 1
        elif paragraph["type"] == "PRE":
            pre_template = jinja_env.from_string('<pre class="p-4 mt-7 bg-gray-100 dark:bg-gray-900 flex flex-col justify-center">{{code_block}}</pre>')
            code_block_template = jinja_env.from_string('<code class="overflow-x-auto mt-1 {{ code_css_class }} bg-gray-100 dark:bg-gray-900">{{ text }}</code>')

            code_css_class = []
            if paragraph["codeBlockMetadata"] and paragraph["codeBlockMetadata"]["lang"] is not None:
                code_css_class.append(f'language-{paragraph["codeBlockMetadata"]["lang"]}')
            else:
                code_css_class.append('nohighlight')

            # Merge the run of consecutive PRE paragraphs into one code block.
            code_list = []
            _tmp_current_pos = current_pos
            while len(paragraphs) > _tmp_current_pos:
                _paragraph = paragraphs[_tmp_current_pos]
                if _paragraph["type"] == "PRE":
                    text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"], is_code=True)
                    code_list.append(text_formater.get_text())
                else:
                    break

                _tmp_current_pos += 1

            code_block_template_rendered = await code_block_template.render_async(text="\n".join(code_list), code_css_class=" ".join(code_css_class))
            pre_template_rendered = await pre_template.render_async(code_block=code_block_template_rendered)

            out_paragraphs.append(pre_template_rendered)
            current_pos = _tmp_current_pos - 1
        elif paragraph["type"] == "BQ":
            bq_template = jinja_env.from_string('<blockquote class="px-5 pt-3 pb-3 mt-5 shadow-lf"><p style="font-style: italic;">{{ text }}</p></blockquote>')
            bq_template_rendered = await bq_template.render_async(text=text_formater.get_text())
            logger.trace(bq_template_rendered)
            out_paragraphs.append(bq_template_rendered)
        elif paragraph["type"] == "PQ":
            pq_template = jinja_env.from_string('<blockquote class="mt-7 text-2xl ml-5 text-gray-600 dark:text-gray-300"><p>{{ text }}</p></blockquote>')
            pq_template_rendered = await pq_template.render_async(text=text_formater.get_text())
            logger.trace(pq_template_rendered)
            out_paragraphs.append(pq_template_rendered)
        elif paragraph["type"] == 'MIXTAPE_EMBED':
            embed_template = jinja_env.from_string("""
                <div class="flex border border-gray-300 p-2 mt-7 items-center overflow-hidden"><a rel="noopener follow" href="{{ url }}" target="_blank"> <div class="flex flex-row justify-between p-2 overflow-hidden"><div class="flex flex-col justify-center p-2"><h2 class="text-black dark:text-gray-100 text-base font-bold">{{ embed_title }}</h2><div class="mt-2 block"><h3 class="text-grey-darker text-sm">{{ embed_description }}</h3></div><div class="mt-5" style=""><p class="text-grey-darker text-xs">{{ embed_site }}</p></div></div><div class="relative flex flew-row h-40 w-72"><div class="lazy absolute inset-0 bg-cover bg-center" data-bg="https://miro.medium.com/v2/resize:fit:320/{{ paragraph.mixtapeMetadata.thumbnailImageId }}"></div></div></div> </a></div>
            """)
            if paragraph.get("mixtapeMetadata") is not None:
                url = paragraph["mixtapeMetadata"]["href"]
            else:
                logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't get url")
                current_pos += 1
                continue

            text_raw = paragraph["text"]

            # Exactly three markups are required; the second and third give
            # the title and description ranges inside the raw text.
            if len(paragraph["markups"]) != 3:
                logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't split text")
                current_pos += 1
                continue

            title_range = paragraph["markups"][1]
            description_range = paragraph["markups"][2]

            embed_title = text_raw[title_range["start"]:title_range["end"]]
            embed_description = text_raw[description_range["start"]:description_range["end"]]
            try:
                embed_site = tld.get_fld(url)
            except Exception as ex:
                logger.warning(f"Can't get embed site fld: {ex}. Using custom logic...")
                parsed_url = urllib.parse.urlparse(url)
                embed_site = parsed_url.hostname

            embed_template_rendered = await embed_template.render_async(paragraph=paragraph, url=url, embed_title=embed_title, embed_description=embed_description, embed_site=embed_site)
            out_paragraphs.append(embed_template_rendered)
        elif paragraph["type"] == "IFRAME":
            iframe_template = jinja_env.from_string('<div class="mt-7"><iframe class="lazy" data-src="{{ host_address }}/render_iframe/{{ iframe_id }}" allowfullscreen="" frameborder="0" scrolling="no"></iframe></div>')
            iframe_template_rendered = await iframe_template.render_async(host_address=self.host_address, iframe_id=paragraph["iframe"]["mediaResource"]["id"])
            out_paragraphs.append(iframe_template_rendered)

        else:
            logger.error(f"Unknown {paragraph['type']}: {paragraph}")

        current_pos += 1

    return out_paragraphs, title, subtitle
|
||||
|
||||
async def render_as_html(self, template_folder: str = './templates'):
    """Render the post as HTML; public wrapper around ``_render_as_html``.

    Args:
        template_folder: Directory passed through to ``_render_as_html``.

    Returns:
        Whatever ``_render_as_html`` returns.

    Raises:
        Exception: Any error from ``_render_as_html`` is re-raised unchanged.
    """
    try:
        return await self._render_as_html(template_folder)
    except Exception as ex:
        # raise MediumParserException(ex) from ex
        raise ex
|
||||
|
||||
async def generate_metadata(self, as_dict: bool = False) -> tuple:
    """Extract display metadata from ``self.post_data["data"]["post"]``.

    Args:
        as_dict: When true, return a dict (which additionally carries
            ``post_id``) instead of a tuple.

    Returns:
        Either the tuple ``(title, subtitle, description, url, creator,
        collection, reading_time, free_access, updated_at,
        first_published_at, preview_image_id, tags)`` or a dict with the
        same values keyed by those names plus ``"post_id"``.
    """
    post = self.post_data["data"]["post"]

    title = RLStringHelper(post["title"]).get_text()  # quote_html=False
    subtitle = RLStringHelper(post["previewContent"]["subtitle"]).get_text()
    # Description is the subtitle shortened to at most 100 characters.
    short_subtitle = textwrap.shorten(subtitle, width=100, placeholder="...")
    description = RLStringHelper(short_subtitle).get_text()
    preview_image_id = post["previewImage"]["id"]
    creator = post["creator"]
    collection = post["collection"]
    url = post["mediumUrl"]

    reading_time = math.ceil(post["readingTime"])
    free_access = "No" if post["isLocked"] else "Yes"
    updated_at = convert_datetime_to_human_readable(post["updatedAt"])
    first_published_at = convert_datetime_to_human_readable(post["firstPublishedAt"])
    tags = post["tags"]

    if not as_dict:
        return title, subtitle, description, url, creator, collection, reading_time, free_access, updated_at, first_published_at, preview_image_id, tags

    return {
        "post_id": self.post_id,
        "title": title,
        "subtitle": subtitle,
        "description": description,
        "url": url,
        "creator": creator,
        "collection": collection,
        "reading_time": reading_time,
        "free_access": free_access,
        "updated_at": updated_at,
        "first_published_at": first_published_at,
        "preview_image_id": preview_image_id,
        "tags": tags,
    }
|
||||
|
||||
async def _render_as_html(self, template_folder: str = './templates') -> 'HtmlResult':
    """Render the whole post page from ``post.html`` and the parsed content.

    Queries the post first when ``self.post_data`` is empty.

    Args:
        template_folder: Directory from which ``post.html`` is loaded.

    Returns:
        An ``HtmlResult`` built from the rendered page title, the
        description, the post URL and the rendered post template.
    """
    if not self.post_data:
        logger.warning(f'No post data found for post ID: {self.post_id}. Querying...')
        await self.query()

    file_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_folder), enable_async=True)
    post_template = file_env.get_template('post.html')

    (
        title,
        subtitle,
        description,
        url,
        creator,
        collection,
        reading_time,
        free_access,
        updated_at,
        first_published_at,
        preview_image_id,
        tags,
    ) = await self.generate_metadata()

    post = self.post_data["data"]["post"]
    # The parser may rewrite the subtitle, so both values are taken back.
    content, title, subtitle = await self._parse_and_render_content_html_post(
        post["content"],
        title,
        subtitle,
        preview_image_id,
        post["highlights"],
        tags,
    )

    # The collection suffix is added only when the post has a collection.
    title_parts = ["{{ title }} | by {{ creator.name }}"]
    if collection:
        title_parts.append(" | in {{ collection.name }}")
    page_title_template = jinja_env.from_string("".join(title_parts))
    page_title = await page_title_template.render_async(title=title, creator=creator, collection=collection)

    post_context = {
        "subtitle": subtitle,
        "title": title,
        "url": url,
        "creator": creator,
        "collection": collection,
        "readingTime": reading_time,
        "freeAccess": free_access,
        "updatedAt": updated_at,
        "firstPublishedAt": first_published_at,
        "previewImageId": preview_image_id,
        "content": content,
        "tags": tags,
    }
    rendered_post = await post_template.render_async(post_context)

    return HtmlResult(page_title, description, url, rendered_post)
|
||||
|
||||
async def render_as_markdown(self) -> str:
    """Placeholder for Markdown output; not implemented.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Markdown rendering is not implemented. Please use HTML rendering instead")
|
||||
31
core/medium_parser/db_cache_migration.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import sqlite3
|
||||
import asyncio
|
||||
import pickle
|
||||
from cache_db import SQLiteCacheBackend
|
||||
|
||||
db_path = "../medium_cache.sqlite"
|
||||
|
||||
async def main():
    """Copy every row of the legacy ``responses`` table into the new cache DB.

    Each row's second column is unpickled and its ``.text()`` result is pushed
    into the ``SQLiteCacheBackend`` under the first column's value. Zstd is
    enabled on the new database afterwards. The source connection, its cursor
    and the cache backend are released even if a step fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        db_cache = SQLiteCacheBackend("medium_db_cache.sqlite")
        try:
            db_cache.init_db()

            c = conn.cursor()
            try:
                c.execute("SELECT * FROM responses")
                results = c.fetchall()
            finally:
                c.close()

            for result in results:
                # NOTE(review): pickle.loads can execute arbitrary code; only
                # run this migration against a database file you trust.
                value_raw = pickle.loads(result[1])
                db_cache.push(result[0], await value_raw.text())

            db_cache.enable_zstd()
        finally:
            db_cache.close()
    finally:
        # Close the source connection
        conn.close()


# Script entry point: runs the migration as soon as the module is executed.
asyncio.run(main())
|
||||
26
core/medium_parser/exceptions.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
class MediumParserException(Exception):
    """Base class for every error raised by the medium_parser package."""


class PageLoadingError(MediumParserException):
    """A page could not be loaded."""


class NotValidMediumURL(MediumParserException):
    """The URL does not belong to Medium."""


class InvalidURL(MediumParserException):
    """The URL is not valid."""


class InvalidMediumPostURL(MediumParserException):
    """The URL is not a valid Medium post URL."""


class InvalidMediumPostID(MediumParserException):
    """The value is not a valid Medium post ID."""


class MediumPostQueryError(MediumParserException):
    """Post data could not be queried."""
|
||||
48
core/medium_parser/medium_api.py
Normal file
0
core/medium_parser/models/__init__.py
Normal file
9
core/medium_parser/models/html_result.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class HtmlResult:
    """Result of rendering a post as HTML."""

    # Page title of the rendered post.
    title: str
    # Short description of the post.
    description: str
    # URL of the post.
    url: str
    # Rendered HTML.
    data: str
|
||||
45
core/medium_parser/time.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
def convert_datetime_to_human_readable(unix_time: int):
    """Format a Unix timestamp in milliseconds as ``"<Month> <day>, <year>"``.

    The timestamp is interpreted in the local time zone of the process.

    Args:
        unix_time: Milliseconds since the Unix epoch.

    Returns:
        A string such as ``"July 15, 2023"`` with the English month name.
    """
    # English names are spelled out so the result does not depend on locale.
    month_names = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )
    moment = datetime.fromtimestamp(unix_time / 1000)
    return f"{month_names[moment.month - 1]} {moment.day}, {moment.year}"
|
||||
|
||||
|
||||
def get_unix_ms() -> int:
    """Return the current time as whole milliseconds since the Unix epoch."""
    seconds_since_epoch = datetime.now().timestamp()
    return int(seconds_since_epoch * 1000)
|
||||
240
core/medium_parser/utils.py
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
import hashlib
|
||||
import secrets
|
||||
import difflib
|
||||
import urllib.parse
|
||||
from aiohttp_retry import RetryClient
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from functools import lru_cache
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
import aiohttp
|
||||
import string
|
||||
|
||||
from . import retry_options, exceptions
|
||||
|
||||
import tld
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
VALID_ID_CHARS = set(string.ascii_letters + string.digits)
|
||||
|
||||
KNOWN_MEDIUM_NETLOC = ("javascript.plainenglish.io", "python.plainenglish.io", "levelup.gitconnected.com")
|
||||
KNOWN_MEDIUM_DOMAINS = ("medium.com", "towardsdatascience.com", "eand.co", "betterprogramming.pub", "curiouse.co", "betterhumans.pub", "uxdesign.cc")
|
||||
|
||||
NOT_MEDIUM_DOMAINS = ("github.com", "yandex.ru", "yandex.kz", "youtube.com", "nytimes.com", "wsj.com", "reddit.com", "elpais.com", "forbes.com", "bloomberg.com")
|
||||
|
||||
|
||||
def is_valid_url(url):
    """Return True when ``url`` has a resolvable first-level domain, a scheme and a netloc."""
    if not get_fld(url):
        return False

    parts = urlparse(url)
    has_scheme_and_host = parts.scheme and parts.netloc
    return bool(has_scheme_and_host)
|
||||
|
||||
|
||||
def getting_percontage_of_match(string: str, matched_string: str) -> float:
    """Return how similar two strings are, as a percentage from 0 to 100.

    Args:
        string: First string to compare, or ``None``.
        matched_string: Second string to compare, or ``None``.

    Returns:
        ``difflib.SequenceMatcher(...).ratio() * 100``, or ``0`` when either
        argument is ``None``.
    """
    if string is None or matched_string is None:
        return 0
    # ratio() is a float in [0, 1], so the result is a float, not an int.
    return difflib.SequenceMatcher(None, string, matched_string).ratio() * 100
|
||||
|
||||
|
||||
def generate_random_sha256_hash():
    """Return the SHA-256 hex digest of freshly generated random bytes."""
    entropy = secrets.token_bytes()
    return hashlib.sha256(entropy).hexdigest()
|
||||
|
||||
|
||||
def get_unix_ms() -> int:
    """Return the current time in milliseconds since the Unix epoch, truncated to an int."""
    now = datetime.now()
    return int(now.timestamp() * 1000)
|
||||
|
||||
|
||||
def unquerify_url(url):
    """Strip the query string and one trailing slash from a URL.

    Args:
        url: The URL to clean.

    Returns:
        The URL rebuilt without its query component and without a single
        trailing ``/``; other components (such as the fragment) are kept.
    """
    # Replacing an already-empty query is a no-op, so no branch is needed.
    without_query = urllib.parse.urlparse(url)._replace(query='')
    return urllib.parse.urlunparse(without_query).removesuffix("/")
|
||||
|
||||
|
||||
def sanitize_url(url):
    """Drop a trailing ``/page/2`` and then a single trailing ``/`` from ``url``."""
    return url.removesuffix("/page/2").removesuffix("/")
|
||||
|
||||
|
||||
def is_valid_medium_post_id_hexadecimal(hex_string: str) -> bool:
    """Check whether ``hex_string`` looks like a Medium post ID.

    Despite the name, any character from ``VALID_ID_CHARS`` is accepted, not
    only hexadecimal digits.

    Returns:
        True when every character is in ``VALID_ID_CHARS`` and the length is
        between 8 and 12 characters inclusive.
    """
    only_allowed_chars = all(char in VALID_ID_CHARS for char in hex_string)
    return only_allowed_chars and 8 <= len(hex_string) <= 12
|
||||
|
||||
|
||||
async def resolve_medium_short_link_v1(short_url_id: str, timeout: int = 5) -> str:
    """Resolve a short-link ID to a Medium post ID.

    Requests ``https://rsci.app.link/<short_url_id>`` without following
    redirects, reads the ``Location`` header of the response and extracts the
    post ID from that URL.

    Args:
        short_url_id: The short-link identifier.
        timeout: Request timeout, also forwarded when resolving the target URL.

    Returns:
        Whatever ``get_medium_post_id_by_url`` returns for the redirect target.

    Raises:
        KeyError: If the response carries no ``Location`` header.
    """
    async with aiohttp.ClientSession() as session:
        retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
        request = await retry_client.get(
            f"https://rsci.app.link/{short_url_id}",
            timeout=timeout,
            headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"},
            allow_redirects=False,
        )
        post_url = request.headers["Location"]
    # Forward the caller's timeout instead of silently falling back to the default.
    return await get_medium_post_id_by_url(post_url, timeout)
|
||||
|
||||
|
||||
async def get_medium_post_id_by_url(url: str, timeout: int = 5) -> str:
    """Extract the Medium post ID from a URL.

    URLs that wrap another URL in a query parameter (Facebook and Google
    redirects, Google web cache, 12ft.io, Medium's global-identity redirect)
    are unwrapped and resolved recursively. ``link.medium.com`` short links
    are resolved over the network. Otherwise the ID is taken from a ``/p/<id>``
    path, or from the part after the last ``-`` in the last path segment.

    Args:
        url: The URL to inspect.
        timeout: Timeout used for short-link resolution; it is forwarded
            through every recursive call.

    Returns:
        The post ID string, or ``False`` when no valid ID can be extracted
        (the ``-> str`` annotation does not reflect this).
    """
    parsed_url = urlparse(url)
    if parsed_url.path.startswith("/p/"):
        post_id = parsed_url.path.rsplit("/p/")[1]
    elif parsed_url.netloc == "l.facebook.com" and parsed_url.path.startswith("/l.php"):
        parsed_query = parse_qs(parsed_url.query)
        if parsed_query.get("u") and len(parsed_query["u"]) == 1:
            post_url = parsed_query["u"][0]
            return await get_medium_post_id_by_url(post_url, timeout)
        return False
    elif parsed_url.netloc == "webcache.googleusercontent.com" and parsed_url.path.startswith("/search"):
        parsed_query = parse_qs(parsed_url.query)
        if parsed_query.get("q") and len(parsed_query["q"]) == 1:
            post_url = parsed_query["q"][0].removeprefix("cache:")
            return await get_medium_post_id_by_url(post_url, timeout)
        return False
    elif parsed_url.netloc == "www.google.com" and parsed_url.path.startswith("/url"):
        parsed_query = parse_qs(parsed_url.query)
        # The target is taken from "url" first, then from "q".
        if parsed_query.get("url") and len(parsed_query["url"]) == 1:
            post_url = parsed_query["url"][0]
            return await get_medium_post_id_by_url(post_url, timeout)
        elif parsed_query.get("q") and len(parsed_query["q"]) == 1:
            post_url = parsed_query["q"][0]
            return await get_medium_post_id_by_url(post_url, timeout)
        return False
    elif parsed_url.netloc == "12ft.io":
        parsed_query = parse_qs(parsed_url.query)
        if parsed_query.get("q") and len(parsed_query["q"]) == 1:
            post_url = parsed_query["q"][0]
            return await get_medium_post_id_by_url(post_url, timeout)
        return False
    elif parsed_url.path.startswith("/m/global-identity-2"):
        parsed_query = parse_qs(parsed_url.query)
        if parsed_query.get("redirectUrl") and len(parsed_query["redirectUrl"]) == 1:
            post_url = parsed_query["redirectUrl"][0]
            return await get_medium_post_id_by_url(post_url, timeout)
        return False
    elif parsed_url.netloc == "link.medium.com":
        short_url_id = parsed_url.path.removeprefix("/")
        return await resolve_medium_short_link_v1(short_url_id, timeout)
    else:
        # The ID is the part after the last dash of the last path segment.
        post_url = parsed_url.path.split("/")[-1]
        post_id = post_url.split("-")[-1]

    if not is_valid_medium_post_id_hexadecimal(post_id):
        return False

    return post_id
|
||||
|
||||
|
||||
async def get_medium_post_id_by_url_old(url: str, timeout: int = 5) -> str:
    """Extract a post ID by downloading the page and reading its meta tags.

    The page must declare ``og:type`` as ``article``; the ID is the last path
    segment of the URL found in the ``al:android:url`` meta tag.

    Args:
        url: Page URL to download.
        timeout: Request timeout.

    Returns:
        The last path segment of the ``al:android:url`` value, or ``False``
        when the page has no ``<head>``, is not an article, or lacks the tag.
    """
    async with aiohttp.ClientSession() as session:
        retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
        request = await retry_client.get(url, timeout=timeout)
        response = await request.text()

    soup = BeautifulSoup(response, "html.parser")
    # A document without <head> has no meta tags; bail out instead of
    # failing on ``None.find``.
    if not soup.head:
        return False

    type_meta_tag = soup.head.find("meta", property="og:type")
    if not type_meta_tag or type_meta_tag.get("content") != "article":
        return False

    url_meta_tag = soup.head.find("meta", property="al:android:url")
    if not url_meta_tag or not url_meta_tag.get("content"):
        return False

    parsed_url = urlparse(url_meta_tag["content"])
    path = parsed_url.path.strip("/")
    parsed_value = path.split("/")[-1]
    return parsed_value
|
||||
|
||||
|
||||
@lru_cache(maxsize=200)
def get_fld(url: str):
    """Return the first-level domain of ``url`` via ``tld.get_fld``, or ``None`` on any error.

    Results are memoized for up to 200 distinct URLs.
    """
    try:
        return tld.get_fld(url)
    except Exception as ex:
        logger.trace(ex)
        return None
|
||||
|
||||
|
||||
async def is_valid_medium_url(url: str, timeout: int = 5) -> bool:
    """Decide whether ``url`` points at a Medium-powered site.

    Stage one is offline: redirect/cache hosts and known Medium domains are
    accepted, and domains listed in ``NOT_MEDIUM_DOMAINS`` are rejected with
    an exception. Stage two downloads the page and accepts it only when its
    ``og:site_name`` meta tag equals ``Medium``.

    Args:
        url: The URL to check.
        timeout: Timeout for the stage-two request.

    Returns:
        True when the URL is accepted, False otherwise.

    Raises:
        exceptions.NotValidMediumURL: If the domain is in ``NOT_MEDIUM_DOMAINS``.
        exceptions.PageLoadingError: If the stage-two request fails.
    """
    # First stage
    domain = get_fld(url)
    netloc = urlparse(url).netloc

    if domain in ["12ft.io", "google.com", "facebook.com", "googleusercontent.com"]:
        return True

    if domain in NOT_MEDIUM_DOMAINS:
        raise exceptions.NotValidMediumURL("100% not valid Medium URL")

    if domain in KNOWN_MEDIUM_DOMAINS or netloc in KNOWN_MEDIUM_NETLOC:
        return True

    logger.warning(f"url '{url}' wasn't detected in known medium domains")

    # Second stage
    async with aiohttp.ClientSession() as session:
        retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)

        try:
            request = await retry_client.get(url, timeout=timeout)
        except Exception as ex:
            raise exceptions.PageLoadingError(ex) from ex

        page_html = await request.text()

    soup = BeautifulSoup(page_html, "html.parser")
    if not soup.head:
        return False

    site_name_tag = soup.head.find("meta", property="og:site_name")
    return bool(site_name_tag) and site_name_tag.get("content") == "Medium"
|
||||
3
core/requirements-dev.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
djlint==1.32.1
|
||||
ruff==0.0.261
|
||||
black==23.7.0
|
||||
8
core/requirements.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
loguru==0.6.0
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
tld==0.13
|
||||
bs4==0.0.1
|
||||
Jinja2==3.1.2
|
||||
beautifulsoup4==4.12.2
|
||||
# git+https://github.com/phiresky/sqlite-zstd.git#egg=sqlite_zstd&subdirectory=python
|
||||
25
core/setup.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from setuptools import setup, find_packages
|
||||
|
||||
# Function to read the contents of the requirements file
|
||||
def read_requirements():
    """Read ``requirements.txt`` from the current directory as a list of requirement strings.

    Blank lines and full-line ``#`` comments are skipped so that only actual
    requirement specifiers are returned.
    """
    with open('requirements.txt', 'r') as req:
        lines = req.read().splitlines()
    return [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')]
|
||||
|
||||
# Read the long description up front so the file handle is closed
# deterministically; the README is decoded as UTF-8 regardless of platform.
with open('README.md', encoding='utf-8') as readme:
    long_description = readme.read()

setup(
    name='medium_parser',
    version='0.1.0',
    author='Freedium community',
    author_email='admin@freedium.cfd',
    description='A parser for Medium posts',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://codeberg.org/Freedium-cfd/web',
    packages=find_packages(),
    install_requires=read_requirements(),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.7',
)
|
||||
79
core/templates/post.html
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
<div class="container w-full md:max-w-3xl mx-auto pt-20 break-words">
|
||||
<div class="w-full px-4 md:px-6 text-xl text-gray-800 leading-normal" style="font-family:Georgia,serif">
|
||||
<div class="font-sans">
|
||||
<p class="text-base md:text-sm text-green-500 font-bold pb-3">
|
||||
<a href="{{ url }}" class="text-sm md:text-sm text-green-500 font-bold no-underline hover:underline ">< Go to the original</a>
|
||||
</p>
|
||||
{% if previewImageId %}
|
||||
<img alt="Preview image"
|
||||
style="max-height: 65vh;
|
||||
width: auto;
|
||||
margin: auto"
|
||||
loading="eager"
|
||||
role="presentation"
|
||||
src="https://miro.medium.com/v2/resize:fit:700/{{ previewImageId }}">
|
||||
{% endif %}
|
||||
<h1 class="font-bold font-sans break-normal text-gray-900 pt-6 pb-2 text-3xl md:text-4xl">{{ title }}</h1>
|
||||
{% if subtitle %}<h2 class="font-medium font-sans break-normal text-gray-600 pt-1 pb-3 text-1xl md:text-1xl">{{ subtitle }}</h2>{% endif %}
|
||||
</div>
|
||||
<div class="bg-gray-100 border border-gray-300 m-2">
|
||||
<div class="flex items-center space-x-4 p-4">
|
||||
<div class="flex-shrink-0">
|
||||
<a href="https://medium.com/@{{ creator.username }}" target="_blank" title="{{ creator.bio }}" class="block relative">
|
||||
<img src="https://miro.medium.com/v2/resize:fill:88:88/{{ creator.imageId }}"
|
||||
alt="{{ creator.name }}"
|
||||
class="rounded-full h-11 w-11 no-lightense">
|
||||
<div class="absolute bottom-0 right-0 h-3 w-3 border-2 border-white bg-green-500 rounded-full"></div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="flex-grow">
|
||||
<a href="https://medium.com/@{{ creator.username }}"
|
||||
target="_blank"
|
||||
title="{{ creator.bio }}"
|
||||
class="block font-semibold text-gray-900">{{ creator.name }}</a>
|
||||
<button class="text-sm text-white bg-green-500 px-3 py-1 rounded-lg mt-1">
|
||||
<a href="https://medium.com/@{{ creator.username }}"
|
||||
target="_blank"
|
||||
title="{{ creator.bio }}"
|
||||
class="block text-sm text-white">Follow</a>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="px-4 pb-2">
|
||||
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500">
|
||||
{% if collection %}
|
||||
<a href="https://medium.com/{{ collection.slug }}"
|
||||
title="{{ collection.shortDescription }}"
|
||||
target="_blank"
|
||||
class="flex items-center space-x-1">
|
||||
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ collection.avatar.id }}"
|
||||
alt="{{ collection.name }}"
|
||||
class="h-4 w-4 rounded-full no-lightense">
|
||||
<p>{{ collection.name }}</p>
|
||||
</a>
|
||||
<span>·</span>
|
||||
{% endif %}
|
||||
<span class="text-gray-500">~{{ readingTime }} min read</span>
|
||||
<span class="md:inline">·</span>
|
||||
<span class="text-gray-500">{{ firstPublishedAt }} (Updated: {{ updatedAt }})</span>
|
||||
<span class="md:inline">·</span>
|
||||
<span class="text-yellow-500">Free: {{ freeAccess }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% for paragraph in content %}{{ paragraph }}{% endfor %}
|
||||
<div class="flex flex-wrap gap-2 mt-5">
|
||||
{% for tag in tags %}<a title="{{ tag.displayTitle }}" target="_blank" href="https://medium.com/tag/{{ tag.normalizedTagSlug }}"><span class="text-green-500 bg-green-100 px-2 py-1 rounded-full text-xs">#{{ tag.normalizedTagSlug }}</span></a>{% endfor %}
|
||||
</div>
|
||||
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
|
||||
</div>
|
||||
<style>
|
||||
code {
|
||||
/*font-size: 75%;*/
|
||||
background-color: #e3e2e2;
|
||||
}
|
||||
pre {
|
||||
font-size: 75%;
|
||||
background-color: #e3e2e2;
|
||||
}
|
||||
</style>
|
||||
112
core/tests/example_base_template.html
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
|
||||
<title>{{ page_title }}</title>
|
||||
{% if creator %}
|
||||
<meta name="author" content="{{ creator.name }}" />
|
||||
{% endif %}
|
||||
<meta name="description" content="{{ page_description or 'Your paywall breakthrough for medium.com!' }}" />
|
||||
<meta name="keywords" content="medium, paywall, medium.com, paywall breakthrough" />
|
||||
<link rel="stylesheet" href="https://unpkg.com/tailwindcss@2.2.19/dist/tailwind.min.css"/>
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
|
||||
<link rel="manifest" href="/site.webmanifest">
|
||||
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#00aba9">
|
||||
<meta name="msapplication-TileColor" content="#00aba9">
|
||||
<meta name="theme-color" content="#ffffff">
|
||||
<script src="https://cdn.jsdelivr.net/npm/lightense-images@1.0.17/dist/lightense.min.js"></script>
|
||||
</head>
|
||||
|
||||
<body class="bg-gray-100 font-sans leading-normal tracking-normal">
|
||||
<nav id="header" class="fixed w-full z-10 top-0">
|
||||
{% if enable_ads_header %}
|
||||
<div class="w-full bg-yellow-400 text-center py-1 px-4"><p class="text-yellow-900">Place your advertisement here! Contact us at advertise@freedium.com</p></div>
|
||||
{% endif %}
|
||||
|
||||
<div id="progress" class="h-1 z-20 top-0" style="background:linear-gradient(to right, #4dc0b5 var(--scroll), transparent 0);"></div>
|
||||
|
||||
<div class="w-full md:max-w-4xl mx-auto flex flex-wrap items-center justify-between mt-0 py-3">
|
||||
|
||||
<div class="pl-4">
|
||||
<a class="text-green-500 text-base no-underline hover:no-underline font-extrabold text-xl" href="/" onclick="navigateToOrigin()">
|
||||
Freedium βeta
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="block lg:hidden pr-4">
|
||||
<button id="nav-toggle" class="flex items-center px-3 py-2 border rounded text-gray-500 border-gray-600 hover:text-gray-900 hover:border-green-500 appearance-none focus:outline-none">
|
||||
<svg class="fill-current h-3 w-3" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
|
||||
<title>Menu</title>
|
||||
<path d="M0 3h20v2H0V3zm0 6h20v2H0V9zm0 6h20v2H0v-2z" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="w-full flex-grow lg:flex lg:items-center lg:w-auto hidden lg:block mt-2 lg:mt-0 bg-gray-100 z-20" id="nav-content">
|
||||
<ul class="list-reset lg:flex justify-end flex-1 items-center">
|
||||
<!--
|
||||
<li class="mr-3">
|
||||
<a class="inline-block py-2 px-4 text-gray-900 font-bold no-underline" href="#">Active</a>
|
||||
</li>
|
||||
-->
|
||||
<li class="mr-3">
|
||||
<a class="inline-block text-gray-600 no-underline hover:text-gray-900 hover:text-underline py-2 px-4" href="https://medium.com/">Medium.com</a>
|
||||
</li>
|
||||
<!--
|
||||
<li class="mr-3">
|
||||
<a class="inline-block text-gray-600 no-underline hover:text-gray-900 hover:text-underline py-2 px-4" href="#">link</a>
|
||||
</li>
|
||||
-->
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
{% if enable_ads_header %}
|
||||
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
|
||||
{% endif %}
|
||||
|
||||
{{ body_template }}
|
||||
<script>
|
||||
// Click handler for the site title link: sends the browser to the
// current origin (scheme + host + port), i.e. the site root.
function navigateToOrigin() {
    window.location.href = window.location.origin;
}
|
||||
</script>
|
||||
|
||||
<script>
|
||||
// Shorthands used to read the scroll offset/height from either the root
// element or <body>, whichever one reports a non-zero value.
const h = document.documentElement, b = document.body;
const st = 'scrollTop';
const sh = 'scrollHeight';
const progress = document.getElementById('progress');
const header = document.getElementById('header');
const navcontent = document.getElementById('nav-content');

// On every scroll event: update the reading-progress bar and switch the
// header/nav background once the page has been scrolled a little.
document.addEventListener('scroll', function () {
    /* Refresh scroll % width */
    // Percentage scrolled = offset / (total height - viewport height); the
    // value feeds the --scroll CSS variable used by the #progress gradient.
    const scroll = (h[st] || b[st]) / ((h[sh] || b[sh]) - h.clientHeight) * 100;
    progress.style.setProperty('--scroll', scroll + '%');

    /* Apply classes for slide in bar */
    const shouldAddClass = window.scrollY > 10;

    header.classList.toggle('bg-white', shouldAddClass);
    header.classList.toggle('shadow', shouldAddClass);
    navcontent.classList.toggle('bg-gray-100', !shouldAddClass);
    navcontent.classList.toggle('bg-white', shouldAddClass);
});

// Hamburger button: show/hide the collapsible navigation list.
document.getElementById('nav-toggle').onclick = function() {
    document.getElementById("nav-content").classList.toggle("hidden");
}

// After the page has loaded, enable the Lightense image zoom on every
// image that is not explicitly opted out with the "no-lightense" class.
window.addEventListener('load', function () {
    Lightense('img:not(.no-lightense)');
}, false);
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
44
core/tests/example_test.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
|
||||
import jinja2
|
||||
from loguru import logger
|
||||
from medium_parser.core import MediumParser
|
||||
|
||||
jinja2_env = jinja2.Environment(
|
||||
loader=jinja2.FileSystemLoader("./"),
|
||||
)
|
||||
|
||||
async def safe_main():
    """Await :func:`main`, logging any ``Exception`` it raises instead of propagating it.

    ``SystemExit`` (raised by ``sys.exit()`` inside ``main``) is not an
    ``Exception`` subclass and therefore still passes through.
    """
    try:
        await main()
    except Exception as error:
        # logger.exception records the message together with the traceback.
        logger.exception(error)
|
||||
|
||||
|
||||
async def main():
    """Query one hard-coded Medium post and write the raw result and rendered page to disk.

    Produces ``query_result.json`` (the raw query result) and ``medium.html``
    (the post rendered into ``example_base_template.html``) in the current
    working directory, then terminates the process via ``sys.exit()``.
    """
    # Replace loguru's default sink with a stderr sink at the most verbose level.
    logger.remove()
    # logger.add(sys.stderr, level="INFO")
    logger.add(sys.stderr, level="TRACE")

    # dl = await MediumParser.from_url("")
    # NOTE(review): the positional arguments look like (post id, timeout, host) - confirm against MediumParser.
    dl = MediumParser("3d8e0ba02d10", 8, "localhost")
    query_result = await dl.query(use_cache=False)

    # Explicit UTF-8 keeps the output independent of the platform locale encoding.
    with open("query_result.json", "w", encoding="utf-8") as f:
        json.dump(query_result, f, indent=2)

    result = await dl.render_as_html()

    # Render first and open the output file afterwards, so a template error
    # does not leave a truncated, empty medium.html behind. The base template
    # contains non-ASCII text, hence the explicit UTF-8 encoding.
    template = jinja2_env.get_template("example_base_template.html")
    template_result = template.render(body_template=result.data)
    with open("medium.html", "w", encoding="utf-8") as f:
        f.write(template_result)

    print("See medium.html for the result. Press CTRL-C to exit.")
    sys.exit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(safe_main())
|
||||
BIN
data/original.jpg
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
data/vector-1028x1028.png
Normal file
|
After Width: | Height: | Size: 50 KiB |
83
data/vector-1028x1028.svg
Normal file
|
After Width: | Height: | Size: 65 KiB |
82
data/vector-32x32.svg
Normal file
|
After Width: | Height: | Size: 65 KiB |
2
pyproject.toml
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
[tool.black]
|
||||
line-length = 220
|
||||
3
requirements-dev.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
djlint==1.32.1
|
||||
ruff==0.0.261
|
||||
black==23.7.0
|
||||
2
requirements-fast.txt
Executable file
|
|
@ -0,0 +1,2 @@
|
|||
orjson==3.9.2
|
||||
uvloop==0.17.0
|
||||
12
requirements.txt
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
pickledb==0.9.2
|
||||
html5lib==1.1
|
||||
fastapi-limiter==0.1.5
|
||||
sentry-sdk[fastapi]==1.29.2
|
||||
loguru==0.6.0 # due to: https://github.com/Delgan/loguru/issues/916
|
||||
uvicorn==0.20.0
|
||||
Jinja2==3.1.2
|
||||
fastapi==0.91.0
|
||||
starlette==0.24.0
|
||||
gunicorn==21.2.0
|
||||
redis[hiredis]==4.6.0
|
||||
xkcdpass==1.19.3
|
||||
160
rl_string_helper/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
3
rl_string_helper/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# rl-string-helper
|
||||
|
||||
`RLStringHelper` is a string markup helper designed specifically for the Medium.com parser. The basic idea is to apply multiple markups and multiple replacements to the same character positions. It also adapts character positions to UTF-16 code units. See the tests for more information.
|
||||
1
rl_string_helper/requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
loguru==0.6.0
|
||||
2
rl_string_helper/rl_string_helper/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from .string_helper import RLStringHelper, parse_markups, split_overlapping_ranges
|
||||
from .utils import quote_html, quote_symbol
|
||||
39
rl_string_helper/rl_string_helper/logger_trace.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import asyncio
|
||||
import time
|
||||
from functools import wraps
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def trace(func):
    """Decorate *func* so every call, its result and its duration are logged at TRACE level.

    Supports both plain functions and coroutine functions (detected once, at
    decoration time). The wrapped callable's return value is passed through
    unchanged and exceptions propagate untouched.

    NOTE: the log messages are built with f-strings and are therefore
    formatted even when TRACE output is disabled. Formatting calls
    ``repr()``/``str()`` on the arguments and the result; objects whose
    ``__str__``/``__repr__`` refresh cached state rely on that, so this is
    deliberately not converted to lazy logging.

    Args:
        func: The function or coroutine function to wrap.

    Returns:
        A wrapper with the same signature and metadata as *func*.
    """

    def _preview(value):
        # Text results are cut to 42 characters to keep the summary line short.
        return f"{value[:42]}..." if isinstance(value, (str, bytes)) else value

    if asyncio.iscoroutinefunction(func):
        logger.trace(f"{func.__name__!r} function is a coroutine")

        @wraps(func)
        async def wrapper(*args, **kwargs):
            start_ts = time.time()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            original_result = await func(*args, **kwargs)
            logger.trace(f"Result: {original_result}")
            logger.trace(f"Result type: {type(original_result)}")
            duration_ts = time.time() - start_ts
            # ".2f" = two decimals; the bare ".2" precision means two significant digits.
            logger.trace(f"{func.__name__!r}() returned {_preview(original_result)!r} in {duration_ts:.2f} seconds")
            return original_result

    else:
        logger.trace(f"{func.__name__!r} is not a coroutine")

        @wraps(func)
        def wrapper(*args, **kwargs):
            start_ts = time.time()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            original_result = func(*args, **kwargs)
            logger.trace(f"Result: {original_result}")
            logger.trace(f"Result type: {type(original_result)}")
            duration_ts = time.time() - start_ts
            # ".2f" = two decimals; the bare ".2" precision means two significant digits.
            logger.trace(f"{func.__name__!r}() returned {_preview(original_result)!r} in {duration_ts:.2f} seconds")
            return original_result

    return wrapper
|
||||
524
rl_string_helper/rl_string_helper/string_helper.py
Normal file
|
|
@ -0,0 +1,524 @@
|
|||
from loguru import logger
|
||||
|
||||
from .logger_trace import trace
|
||||
from .utils import quote_html, quote_symbol
|
||||
|
||||
from jinja2 import Environment, DebugUndefined, Template
|
||||
|
||||
jinja_env = Environment(undefined=DebugUndefined)
|
||||
|
||||
|
||||
# TODO: doc!
|
||||
class StringAsignmentMix:
    """Mutable string wrapper backed by a list of string fragments.

    Edits (``insert``, ``pop``, item/slice assignment) operate on
    ``string_list``; the cached ``string`` attribute is only refreshed when
    the object is rendered through ``str()``, ``repr()``, ``len()`` or
    ``encode()``. A fragment may hold more than one character, so ``len()``
    reports the length of the joined text, not the number of fragments.
    """

    __slots__ = ("string", "string_list")

    def __init__(self, string: "str | StringAsignmentMix"):
        """Wrap *string*, or copy the current text of another instance.

        Raises:
            ValueError: If *string* is neither ``str`` nor ``StringAsignmentMix``.
        """
        if isinstance(string, str):
            self.string = string
        elif isinstance(string, StringAsignmentMix):
            # str() re-joins the source's fragments first. Reading its cached
            # ``string`` attribute directly would miss edits made through
            # insert/pop/__setitem__ that have not been rendered yet.
            self.string = str(string)
        else:
            raise ValueError(f"Incorrect string type: {type(string)}")

        # One fragment per character of the (rendered) text.
        self.string_list = list(self.string)

    def __render_string(self):
        """Refresh the cached ``string`` from the fragment list."""
        self.string = "".join(self.string_list)

    def __len__(self):
        """Return the length of the joined text."""
        self.__render_string()
        return len(self.string)

    def pop(self, key):
        """Remove the fragment at index *key* and return ``self``."""
        self.string_list.pop(key)
        # self.__render_string()
        return self

    def encode(self, encoding: str):
        """Return the joined text encoded with *encoding*."""
        self.__render_string()
        return self.string.encode(encoding)

    def insert(self, key: int, value):
        """Insert *value* as a single fragment before index *key* and return ``self``."""
        self.string_list.insert(key, value)
        # self.__render_string()
        return self

    def __setitem__(self, key, value):
        """Assign *value* to the fragment index or slice *key*."""
        logger.trace(f"Calling __setitem__ with {key=}, {value=}")
        self.string_list[key] = value
        return self

    def __getitem__(self, key):
        """Return the fragment(s) selected by index or slice *key*, joined into one ``str``."""
        logger.trace(f"Calling __getitem__ with {key=}")
        str_list_res = self.string_list[key]
        return "".join(str_list_res)

    def __str__(self):
        """Render and return the current text."""
        self.__render_string()
        return self.string

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return self.__str__()
|
||||
|
||||
|
||||
# TODO: more clarified description
|
||||
"""
|
||||
In JavaScript, the `length` property of a String object returns the number of UTF-16 code units (16-bit values) in the string, because JavaScript strings are UTF-16 encoded.
|
||||
In UTF-16, each Unicode character is encoded as one or two code units (two or four bytes). This means that for certain characters, such as emojis, some mathematical symbols, or rare Chinese characters,
|
||||
the value returned by length might not match the actual number of Unicode characters in the string.
|
||||
|
||||
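Python strings are indexed by Unicode code point, so every character occupies exactly one position no matter how many UTF-16 code units it needs. As a workaround, the string is padded so that its positions line up with UTF-16 code units while it is being manipulated, and the padding is removed afterwards. See the pre_utf_16_bang and post_utf_16_bang functions.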
|
||||
"""
|
||||
|
||||
# TODO: doc! Who will read this noodles lol?
|
||||
# TODO: check cases when UTF-16 character can be more that 2 bytes
|
||||
class RLStringHelper:
|
||||
__slots__ = ("string", "templates", "replaces", "quote_html_type", "quote_replaces")
|
||||
|
||||
def __init__(self, string: str, quote_html_type: list[str] = ["full"]):
|
||||
self.string = StringAsignmentMix(quote_symbol(string))
|
||||
self.templates = []
|
||||
self.quote_replaces = []
|
||||
self.replaces = []
|
||||
self.quote_html_type = quote_html_type
|
||||
|
||||
@trace
|
||||
def pre_utf_16_bang(self, string: str, string_pos_matrix: list, _default_bang_char: str = "R"):
|
||||
utf_16_bang_list = []
|
||||
string_len_utf_16 = len(string.encode("utf-16-le")) // 2
|
||||
if string_len_utf_16 == len(string):
|
||||
logger.trace("String is doesn't contain multibyte characters")
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
i = 0
|
||||
while len(string) - 1 > i:
|
||||
new_i = string_pos_matrix[i]
|
||||
char = string[new_i]
|
||||
char_len = len(char.encode("utf-16-le")) // 2
|
||||
if char_len == 2:
|
||||
char_len_dif = char_len - 1
|
||||
logger.trace(char_len_dif)
|
||||
logger.trace(f"'{char}' char is two bytes")
|
||||
# logger.trace(f"'{char}' char is multibyte")
|
||||
char_present = _default_bang_char * char_len_dif
|
||||
logger.trace(f"{char_present=}")
|
||||
string, string_pos_matrix = self._paste_char(string, string_pos_matrix, new_i + 1, char_present)
|
||||
i += 1
|
||||
utf_16_bang_list.append((i, char_len_dif, i))
|
||||
elif char_len == 1:
|
||||
logger.trace(f"'{char}' char is single byte")
|
||||
pass
|
||||
else:
|
||||
ValueError(f"Invalid char: {char}")
|
||||
|
||||
i += 1
|
||||
logger.trace(utf_16_bang_list)
|
||||
logger.trace(string_pos_matrix)
|
||||
logger.trace(len(string))
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
def _paste_char(self, string: str, string_pos_matrix: list, pos: int, char: str):
|
||||
char_len = len(char)
|
||||
string_pos_matrix.insert(pos, string_pos_matrix[pos])
|
||||
for matrix_i, matrix in enumerate(string_pos_matrix[pos + 1:], pos + 1):
|
||||
string_pos_matrix[matrix_i] += char_len
|
||||
string.insert(pos, char)
|
||||
return string, string_pos_matrix
|
||||
|
||||
def _delete_char(self, string: str, string_pos_matrix: list, pos: int, char_len: int, old_pos: int):
|
||||
string.pop(pos)
|
||||
string_pos_matrix.pop(old_pos)
|
||||
for matrix_i, matrix in enumerate(string_pos_matrix[pos:], pos):
|
||||
if isinstance(string_pos_matrix[matrix_i], int):
|
||||
string_pos_matrix[matrix_i] -= char_len
|
||||
elif isinstance(string_pos_matrix[matrix_i], tuple):
|
||||
string_pos_matrix[matrix_i] = (string_pos_matrix[matrix_i][0] - char_len, string_pos_matrix[matrix_i][1] - char_len)
|
||||
return string, string_pos_matrix
|
||||
|
||||
@trace
|
||||
def post_utf_16_bang(self, string: str, string_pos_matrix: list, utf_16_bang_list: list, _default_bang_char: str = "R"):
|
||||
string = StringAsignmentMix(string)
|
||||
|
||||
post_transbang = 0
|
||||
for bang_pos, char_len, old_pos in utf_16_bang_list:
|
||||
string, string_pos_matrix = self._delete_char(string, string_pos_matrix, bang_pos - post_transbang, char_len, old_pos - post_transbang)
|
||||
post_transbang += char_len
|
||||
|
||||
logger.trace(utf_16_bang_list)
|
||||
logger.trace(string_pos_matrix)
|
||||
return string, string_pos_matrix
|
||||
|
||||
@trace
|
||||
def set_template(self, start: int, end: int, template: str):
|
||||
if not isinstance(template, Template):
|
||||
template = jinja_env.from_string(template)
|
||||
lazy_template = (start, end), template
|
||||
self.templates.append(lazy_template)
|
||||
logger.trace(self.templates)
|
||||
|
||||
@trace
|
||||
def set_replace(self, start: int, end: int, replace_with: str):
|
||||
lazy_replace = (start, end), replace_with
|
||||
self.replaces.append(lazy_replace)
|
||||
logger.trace(self.replaces)
|
||||
|
||||
def _render_templates(self, string: str, string_pos_matrix: list, utf_16_bang_list: list):
|
||||
if not self.templates:
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
templates = self.templates
|
||||
templates.reverse()
|
||||
|
||||
older_text = string
|
||||
updated_text = string
|
||||
|
||||
logger.trace(string_pos_matrix)
|
||||
|
||||
@trace
|
||||
def _get_prefix_len(template_raw: Template, inner_char: str = "{"):
|
||||
prefix_len = 0
|
||||
template = template_raw.render()
|
||||
for i in range(len(template)):
|
||||
if template[i] == inner_char:
|
||||
return prefix_len
|
||||
prefix_len += 1
|
||||
else:
|
||||
raise ValueError(f"Invalid template: {template}")
|
||||
|
||||
@trace
|
||||
def _get_suffix_len(template_raw: Template, outer_char: str = "}"):
|
||||
suffix_len = 0
|
||||
template = template_raw.render()
|
||||
for i in range(len(template) - 1, -1, -1):
|
||||
if template[i] == outer_char:
|
||||
return suffix_len
|
||||
suffix_len += 1
|
||||
else:
|
||||
raise ValueError(f"Invalid template: {template}")
|
||||
|
||||
@trace
|
||||
def update_nested_positions(start, end, prefix_len, suffix_len):
|
||||
logger.trace(len(self.string) == len(string_pos_matrix))
|
||||
logger.trace(f"{len(self.string)=}")
|
||||
for i in range(end, len(string_pos_matrix)):
|
||||
logger.trace(f"{i=}")
|
||||
logger.trace(f"{string_pos_matrix[i]=}")
|
||||
string_pos_matrix[i] = string_pos_matrix[i] + suffix_len + prefix_len
|
||||
|
||||
for i in range(start, end):
|
||||
string_pos_matrix[i] = string_pos_matrix[i] + prefix_len
|
||||
|
||||
for n in range(len(utf_16_bang_list)):
|
||||
utf_16_bang = utf_16_bang_list[n]
|
||||
if utf_16_bang[2] > end:
|
||||
utf_16_bang_list[n] = (utf_16_bang[0] + prefix_len + suffix_len, utf_16_bang[1], utf_16_bang[2])
|
||||
elif utf_16_bang[2] > start:
|
||||
utf_16_bang_list[n] = (utf_16_bang[0] + prefix_len, utf_16_bang[1], utf_16_bang[2])
|
||||
|
||||
logger.trace(string_pos_matrix)
|
||||
logger.trace(utf_16_bang_list)
|
||||
|
||||
logger.trace(string_pos_matrix)
|
||||
|
||||
for (start, end), template in templates:
|
||||
logger.trace(older_text == updated_text)
|
||||
logger.trace(f"{updated_text}")
|
||||
|
||||
logger.trace(f"{start=}, {end=}, {template=}")
|
||||
|
||||
if start >= len(string_pos_matrix):
|
||||
logger.warning("Start position is out of range. Ignore...")
|
||||
continue
|
||||
elif end - 1 >= len(string_pos_matrix):
|
||||
logger.warning("End position is out of range. Using workaround.")
|
||||
while end - 1 >= len(string_pos_matrix):
|
||||
end -= 1
|
||||
|
||||
if start == end:
|
||||
logger.warning("Start and end positions are the same")
|
||||
continue
|
||||
|
||||
logger.trace(f"{len(string_pos_matrix)=}")
|
||||
|
||||
new_start, new_end = (
|
||||
string_pos_matrix[start],
|
||||
string_pos_matrix[end - 1] + 1,
|
||||
)
|
||||
|
||||
if new_end < new_start:
|
||||
logger.error(f"Invalid negative range: {new_start=} {new_end=}. Ignore.....")
|
||||
# we had to ignore this error since we need to release new version
|
||||
# raise ValueError(f"Invalid negative range: {new_start=} {new_end=}")
|
||||
continue
|
||||
|
||||
logger.trace(f"{new_start=}, {new_end=}")
|
||||
|
||||
logger.trace(updated_text[new_start:new_end])
|
||||
|
||||
older_text = updated_text
|
||||
logger.trace(f"{older_text=}")
|
||||
|
||||
context_text = template.render(text=older_text[new_start:new_end])
|
||||
logger.trace(context_text)
|
||||
updated_text_template = jinja_env.from_string("{{ updated_text[:new_start] }}{{ context_text }}{{updated_text[new_end:]}}")
|
||||
updated_text = updated_text_template.render(updated_text=updated_text, context_text=context_text, new_start=new_start, new_end=new_end)
|
||||
logger.trace(updated_text)
|
||||
|
||||
prefix_len = _get_prefix_len(template)
|
||||
suffix_len = _get_suffix_len(template)
|
||||
|
||||
update_nested_positions(start, end, prefix_len, suffix_len)
|
||||
|
||||
logger.trace(string_pos_matrix)
|
||||
|
||||
return updated_text, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
@trace
|
||||
def _render_replaces(self, string: str, string_pos_matrix: list, utf_16_bang_list: list):
|
||||
if not self.replaces and not self.quote_replaces:
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
string = StringAsignmentMix(string)
|
||||
replaces = self.replaces + self.quote_replaces
|
||||
|
||||
@trace
|
||||
def update_positions(start: int, end: int, replace_len: int, new_start: int, new_end: int):
|
||||
pos_len = len(range(start, end))
|
||||
logger.trace(pos_len)
|
||||
pos_len_diff = replace_len - pos_len
|
||||
logger.trace(pos_len_diff)
|
||||
for pos_index, pos_matrix in enumerate(string_pos_matrix[end:], end):
|
||||
if isinstance(pos_matrix, int):
|
||||
string_pos_matrix[pos_index] += pos_len_diff
|
||||
elif isinstance(pos_matrix, tuple):
|
||||
string_pos_matrix[pos_index] = (
|
||||
string_pos_matrix[pos_index][0] + pos_len_diff,
|
||||
string_pos_matrix[pos_index][1] + pos_len_diff,
|
||||
)
|
||||
|
||||
if pos_len_diff != 0:
|
||||
for i in range(start, end):
|
||||
if isinstance(string_pos_matrix[i], int):
|
||||
string_pos_matrix[i] = (
|
||||
string_pos_matrix[i],
|
||||
string_pos_matrix[i] + replace_len,
|
||||
)
|
||||
elif isinstance(string_pos_matrix[i], tuple):
|
||||
string_pos_matrix[i] = (
|
||||
string_pos_matrix[i][0] + replace_len,
|
||||
string_pos_matrix[i][1] + replace_len,
|
||||
)
|
||||
|
||||
for n in range(len(utf_16_bang_list)):
|
||||
utf_16_bang = utf_16_bang_list[n]
|
||||
if utf_16_bang[0] > end:
|
||||
utf_16_bang_list[n] = (utf_16_bang[0] + pos_len_diff, utf_16_bang[1], utf_16_bang[2])
|
||||
|
||||
logger.trace(string_pos_matrix)
|
||||
|
||||
for (start, end), replace_with in replaces:
|
||||
new_start, new_end = string_pos_matrix[start], string_pos_matrix[end - 1]
|
||||
if isinstance(new_end, int):
|
||||
new_end += 1
|
||||
|
||||
if isinstance(new_start, tuple) or isinstance(new_end, tuple):
|
||||
if isinstance(new_start, tuple):
|
||||
new_start_tmp = list(range(new_start[0], new_start[1] + 1))
|
||||
else:
|
||||
new_start_tmp = [new_start]
|
||||
|
||||
if isinstance(new_end, tuple):
|
||||
new_end_tmp = list(range(new_end[0], new_end[1] + 1))
|
||||
else:
|
||||
new_end_tmp = [new_end]
|
||||
|
||||
new_range = new_start_tmp + new_end_tmp
|
||||
logger.trace(new_range)
|
||||
new_start, new_end = min(new_range), max(new_range)
|
||||
|
||||
logger.trace(f"{new_start=}, {new_end=}")
|
||||
|
||||
logger.trace(string[new_start:new_end])
|
||||
|
||||
string[new_start:new_end] = replace_with
|
||||
logger.trace(string)
|
||||
|
||||
update_positions(start, end, len(replace_with), new_start, new_end)
|
||||
logger.trace(string_pos_matrix)
|
||||
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
@trace
|
||||
def __str__(self):
|
||||
string = StringAsignmentMix(self.string)
|
||||
|
||||
string_pos_matrix = [pos for pos in range(len(string))]
|
||||
updated_text, string_pos_matrix, utf_16_bang_list = self.pre_utf_16_bang(string, string_pos_matrix)
|
||||
|
||||
if self.quote_html_type:
|
||||
self.quote_replaces = []
|
||||
html_quote_replaces = quote_html(str(updated_text), self.quote_html_type)
|
||||
for html_quote in html_quote_replaces:
|
||||
self.quote_replaces.append(html_quote)
|
||||
|
||||
if not self.templates and not self.replaces and not self.quote_replaces:
|
||||
logger.debug("No templates, no replaces, no quote_replaces")
|
||||
return str(self.string)
|
||||
|
||||
updated_text, string_pos_matrix, utf_16_bang_list = self._render_templates(updated_text, string_pos_matrix, utf_16_bang_list)
|
||||
updated_text, string_pos_matrix, utf_16_bang_list = self._render_replaces(updated_text, string_pos_matrix, utf_16_bang_list)
|
||||
updated_text, string_pos_matrix = self.post_utf_16_bang(updated_text, string_pos_matrix, utf_16_bang_list)
|
||||
return str(updated_text)
|
||||
|
||||
def get_text(self):
    """Return the fully rendered text; same result as ``str(self)``."""
    return str(self)
|
||||
|
||||
|
||||
def split_overlapping_ranges(markups):
    """Repeatedly apply ``split_overlapping_range_position`` until the count settles.

    At most ``len(markups) * 7`` passes are made. A pass that produces the same
    number of entries as the previous list stops the loop, and the *previous*
    list is returned (on the first pass that is the caller's own list, which the
    helper has sorted in place).
    """
    previous = markups
    passes_left = len(markups) * 7

    while passes_left > 0:
        passes_left -= 1
        markups = split_overlapping_range_position(markups)
        if previous and len(previous) == len(markups):
            break
        previous = markups

    return previous
|
||||
|
||||
|
||||
def split_overlapping_range_position(positions):
    """Run one splitting/merging pass over markup ranges.

    ``positions`` is a list of dicts with ``start``, ``end``, ``type`` and
    ``template`` keys. The list is sorted in place by ``start``. Each entry is
    then compared with the last entry already emitted:

    * no overlap: the entry is appended unchanged;
    * overlap, same type: the last entry's ``end`` is extended in place;
    * overlap, different type, ending before the last entry: the last entry is
      cut at the newcomer's start and three more segments are appended
      (newcomer, last-type copy over the newcomer's span, last-type remainder);
    * overlap, different type otherwise: the last entry is cut at the newcomer's
      start, then the newcomer and a last-type segment from the newcomer's
      start to the last entry's end are appended.

    Returns the new list (``[]`` for empty input).
    """
    if not positions:
        return []

    def _segment(begin, finish, owner):
        # New dict spanning [begin, finish) with the type/template of ``owner``.
        return {
            "start": begin,
            "end": finish,
            "type": owner["type"],
            "template": owner["template"],
        }

    # Sort the positions by start (mutates the caller's list)
    positions.sort(key=lambda entry: entry["start"])
    logger.trace(positions)

    # Seed the result with the first position
    result = [positions[0]]
    logger.trace(result)

    for pos in positions[1:]:
        logger.trace(pos)
        last = result[-1]

        # No overlap with the previously emitted range: keep as is.
        if not pos["start"] < last["end"]:
            logger.trace(4)
            result.append(pos)
            logger.trace(result)
            continue

        logger.trace(0)
        types_differ = pos["type"] != last["type"]

        if types_differ and pos["end"] < last["end"]:
            logger.trace(1)
            # Newcomer sits strictly inside the last range.
            result[-1] = _segment(last["start"], pos["start"], last)
            logger.trace(result)
            result.append(_segment(pos["start"], pos["end"], pos))
            logger.trace(result)
            result.append(_segment(pos["start"], pos["end"], last))
            logger.trace(result)
            result.append(_segment(pos["end"], last["end"], last))
            logger.trace(result)
        elif types_differ:
            logger.trace(2)
            # Newcomer reaches at least as far as the last range.
            result[-1] = _segment(last["start"], pos["start"], last)
            logger.trace(result)
            result.append(_segment(pos["start"], pos["end"], pos))
            logger.trace(result)
            result.append(_segment(pos["start"], last["end"], last))
            logger.trace(result)
        else:
            logger.trace(3)
            # Same type: merge by stretching the last range in place.
            result[-1]["end"] = max(last["end"], pos["end"])
            logger.trace(result)

    return result
|
||||
|
||||
|
||||
def raw_render(**kwargs):
    """Wrap every string keyword value in Jinja ``{% raw %}...{% endraw %}`` tags.

    Non-string values are passed through unchanged. Returns a dict with the
    same keys in the same order.
    """
    return {
        name: f"{{% raw %}}{value}{{% endraw %}}" if isinstance(value, str) else value
        for name, value in kwargs.items()
    }
|
||||
|
||||
|
||||
def parse_markups(markups: list):
    """Attach a compiled Jinja template to every supported markup dict.

    Supported ``type`` values are ``A`` (with ``anchorType`` ``LINK`` or
    ``USER``), ``STRONG``, ``EM`` and ``CODE``. Unsupported markups are logged
    and dropped. Each kept markup dict is mutated in place (a ``template`` key is
    added) and returned in a new list, in input order.
    """
    # NOTE(review): href/title/rel are only wrapped in {% raw %}, not HTML-escaped,
    # before landing inside attribute values - confirm escaping happens elsewhere.
    inline_templates = {
        "STRONG": "<strong>{{text}}</strong>",
        "EM": "<em>{{text}}</em>",
        "CODE": "<code class='p-1 dark:bg-gray-600'>{{text}}</code>",
    }
    markups_out = []

    for markup in markups:
        logger.trace(markup)
        kind = markup["type"]

        if kind == "A":
            anchor_type = markup["anchorType"]
            if anchor_type == "LINK":
                link = jinja_env.from_string('<a class="text-base" style="text-decoration: underline;" rel="{{rel}}" title="{{title}}" href="{{href}}" target="_blank">{{text}}</a>')
                template = link.render(raw_render(rel=markup.get("rel", ""), title=markup.get("title", ""), href=markup["href"]))
            elif anchor_type == "USER":
                user_link = jinja_env.from_string('<a class="text-base" style="text-decoration: underline;" href="https://medium.com/u/{{userId}}">{{text}}</a>')
                template = user_link.render(userId=markup["userId"])
            else:
                logger.error(f"Can't proccess 'anchorType': {markup['anchorType']}")
                continue
        elif kind in inline_templates:
            template = inline_templates[kind]
        else:
            logger.error(f"Unknown markup type: {markup}")
            continue

        # Compile the (possibly pre-rendered) source so callers can fill in ``text``.
        markup["template"] = jinja_env.from_string(template)
        markups_out.append(markup)

    return markups_out
|
||||
41
rl_string_helper/rl_string_helper/utils.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import re
|
||||
|
||||
|
||||
# Matches a bare "&", "<" or ">". The negative lookahead skips a character that is
# directly followed by "amp;", "lt;", "gt;", "quot;" or "#39;", so an "&" that already
# starts one of those entities is not escaped a second time.
MINIMAL_QUOTE_PATTERN = re.compile(r"""([&<>])(?!(amp|lt|gt|quot|#39);)""")
# Replacement for each character captured by MINIMAL_QUOTE_PATTERN.
MINIMAL_QUOTE_REPLACE_WITH = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
}

# Matches a double or single quote character.
NORMAL_QUOTE_PATTERN = re.compile("|".join(map(re.escape, ['"', "'"])))
# Entity names mirror the ones listed in the MINIMAL_QUOTE_PATTERN lookahead.
NORMAL_QUOTE_REPLACE_WITH = {
    '"': "&quot;",  # should be escaped in attributes
    "'": "&#39;",  # should be escaped in attributes
}
|
||||
|
||||
# Matches newline and tab characters (used by the "extra" quoting branch of quote_html).
EXTRA_QUOTE_PATTERN = re.compile("|".join(map(re.escape, ["\n", "\t"]))) # ' '
# NOTE(review): the tab replacement below is a whitespace character here; it may
# originally have been an HTML entity - confirm against the repository.
EXTRA_QUOTE_REPLACE_WITH = {"\n": "<br />", "\t": " "} # " ": " "

# Typographic quotation marks mapped to their plain ASCII counterparts.
QUOTE_SYMBOL = {'”': '"', "“": '"', "‘": "'", "’": "'"}
|
||||
|
||||
|
||||
def quote_symbol(text: str) -> str:
    """Return *text* with every key of ``QUOTE_SYMBOL`` replaced by its mapped value."""
    normalized = text
    for fancy in QUOTE_SYMBOL:
        normalized = normalized.replace(fancy, QUOTE_SYMBOL[fancy])
    return normalized
|
||||
|
||||
|
||||
# https://stackoverflow.com/questions/1061697/whats-the-easiest-way-to-escape-html-in-python
# XXX: disabling extra quoting as workaround
def quote_html(html: str, quote_types: list[str]) -> list[tuple[int, str]]:
    """Yield ``(span, replacement)`` pairs for characters of *html* that need quoting.

    This is a generator (the return annotation notwithstanding); ``span`` is the
    ``(start, end)`` tuple of the regex match. Matches are produced group by
    group, not in text order: first the minimal set (``&``, ``<``, ``>``) when
    ``minimal``, ``full`` or ``extra`` is requested, then quote characters when
    ``normal``, ``full`` or ``extra`` is requested, then newline/tab for ``extra``.
    """
    wants_everything = 'full' in quote_types or 'extra' in quote_types

    if 'minimal' in quote_types or wants_everything:
        yield from (
            (match.span(), MINIMAL_QUOTE_REPLACE_WITH[match.group(1)])
            for match in MINIMAL_QUOTE_PATTERN.finditer(html)
        )

    if 'normal' in quote_types or wants_everything:
        yield from (
            (match.span(), NORMAL_QUOTE_REPLACE_WITH[match.group(0)])
            for match in NORMAL_QUOTE_PATTERN.finditer(html)
        )

    # NOTE(review): the second test is what the original chained comparison
    # ``'extra' in quote_types in quote_types`` expands to. For a list it is
    # normally False, which keeps this branch off - presumably the workaround
    # mentioned in the XXX comment above; confirm before simplifying.
    if 'extra' in quote_types and quote_types in quote_types:
        for match in EXTRA_QUOTE_PATTERN.finditer(html):
            yield match.span(), EXTRA_QUOTE_REPLACE_WITH[match.group(0)]
|
||||
25
rl_string_helper/setup.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from setuptools import setup, find_packages
|
||||
|
||||
# Function to read the contents of the requirements file
def read_requirements():
    """Return the requirement specifiers listed in ``requirements.txt``.

    The file is read from the current working directory. Surrounding whitespace
    is stripped; blank lines and ``#`` comment lines are skipped so that only
    real specifiers reach ``install_requires``.
    """
    with open('requirements.txt', 'r', encoding='utf-8') as req:
        stripped = (line.strip() for line in req)
        return [line for line in stripped if line and not line.startswith('#')]
|
||||
|
||||
# Read the long description through a context manager so the README handle is closed.
with open('README.md', encoding='utf-8') as readme:
    long_description = readme.read()

setup(
    name='rl_string_helper',
    version='0.1.0',
    author='Freedium community',
    author_email='admin@freedium.cfd',
    description='Helper for Medium parser backend',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://codeberg.org/Freedium-cfd/web',
    packages=find_packages(),
    install_requires=read_requirements(),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # The package uses f"{name=}" (3.8+) and built-in generic annotations such as
    # list[str] evaluated at definition time (3.9+), so 3.7/3.8 cannot import it.
    python_requires='>=3.9',
)
|
||||
0
rl_string_helper/tests/__init__.py
Normal file
127
rl_string_helper/tests/test_rl_string_helper.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
import sys
|
||||
|
||||
from loguru import logger
|
||||
from rl_string_helper import RLStringHelper, quote_html, parse_markups
|
||||
|
||||
|
||||
class TestRLStringHelper:
    """Tests for RLStringHelper rendering, quote_html and parse_markups."""

    def setup_method(self):
        # Send loguru output to stdout at TRACE level for every test.
        logger.remove()
        logger.add(sys.stdout, level="TRACE")

    def test_html_quote(self):
        # NOTE(review): quote_html is called below with a single argument (and once
        # with ``True``), while the definition in utils.py requires ``quote_types``.
        # The expected replacement strings also look entity-decoded. Confirm this
        # test still matches the current implementation.
        quoted_string_1 = [i for i in quote_html("<Hello world>")]
        assert quoted_string_1 == [((0, 1), '<'), ((12, 13), '>')]

        # Test with standard HTML characters
        html = '<div class="test">Hello & World</div>'
        result = list(quote_html(html))
        expected = [((0, 1), '<'), ((11, 12), '"'), ((16, 17), '"'), ((17, 18), '>'), ((24, 25), '&'), ((31, 32), '<'), ((36, 37), '>')]
        assert result == expected

        # Test with extra characters
        html = '<div class="test">\nHello & World</div>'
        result = list(quote_html(html, True))
        expected = [((0, 1), '<'), ((11, 12), '"'), ((16, 17), '"'), ((17, 18), '>'), ((25, 26), '&'), ((32, 33), '<'), ((37, 38), '>'), ((18, 19), '<br />')]
        assert result == expected

        # Test with quote characters
        html = '<div class="test">Hello & \'World\'</div>'
        result = list(quote_html(html))
        # NOTE(review): as written, the ''' ... ''' pair below forms one triple-quoted
        # string, so this list has fewer entries than intended - verify the literals.
        expected = [((0, 1), '<'), ((11, 12), '"'), ((16, 17), '"'), ((17, 18), '>'), ((24, 25), '&'), ((26, 27), '''), ((32, 33), '''), ((33, 34), '<'), ((38, 39), '>')]
        assert result == expected

    def test_basic_template(self):
        # Templates are added cumulatively on the same helper; a wider template wraps
        # the ones already set.
        helper = RLStringHelper("Hello world")
        helper.set_template(0, 5, "<a>{{text}}</a>")
        assert str(helper) == "<a>Hello</a> world"

        helper.set_template(6, 11, "<b>{{text}}</b>")
        assert str(helper) == "<a>Hello</a> <b>world</b>"

        helper.set_template(0, 11, "<i>{{text}}</i>")
        assert str(helper) == "<i><a>Hello</a> <b>world</b></i>"

    def test_basic_replace(self):
        # Replace A to B - ONE to ONE char
        helper = RLStringHelper("ABC")
        helper.set_replace(0, 1, "B")
        assert str(helper) == "BBC"

        # Replace first B to AA - ONE to TWO chars
        helper.set_replace(0, 1, "AA")
        assert str(helper) == "AABC"

        # Replace C to D - ONE to ONE char
        helper.set_replace(2, 3, "D")
        assert str(helper) == "AABD"

        # Replace BD to R - TWO to ONE char
        helper.set_replace(1, 3, "R")
        assert str(helper) == "AAR"

        # Replace AA to CD
        helper.set_replace(0, 2, "CD")
        assert str(helper) == "CD"

    def test_multibyte_replace(self):
        # Strings here contain characters outside the Basic Multilingual Plane (emoji).
        helper = RLStringHelper("TESERT - 📊 - ABC")
        helper.set_replace(0, 6, "B")
        assert helper.get_text() == "B - 📊 - ABC"

        helper = RLStringHelper("Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.")
        helper.set_template(0, 200, "<kr>{{text}}</kr>")
        helper.set_template(0, 200, "<kz>{{text}}</kz>")
        assert helper.get_text() == "<kz><kr>Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.</kr></kz>"

        helper = RLStringHelper("TESERT ALMACOM - 📊 - ABC")
        helper.set_replace(0, 14, "B")
        assert helper.get_text() == "B - 📊 - ABC"

        helper = RLStringHelper("hello - 📊 - ABC")
        helper.set_template(0, 5, "<a>{{text}}</a>")
        assert helper.get_text() == "<a>hello</a> - 📊 - ABC"

        helper = RLStringHelper("ABC 📊 - How are you?")
        helper.set_template(4, 6, "<a>{{text}}</a>")
        assert str(helper) == "ABC <a>📊</a> - How are you?"

        helper = RLStringHelper("We have a 📊, a 📊 and a 📊.")
        helper.set_template(0, 30, "<e>{{text}}</e>")
        assert helper.get_text() == "<e>We have a 📊, a 📊 and a 📊.</e>"

    def test_romano(self):
        # Text with many non-BMP characters and nothing queued must round-trip unchanged.
        issue_text = "Whilst academic research papers have highlighted performance issues with the prophet since 2017, the propagation of package popularity through the data science community has been fueled by 𝙗𝙤𝙩𝙝 𝙚𝙭𝙘𝙚𝙨𝙨𝙞𝙫𝙚 𝙘𝙡𝙖𝙞𝙢𝙨 𝙛𝙧𝙤𝙢 𝙩𝙝𝙚 𝙤𝙧𝙞𝙜𝙞𝙣𝙖𝙡 𝙙𝙚𝙫𝙚𝙡𝙤𝙥𝙢𝙚𝙣𝙩 𝙩𝙚𝙖𝙢 𝙗𝙪𝙩 𝙢𝙤𝙧𝙚 𝙞𝙢𝙥𝙤𝙧𝙩𝙖𝙣𝙩𝙡𝙮 𝙗𝙮 𝙢𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 𝙤𝙛 𝙩𝙝𝙚 𝙣𝙤𝙣-𝙥𝙚𝙧𝙛𝙤𝙧𝙢𝙞𝙣𝙜 𝙥𝙖𝙘𝙠𝙖𝙜𝙚 𝙫𝙞𝙖 𝙖𝙧𝙩𝙞𝙘𝙡𝙚𝙨 𝙤𝙣 𝙈𝙚𝙙𝙞𝙪𝙢 𝙖𝙣𝙙 𝙨𝙤𝙘𝙞𝙖𝙡 𝙢𝙚𝙙𝙞𝙖."
        helper = RLStringHelper(issue_text)
        assert helper.get_text() == issue_text

    def test_markup_parser(self):
        # The href deliberately contains an unbalanced "{{" to check that link values
        # are not interpreted as Jinja syntax.
        href_markup = {
            "__typename": 'Markup',
            "anchorType": 'LINK',
            "end": 12,
            "href": 'https://readwise.io/bookreview/{{book_id',
            "name": None,
            "rel": 'nofollow',
            "start": 0,
            "title": '',
            "type": 'A',
            "userId": None
        }

        helper = RLStringHelper("Hello world")
        markups = parse_markups([href_markup])
        for markup in markups:
            helper.set_template(markup["start"], markup["end"], markup["template"])
        # NOTE(review): the LINK template in parse_markups emits class="text-base",
        # which this expected string does not contain - confirm which side is stale.
        assert helper.get_text() == '<a style="text-decoration: underline;" rel="nofollow" title="" href="https://readwise.io/bookreview/{{book_id" target="_blank">Hello world</a>'

    def test_medium_all(self):
        # Mix one replace with two templates on the same helper.
        helper = RLStringHelper("ABC Hello world")
        helper.set_replace(0, 1, "B")
        assert str(helper) == "BBC Hello world"

        helper.set_template(4, 9, "<a>{{text}}</a>")
        assert str(helper) == "BBC <a>Hello</a> world"

        helper.set_template(10, 15, "<b>{{text}}</b>")
        assert str(helper) == "BBC <a>Hello</a> <b>world</b>"
|
||||
11
ruff.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
target-version = "py37"
|
||||
|
||||
select = [
    "E", # pycodestyle
    "F", # pyflakes
    "UP", # pyupgrade
    "I", # isort
]
|
||||
|
||||
line-length = 120
|
||||
per-file-ignores = {"__init__.py" = ["F401"]}
|
||||
8
scripts/build.sh
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
# Build a standalone distribution of the `server` package with Nuitka.

# One-time build prerequisites (run manually if needed):
# pip install nuitka==1.8
# sudo apt install patchelf ccache -y
# sudo /usr/sbin/update-ccache-symlinks
# export PATH="/usr/lib/ccache:$PATH"

# pytest is excluded from the build; uvicorn.workers is included explicitly.
python3 -m nuitka --standalone --nofollow-import-to=pytest --python-flag=nosite,-O,isolated --plugin-enable=anti-bloat,implicit-imports,data-files,pylint-warnings --warn-implicit-exceptions --warn-unusual-code --prefer-source-code --include-package=uvicorn.workers --verbose --show-modules --show-memory --show-progress --show-scons server # --low-memory
|
||||
37
scripts/check_health.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
|
||||
from aiogram import Bot
|
||||
from loguru import logger
|
||||
|
||||
# Telegram bot token is taken from the environment; the script refuses to start without it.
BOT_TOKEN = os.getenv("BOT_TOKEN")

if not BOT_TOKEN:
    raise ValueError("No bot token!")

bot = Bot(BOT_TOKEN)

# Chat that receives the alert messages.
ADMIN_CHAT_ID = "1621425349"
# Pause between two health checks, in seconds (15 minutes).
SLEEP_TIME = 15 * 60
|
||||
|
||||
|
||||
async def main():
    """Poll https://freedium.cfd forever and alert the admin chat when it is unhealthy.

    Every ``SLEEP_TIME`` seconds the site is requested with a 3 second timeout.
    Any status other than 200, or any request error, triggers a Telegram message
    to ``ADMIN_CHAT_ID``.
    """
    while True:
        logger.debug("Checking health of freedium.cfd")

        # Default to a failure marker so the value is always bound, even when the
        # request raises before a status is read.
        response_status = "ERROR"
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get("https://freedium.cfd", timeout=3) as response:
                    response_status = response.status
        except Exception as ex:
            logger.exception(ex)

        # Alert outside of ``finally`` so task cancellation / interpreter shutdown
        # propagates immediately instead of awaiting a network call on the way out.
        if response_status != 200:
            try:
                await bot.send_message(ADMIN_CHAT_ID, "EMERGENCY! SITE IS DOWN!!!")
            except Exception as ex:
                # A failed notification must not stop the monitor itself.
                logger.exception(ex)

        logger.debug("Sleeping ...")
        await asyncio.sleep(SLEEP_TIME)
|
||||
|
||||
|
||||
asyncio.run(main())
|
||||
2
scripts/dev_clean.sh
Executable file
|
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
# Lint with auto-fix, then format the whole repository.
# The shebang lets the executable file run with a defined interpreter.
ruff check ./ --fix
black .
|
||||
3
scripts/disable_redis.sh
Executable file
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
# Show the current state of the redis service, then stop it and disable it at boot.
# The shebang lets the executable file run with a defined interpreter.
sudo systemctl status redis
sudo systemctl stop redis
sudo systemctl disable redis
|
||||
54
scripts/generate_caddy_rules.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
|
||||
from jinja2 import Template
|
||||
|
||||
# Rendered Caddy directive blocks, in emission order.
OUTPUT_RULES = []

# --- one file_server rule per regular file found directly in ./static ---
static_files = [entry for entry in listdir("./static") if isfile(join("./static", entry))]

static_file_template = """
handle_path /{{ file }} {
root * ./static/{{ file }}
file_server
}
"""
static_file_template_jinja = Template(static_file_template)

OUTPUT_RULES.extend(static_file_template_jinja.render(file=static_name) for static_name in static_files)

# --- paths that must always answer 403 ---
ACCESS_DENIED_PATHS = ["onboarding/*", "wp-*", ".env", "api*", "apple-touch-icon-precomposed.png", "rss.xml", ".git/*", "apple-touch-icon-120x120.png", "apple-touch-icon-120x120-precomposed.png", "apple-touch-icon-152x152.png", "apple-touch-icon-152x152-precomposed.png", ".well-known/*"]

access_denied_paths_template = """
handle_path /{{ file }} {
respond "Access denied" 403
}
"""

access_denied_paths_template_jinja = Template(access_denied_paths_template)

OUTPUT_RULES.extend(access_denied_paths_template_jinja.render(file=denied_path) for denied_path in ACCESS_DENIED_PATHS)

HUMAN_OUTPUT_RULES = "\n".join(OUTPUT_RULES)

# with open("scripts/output_rules.txt", "w") as file:
# file.write(HUMAN_OUTPUT_RULES)

# Template file -> generated Caddyfile; each template receives the rules as ``template``.
caddy_file_templates = {
    "CaddyfileDevTemplate": "CaddyfileDev",
    "CaddyfileProdTemplate": "CaddyfileProd",
}

for template_path in caddy_file_templates:
    output_path = caddy_file_templates[template_path]

    with open(template_path) as template_file:
        caddy_file = Template(template_file.read())
    caddy_file_rendered = caddy_file.render(template=HUMAN_OUTPUT_RULES)

    with open(output_path, "w") as output_file:
        output_file.write(caddy_file_rendered)
|
||||
51
scripts/generate_sitemap.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
from aiohttp_client_cache import CachedSession, SQLiteBackend
|
||||
import asyncio
|
||||
from loguru import logger
|
||||
import json
|
||||
from jinja2 import Template
|
||||
|
||||
from progress.bar import Bar
|
||||
|
||||
import datetime
|
||||
|
||||
# Sitemap entries collected by main(); one dict per cached Medium post.
MEDIUM_URLS = []


async def main():
    """Build ``static/sitemap.xml`` from the responses stored in ``medium_cache.sqlite``.

    Every cached response body is parsed as JSON; entries whose
    ``data.post`` is ``None`` are skipped with an error log. The remaining
    posts' ``mediumUrl`` values are written as ``<url>`` elements.
    """
    # One timestamp for the whole run instead of recomputing it per response.
    lastmod_date = datetime.datetime.now().strftime('%Y-%m-%d')

    async with CachedSession(cache=SQLiteBackend('medium_cache.sqlite')) as session:
        responses = [resp async for resp in session.cache.responses.values()]
        bar = Bar('Processing...', max=len(responses))
        for resp in responses:
            # NOTE(review): ``_body`` is a private attribute of the cached response.
            body = json.loads(resp._body)
            post = body["data"]["post"]
            url = post["mediumUrl"] if post is not None else None
            if url is None:
                logger.error("Ignoring non valid Medium post data")
                bar.next()
                continue
            MEDIUM_URLS.append({"url": url, "lastmod": lastmod_date, "changefreq": "monthly", "priority": "1.0"})
            bar.next()

        bar.finish()

    # The sitemap protocol requires entity-escaped URLs; ``|e`` escapes "&", "<",
    # ">" and quotes so a URL with a query string still yields well-formed XML.
    sitemap_template = '''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for page in pages %}
<url>
<loc>{{page.url|e}}</loc>
<lastmod>{{page.lastmod}}</lastmod>
<changefreq>{{page.changefreq}}</changefreq>
<priority>{{page.priority}}</priority>
</url>
{% endfor %}
</urlset>'''

    template = Template(sitemap_template)

    sitemap_output = template.render(pages=MEDIUM_URLS)
    # The XML declaration announces UTF-8, so write the file with that encoding.
    with open("static/sitemap.xml", 'w', encoding="utf-8") as f:
        f.write(sitemap_output)

    logger.info("Done")
|
||||
|
||||
asyncio.run(main())
|
||||
148
scripts/output_rules.txt
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
|
||||
handle_path /site.webmanifest {
|
||||
root * ./static/site.webmanifest
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-32x32.png {
|
||||
root * ./static/favicon-32x32.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /robots.txt {
|
||||
root * ./static/robots.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /ads.txt {
|
||||
root * ./static/ads.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /humans.txt {
|
||||
root * ./static/humans.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-150x150.png {
|
||||
root * ./static/mstile-150x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x310.png {
|
||||
root * ./static/mstile-310x310.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /sitemap.xml {
|
||||
root * ./static/sitemap.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /99860281ef1143d5a5558ad9a21a470d.txt {
|
||||
root * ./static/99860281ef1143d5a5558ad9a21a470d.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-70x70.png {
|
||||
root * ./static/mstile-70x70.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-192x192.png {
|
||||
root * ./static/android-chrome-192x192.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-310x150.png {
|
||||
root * ./static/mstile-310x150.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /safari-pinned-tab.svg {
|
||||
root * ./static/safari-pinned-tab.svg
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /android-chrome-512x512.png {
|
||||
root * ./static/android-chrome-512x512.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon-16x16.png {
|
||||
root * ./static/favicon-16x16.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /favicon.ico {
|
||||
root * ./static/favicon.ico
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /browserconfig.xml {
|
||||
root * ./static/browserconfig.xml
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /mstile-144x144.png {
|
||||
root * ./static/mstile-144x144.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /security.txt {
|
||||
root * ./static/security.txt
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon.png {
|
||||
root * ./static/apple-touch-icon.png
|
||||
file_server
|
||||
}
|
||||
|
||||
handle_path /onboarding/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /wp-* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.env {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /api* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /rss.xml {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.git/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-120x120-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /apple-touch-icon-152x152-precomposed.png {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
|
||||
handle_path /.well-known/* {
|
||||
respond "Access denied" 403
|
||||
}
|
||||
4
scripts/register_caddy.sh
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash
# Grant the bundled caddy binary the capability to bind privileged ports (<1024)
# without running as root.

# Architecture name as reported by lscpu; it selects the bin/<arch>/ directory.
arch=$(lscpu | grep Architecture | awk '{print $2}')

# Quote the path so a working directory containing spaces does not split it.
sudo setcap cap_net_bind_service=+ep "$(pwd)/bin/${arch}/caddy"
|
||||
85
scripts/start_dev.sh
Executable file
|
|
@ -0,0 +1,85 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Same script as start_prod, but adopted to dev environment
|
||||
|
||||
# Print whether the environment variable named by $1 is blank or set.
# NOTE(review): the value itself is echoed, which includes secrets such as tokens.
check_env_var() {
    local var_name="$1"
    local var_value="${!var_name}"

    if [[ -n "$var_value" ]]; then
        echo "$var_name var is set to '$var_value'"
        return
    fi
    echo "$var_name var is blank"
}
|
||||
|
||||
# Report the Telegram settings used by sendMessageTelegram.
check_env_var "TELEGRAM_ADMIN_ID"
check_env_var "TELEGRAM_BOT_TOKEN"

# Architecture name as reported by lscpu; it selects the bin/<arch>/ directory.
arch=$(lscpu | grep Architecture | awk {'print $2'})
echo $arch

# Wipe all redis databases before starting.
redis-cli flushall
# Start caddy and the Python server in the background and remember their PIDs.
./bin/$arch/caddy run --config CaddyfileDev &
CADDY_PID=$!
PYTHONASYNCIODEBUG=1 python3 -m server server &
SERVER_PID=$!
|
||||
|
||||
# Stop both background processes when this script exits.
onexit() {
    echo "onexit"
    kill $CADDY_PID
    kill $SERVER_PID
}

# Run onexit on any exit of the script.
trap onexit EXIT
|
||||
|
||||
# Echo the message and send it to the admin chat via the Telegram Bot API.
# $1 - message text; it is interpolated into the JSON payload without escaping.
sendMessageTelegram(){
    local message=${1}
    local payload="{\"chat_id\":\"$TELEGRAM_ADMIN_ID\",\"text\":\"$message\"}"
    local endpoint="https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage"

    echo ${1}
    curl -X POST -H 'Content-Type: application/json' -d "$payload" "$endpoint"
}
|
||||
|
||||
# Supervision loop: restart caddy / the server when their process is gone or when
# their HTTP endpoint stops answering.
while true
do
    sleep 15

    # kill -0 sends no signal; it only reports whether the exact PID still exists
    # (grepping `ps -A` output also matched PIDs that merely contain the number).
    if ! kill -0 "$CADDY_PID" 2>/dev/null; then
        # sendMessageTelegram "Restarting caddy, since it's down"
        # `caddy run` stays in the foreground, so $! is the PID of caddy itself;
        # `caddy start` would return immediately and leave a stale PID here.
        ./bin/$arch/caddy run --config CaddyfileDev &
        CADDY_PID=$!
    fi

    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        # sendMessageTelegram "Restarting server, since it's down"
        PYTHONASYNCIODEBUG=1 python3 -m server server &
        # Was `%!`, which stored a literal string instead of the new PID.
        SERVER_PID=$!
    fi

    sleep 35

    # curl prints 000 when no HTTP response was received, which is below 200.
    backend_service_url="http://localhost:7080"
    backend_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$backend_service_url")

    if [ "$backend_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting backend, since it's down"
        kill $SERVER_PID
        PYTHONASYNCIODEBUG=1 python3 -m server server &
        SERVER_PID=$!
    fi

    reverse_service_url="http://localhost:6752"
    reverse_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$reverse_service_url")

    if [ "$reverse_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting reverse, since it's down"
        kill $CADDY_PID
        ./bin/$arch/caddy run --config CaddyfileDev &
        CADDY_PID=$!
    fi

    sleep 65
done
|
||||
|
||||
7
scripts/start_maintance.sh
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
# Serve the maintenance configuration with caddy in the foreground.

# Architecture name as reported by lscpu; it selects the bin/<arch>/ directory.
arch=$(lscpu | grep Architecture | awk {'print $2'})
echo $arch

# Wipe all redis databases before starting.
redis-cli flushall
./bin/$arch/caddy run --config ./CaddyfileMaintance
|
||||
74
scripts/start_prod.sh
Executable file
|
|
@ -0,0 +1,74 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Report the Telegram settings used by sendMessageTelegram.
if [ -z "$TELEGRAM_ADMIN_ID" ]; then echo "TELEGRAM_ADMIN_ID var is blank"; else echo "TELEGRAM_ADMIN_ID var is set to '$TELEGRAM_ADMIN_ID'"; fi
# The bot token is a credential: report only its presence, never its value.
if [ -z "$TELEGRAM_BOT_TOKEN" ]; then echo "TELEGRAM_BOT_TOKEN var is blank"; else echo "TELEGRAM_BOT_TOKEN var is set"; fi

# Architecture name as reported by lscpu; it selects the bin/<arch>/ directory.
arch=$(lscpu | grep Architecture | awk {'print $2'})

# Wipe all redis databases before starting.
redis-cli flushall
# Start caddy and the Python server in the background and remember their PIDs.
./bin/$arch/caddy run --config CaddyfileProd &
CADDY_PID=$!
python3 -m server server &
SERVER_PID=$!
|
||||
|
||||
# Wait 25 seconds, then stop both background processes.
# Currently not installed: the trap below is commented out.
function onexit() {
    echo "onexit"
    sleep 25
    kill $CADDY_PID
    kill $SERVER_PID
}
# trap onexit EXIT
|
||||
|
||||
# Echo the message and send it to the admin chat via the Telegram Bot API.
# $1 - message text; it is interpolated into the JSON payload without escaping.
sendMessageTelegram(){
    local message=${1}
    local payload="{\"chat_id\":\"$TELEGRAM_ADMIN_ID\",\"text\":\"$message\"}"
    local endpoint="https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage"

    echo ${1}
    curl -X POST -H 'Content-Type: application/json' -d "$payload" "$endpoint"
}
|
||||
|
||||
# Supervision loop: restart caddy / the server when their process is gone or when
# their HTTP endpoint stops answering as expected.
while true
do
    sleep 5

    # kill -0 sends no signal; it only reports whether the exact PID still exists
    # (grepping `ps -A` output also matched PIDs that merely contain the number).
    if ! kill -0 "$CADDY_PID" 2>/dev/null; then
        sendMessageTelegram "Restarting caddy, since it's down"
        ./bin/$arch/caddy run --config CaddyfileProd &
        CADDY_PID=$!
    fi

    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        sendMessageTelegram "Restarting server, since it's down"
        python3 -m server server &
        SERVER_PID=$!
    fi

    sleep 5

    # curl prints 000 when no HTTP response was received, which is below 200.
    backend_service_url="http://localhost:7080"
    backend_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$backend_service_url")

    if [ "$backend_status_code" -lt 200 ]; then
        sendMessageTelegram "Restarting backend, since it's down"
        kill $SERVER_PID
        python3 -m server server &
        SERVER_PID=$!
    fi

    reverse_service_url="http://localhost"
    reverse_status_code=$(curl -m 10 -s -o /dev/null -w "%{http_code}" "$reverse_service_url")

    # NOTE(review): any status below 308 (including 200) triggers a restart -
    # presumably plain HTTP is expected to answer with a 308 redirect; confirm.
    if [ "$reverse_status_code" -lt 308 ]; then
        sendMessageTelegram "Restarting reverse, since it's down"
        kill $CADDY_PID
        ./bin/$arch/caddy run --config CaddyfileProd &
        CADDY_PID=$!
    fi

    sleep 65
done
|
||||
|
||||
43
server/__init__.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import datetime as dt
|
||||
import pickledb
|
||||
import logging
|
||||
from contextvars import ContextVar
|
||||
from typing import Optional
|
||||
|
||||
from jinja2 import Environment, DebugUndefined, FileSystemLoader
|
||||
import redis.asyncio as redis
|
||||
from xkcdpass import xkcd_password as xp
|
||||
|
||||
from server import config
|
||||
from server.utils.loguru_handler import InterceptHandler
|
||||
|
||||
# Async redis client pointed at the local instance, database 0.
redis_storage = redis.Redis(host="localhost", port=6379, db=0)

# Two kinds of Jinja environments: the async ones render final output, the "safe"
# ones use DebugUndefined for a first rendering pass with only some variables given.
jinja_env = Environment(enable_async=True)
jinja_safe_env = Environment(undefined=DebugUndefined)
template_env = Environment(loader=FileSystemLoader("./server/templates"), enable_async=True)
template_safe_env = Environment(loader=FileSystemLoader("./server/templates"), undefined=DebugUndefined)

base_template = template_env.get_template("base.html")
# Rendered once at import time; the resulting string is injected into main/error below.
url_line_template = template_env.get_template("url_line.html").render()
main_template_raw = template_safe_env.get_template("main.html")
postleter_template = template_env.get_template("postleter.html")
error_template_raw = template_safe_env.get_template("error.html")

# Two-stage rendering: fill in ``url_line`` first, then compile the result again
# as the template used at request time.
main_template_raw_rendered = main_template_raw.render(url_line=url_line_template)
main_template = jinja_env.from_string(main_template_raw_rendered)

error_template_raw_rendered = error_template_raw.render(url_line=url_line_template)
error_template = jinja_env.from_string(error_template_raw_rendered)

# Replace any existing stdlib logging configuration with the InterceptHandler.
logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)

# Context variables with placeholder defaults for when nothing has been set.
url_correlation: ContextVar[Optional[str]] = ContextVar("url_correlation", default="UNKNOWN_URL")
transponder_code_correlation: ContextVar[Optional[str]] = ContextVar("transponder_code_correlation", default="unknown transponder location... Beep!")

ban_db = pickledb.load('ban_post_list.db', True)

# Import-time wall clock (hours-minutes-seconds).
START_TIME = dt.datetime.now().strftime("%H-%M-%S")
# NOTE(review): path ends in "legac" - possibly a truncated "legacy"; verify the file exists.
WORDS_LIST_FILE = "xkcdpass/static/legac"

xkcd_passwd = xp.generate_wordlist(wordfile=WORDS_LIST_FILE, min_length=5, max_length=8)
|
||||
3
server/__main__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from server.cli import cli
|
||||
|
||||
cli()
|
||||
28
server/cli.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
from argparse import ArgumentParser
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def cli():
    """Parse the command line of ``python3 -m server`` and run the chosen sub-command.

    A sub-command is required; the only one defined is ``server``, which accepts
    ``--port`` (default 7080).
    """
    parser = ArgumentParser(prog="python3 -m server", description="Freedium server CLI")
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    server_parser = subcommands.add_parser("server")
    server_parser.add_argument("--port", nargs="?", type=int, const=7080, help="Port number", default=7080)

    opts = parser.parse_args()
    logger.trace(opts)

    # Sub-command name -> (handler, parser used for error reporting).
    dispatch = {"server": (server_cmd, server_parser)}
    selected = dispatch.get(opts.cmd)
    if selected is not None:
        handler, handler_parser = selected
        handler(handler_parser, opts)
|
||||
|
||||
|
||||
def server_cmd(cmd, opts):
    """Start the server worker on ``opts.port``, bound to all interfaces.

    ``cmd`` is the sub-command's argument parser; its ``error`` method is used to
    report a port that ``is_port_in_use`` flags. Project modules are imported
    lazily inside the function.
    """
    from server.utils.utils import is_port_in_use

    port = opts.port
    if is_port_in_use(port):
        cmd.error(f"Port {port} is in use or permission denied")

    # Imported only after the port check has passed.
    from server.worker import execute_server_worker

    execute_server_worker(host="0.0.0.0", port=port)
|
||||
19
server/config.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
from starlette.config import Config
|
||||
|
||||
# Application settings, loaded through starlette's Config from ".env".
config = Config(".env")

HOST_ADDRESS = config("HOST_ADDRESS", default="https://freedium.cfd")
MEDIUM_AUTH_COOKIES = config("MEDIUM_AUTH_COOKIES", default=None)
TELEGRAM_ADMIN_ID = config("TELEGRAM_ADMIN_ID", cast=int, default=0)
# Required setting (no default). Handlers read it as ``config.ADMIN_SECRET_KEY``.
ADMIN_SECRET_KEY = config("ADMIN_SECRET_KEY")
# Backward-compatible alias for the original, misspelled constant name.
ADMIN_ADMIN_SECRET_KEY = ADMIN_SECRET_KEY
TELEGRAM_BOT_TOKEN = config("TELEGRAM_BOT_TOKEN", default=None)
LOG_LEVEL_NAME = config("LOG_LEVEL_NAME", default="INFO")
MORE_LOGS = config("MORE_LOGS", cast=bool, default=False)
DISABLE_EXTERNAL_DOCS = config("DISABLE_EXTERNAL_DOCS", cast=bool, default=True)
DISABLE_RATE_LIMITER = config("DISABLE_RATE_LIMITER", cast=bool, default=True)
TIMEOUT = config("TIMEOUT", cast=int, default=20)
REQUEST_TIMEOUT = config("REQUEST_TIMEOUT", cast=int, default=40)
WORKER_TIMEOUT = config("WORKER_TIMEOUT", cast=int, default=60)
SENTRY_SDK_DSN = config("SENTRY_SDK_DSN", default=None)
ENABLE_ADS_BANNER = config("ENABLE_ADS_BANNER", cast=bool, default=False)
# Lifetime of cached rendered posts, in seconds (defaults to one day).
CACHE_LIFE_TIME = config("CACHE_LIFE_TIME", cast=int, default=60 * 60 * 24)
|
||||
1
server/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
|
||||
18
server/exceptions/main.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import sentry_sdk
|
||||
|
||||
from server.utils.error import generate_error
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
|
||||
@trace
async def handle_500_error(request, exc):
    """Report an unhandled exception to Sentry and answer with the generic error page."""
    try:
        # Re-raised and caught again before reporting (presumably so the
        # exception object carries a traceback when Sentry receives it).
        raise exc
    except Exception as captured:
        sentry_sdk.capture_exception(captured)

    error_response = await generate_error()
    return error_response
|
||||
|
||||
|
||||
def register_main_error_handler(app):
    """Install ``handle_500_error`` as the application's handler for status 500."""
    internal_server_error = 500
    app.add_exception_handler(internal_server_error, handle_500_error)
|
||||
1
server/handlers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from server.handlers import main
|
||||
62
server/handlers/main.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
from html5lib.html5parser import parse
|
||||
from html5lib import serialize
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from server import base_template, main_template, config
|
||||
|
||||
from fastapi import Request
|
||||
|
||||
from server.handlers.post import render_medium_post_link, render_postleter
|
||||
from server.handlers.reverse_proxy import miro_proxy, iframe_proxy
|
||||
from server.handlers.misc import report_problem, delete_from_cache
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
|
||||
@trace
async def route_processing(path: str, request: Request):
    """Dispatch the catch-all GET/HEAD route.

    An empty path serves the main page. Otherwise the path is rebuilt from the
    raw request (so the query string is kept) and routed by prefix:

    * ``render-no-cache/no-redis/<post>``: render, skipping Redis
    * ``render-no-cache/<post>``: render with ``use_cache=False``
    * ``@miro/<data>``: image proxy
    * ``render_iframe/<id>``: iframe proxy
    * anything else is treated as a Medium post id or URL
    """
    if not path:
        return await main_page()

    if request.scope.get("query_string"):
        path = request.url.path + "?" + request.scope["query_string"].decode()
    else:
        path = request.url.path
    path = path.removeprefix("/")

    if path.startswith("render-no-cache/"):
        path = path.removeprefix("render-no-cache/")
        # The remainder no longer starts with "/", so the original check for
        # "/no-redis/" could only match ".../render-no-cache//no-redis/...".
        # Accept both spellings so the documented form works too.
        if path.startswith(("no-redis/", "/no-redis/")):
            path = path.removeprefix("/").removeprefix("no-redis/")
            return await render_medium_post_link(path, True, False)
        return await render_medium_post_link(path, False)
    elif path.startswith("@miro/"):
        miro_data = path.removeprefix("@miro/")
        return await miro_proxy(miro_data)
    elif path.startswith("render_iframe/"):
        iframe_id = path.removeprefix("render_iframe/")
        return await iframe_proxy(iframe_id)

    return await render_medium_post_link(path)
|
||||
|
||||
|
||||
@trace
async def main_page():
    """Render the landing page: postleter inside the main template inside the base layout."""
    postleter_html = await render_postleter(as_html=True)
    body_html = await main_template.render_async(postleter=postleter_html)
    page_html = await base_template.render_async(body_template=body_html, HOST_ADDRESS=config.HOST_ADDRESS)
    # Round-trip through html5lib before sending the page.
    document = parse(page_html)
    return HTMLResponse(serialize(document, encoding='utf-8'))
|
||||
|
||||
|
||||
def register_main_router(app):
    """Register the two POST endpoints and the catch-all page route on ``app``."""
    post_endpoints = (
        ("/delete-from-cache", delete_from_cache),
        ("/report-problem", report_problem),
    )
    for route_path, handler in post_endpoints:
        app.add_api_route(path=route_path, endpoint=handler, methods=["POST"])

    # Registered last: "/{path:path}" matches every remaining GET/HEAD URL.
    catch_all_route = {
        "path": "/{path:path}",
        "endpoint": route_processing,
        "methods": ["GET", "HEAD"],
        "response_model": str,
        "tags": ["pages"],
        "summary": None,
        "description": None,
    }
    app.add_api_route(**catch_all_route)
|
||||
38
server/handlers/misc.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from server import config, ban_db
|
||||
from server.utils.notify import send_message
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
# Request body of POST /report-problem.
class ReportProblem(BaseModel):
|
||||
# Page the report is about; report_problem appends it to the notification.
page: str
|
||||
# Free-text description written by the user.
description: str
|
||||
|
||||
|
||||
# Request body of POST /delete-from-cache.
class DeleteFromCache(BaseModel):
|
||||
# Passed to MediumParser as the post to evict; also used as the ban_db key.
key: str
|
||||
# Must match the configured admin secret, otherwise the request gets a 403.
ADMIN_SECRET_KEY: str
|
||||
|
||||
|
||||
@trace
async def report_problem(problem: ReportProblem):
    """Forward a user's problem report through ``send_message`` and acknowledge it."""
    report_text = f"New problem report: \n{problem.description}\n\n{problem.page}"
    await send_message(report_text)
    acknowledgement = {"message": "OK"}
    return JSONResponse(acknowledgement, status_code=200)
|
||||
|
||||
|
||||
@trace
async def delete_from_cache(key_data: DeleteFromCache):
    """Evict a post from the parser cache and add it to the ban list.

    Returns 403 when the supplied secret does not match the configured one,
    500 when the eviction fails, and 200 with ``{"message": "OK"}`` otherwise.
    """
    import hmac

    # MediumParser is not imported at module level in this file; without this
    # import the call below raised NameError, which the broad ``except`` turned
    # into a permanent 500.
    from medium_parser.core import MediumParser

    # The settings module historically exposed the secret under a misspelled
    # name; accept either so this handler works with both.
    expected_key = getattr(config, "ADMIN_SECRET_KEY", None)
    if expected_key is None:
        expected_key = config.ADMIN_ADMIN_SECRET_KEY

    # Constant-time comparison; operands are encoded because compare_digest
    # rejects non-ASCII str values.
    supplied = key_data.ADMIN_SECRET_KEY.encode("utf-8")
    expected = str(expected_key).encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        return JSONResponse({"message": f"Wrong secret key: {key_data.ADMIN_SECRET_KEY}"}, status_code=403)

    try:
        post = MediumParser(key_data.key, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
        await post.delete_from_cache()
    except Exception as ex:
        logger.exception(ex)
        return JSONResponse({"message": f"Couldn't delete from cache: {ex}"}, status_code=500)
    else:
        # Only reached when the eviction succeeded.
        ban_db.set(key_data.key, 1)
        return JSONResponse({"message": "OK"}, status_code=200)
|
||||
104
server/handlers/post.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
import sentry_sdk
|
||||
import pickle
|
||||
from fastapi.responses import HTMLResponse
|
||||
from html5lib.html5parser import parse
|
||||
from html5lib import serialize
|
||||
from loguru import logger
|
||||
|
||||
from server.utils.error import generate_error
|
||||
from server.utils.logger_trace import trace
|
||||
from server.utils.notify import send_message
|
||||
from server.utils.cache import aio_redis_cache
|
||||
from server.utils.utils import correct_url, safe_check_redis_connection
|
||||
from server import base_template, config, url_correlation, redis_storage, postleter_template
|
||||
|
||||
from medium_parser import medium_parser_exceptions
|
||||
from medium_parser import cache as medium_cache
|
||||
from medium_parser.core import MediumParser
|
||||
from medium_parser.utils import is_valid_medium_post_id_hexadecimal
|
||||
|
||||
@trace
@aio_redis_cache(10 * 60)
async def render_postleter(limit: int = 60, as_html: bool = False):
    """Render the "postleter" block from up to ``limit`` random cached posts.

    Posts whose metadata cannot be produced are logged and skipped. Returns
    the rendered template string when ``as_html`` is true, otherwise an
    ``HTMLResponse`` wrapping it.
    """
    post_ids = [row[0] for row in medium_cache.random(limit)]

    collected_metadata = []
    for post_id in post_ids:
        try:
            parser = MediumParser(post_id, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
            await parser.query()
            metadata = await parser.generate_metadata(as_dict=True)
        except Exception as ex:
            logger.error(f"Couldn't render post_id for postleter: {post_id}, ex: {ex}")
            # await send_message(f"Couldn't render post_id for postleter: {post_id}, ex: {ex}")
        else:
            collected_metadata.append(metadata)

    rendered = await postleter_template.render_async(post_list=collected_metadata)
    return rendered if as_html else HTMLResponse(rendered)
|
||||
|
||||
|
||||
# Render a Medium post into a full HTML page response.
# `path` is either a post id (checked with is_valid_medium_post_id_hexadecimal)
# or something correct_url() can turn into a post URL.
# `use_cache` is forwarded to MediumParser.query(); together with `use_redis`
# it also decides whether a previously rendered copy is read from Redis.
# Every failure is answered with the page produced by generate_error().
@trace
|
||||
async def render_medium_post_link(path: str, use_cache: bool = True, use_redis: bool = True):
|
||||
redis_available = await safe_check_redis_connection(redis_storage)
|
||||
|
||||
try:
|
||||
if is_valid_medium_post_id_hexadecimal(path):
|
||||
medium_parser = MediumParser(path, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
|
||||
else:
|
||||
url = correct_url(path)
|
||||
medium_parser = await MediumParser.from_url(url, timeout=config.TIMEOUT, host_address=config.HOST_ADDRESS, auth_cookies=config.MEDIUM_AUTH_COOKIES)
|
||||
medium_post_id = medium_parser.post_id
|
||||
# Redis is consulted only when it is reachable and both flags allow it.
if redis_available and use_cache and use_redis:
|
||||
redis_result = await redis_storage.get(medium_post_id)
|
||||
else:
|
||||
redis_result = None
|
||||
if not redis_result:
|
||||
await medium_parser.query(use_cache=use_cache)
|
||||
rendered_medium_post = await medium_parser.render_as_html("server/templates")
|
||||
else:
|
||||
# NOTE(review): pickle.loads on bytes read back from Redis is only safe
# while nothing untrusted can write to that Redis instance.
rendered_medium_post = pickle.loads(redis_result)
|
||||
except medium_parser_exceptions.InvalidURL as ex:
|
||||
logger.exception(ex)
|
||||
sentry_sdk.capture_exception(ex)
|
||||
return await generate_error(
|
||||
"Unable to identify the Medium article URL.",
|
||||
status_code=404,
|
||||
)
|
||||
except (medium_parser_exceptions.InvalidMediumPostURL, medium_parser_exceptions.MediumPostQueryError, medium_parser_exceptions.PageLoadingError) as ex:
|
||||
logger.exception(ex)
|
||||
sentry_sdk.capture_exception(ex)
|
||||
return await generate_error(
|
||||
"Unable to identify the link as a Medium.com article page. Please check the URL for any typing errors.",
|
||||
status_code=404,
|
||||
)
|
||||
except medium_parser_exceptions.InvalidMediumPostID as ex:
|
||||
logger.exception(ex)
|
||||
sentry_sdk.capture_exception(ex)
|
||||
return await generate_error("Unable to identify the Medium article ID.", status_code=500)
|
||||
# This case is neither logged nor sent to Sentry; generate_error gets quiet=True.
except medium_parser_exceptions.NotValidMediumURL as ex:
|
||||
return await generate_error("You sure that this is a valid Medium.com URL?", status_code=404, quiet=True)
|
||||
except Exception as ex:
|
||||
logger.exception(ex)
|
||||
sentry_sdk.capture_exception(ex)
|
||||
return await generate_error(status_code=500)
|
||||
else:
|
||||
base_context = {
|
||||
"enable_ads_header": config.ENABLE_ADS_BANNER,
|
||||
"body_template": rendered_medium_post.data,
|
||||
"title": rendered_medium_post.title,
|
||||
"description": rendered_medium_post.description,
|
||||
}
|
||||
rendered_post = await base_template.render_async(base_context, HOST_ADDRESS=config.HOST_ADDRESS)
|
||||
# The page is round-tripped through html5lib before being returned.
parsed_rendered_post = parse(rendered_post)
|
||||
serialized_rendered_post = serialize(parsed_rendered_post, encoding='utf-8')
|
||||
|
||||
# Taken only when no cached copy was used for this response.
if not redis_result:
|
||||
if not redis_available:
|
||||
await send_message("ERROR: Redis is not available. Please check your configuration.")
|
||||
elif use_redis:
|
||||
await redis_storage.setex(medium_post_id, config.CACHE_LIFE_TIME, pickle.dumps(rendered_medium_post))
|
||||
await send_message(f"✅ Successfully rendered post: {url_correlation.get()}", True, "GOOD")
|
||||
|
||||
return HTMLResponse(serialized_rendered_post)
|
||||
34
server/handlers/reverse_proxy.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import aiohttp
|
||||
from fastapi import Response
|
||||
|
||||
from server import config
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
# Extra response headers attached to every page returned by iframe_proxy.
IFRAME_HEADERS = {"Access-Control-Allow-Origin": "*", "X-Frame-Options": "SAMEORIGIN"}
|
||||
|
||||
|
||||
@trace
async def iframe_proxy(iframe_id):
    """Fetch a Medium embed page and return it patched for use in our iframe.

    The upstream status code is passed through so that a failed fetch is no
    longer served as ``200 OK``.
    """
    # How Medium embeds works: https://stackoverflow.com/questions/56594766/medium-embed-ly-notifyresize-does-not-work-on-safari
    timeout = aiohttp.ClientTimeout(total=config.TIMEOUT)
    async with aiohttp.ClientSession(timeout=timeout) as client:
        # The context manager releases the connection even if reading fails.
        async with client.get(
            f"https://medium.com/media/{iframe_id}",
            headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"},
        ) as upstream:
            upstream_status = upstream.status
            page = await upstream.text()

    # Replace the embed's "document.domain" self-assignment with a harmless log line.
    page = page.replace("document.domain = document.domain", "console.log('[FREEDIUM] iframe workaround')")
    return Response(content=page, status_code=upstream_status, media_type="text/html", headers=IFRAME_HEADERS)
|
||||
|
||||
|
||||
@trace
async def miro_proxy(miro_data: str):
    """Proxy a resource from ``miro.medium.com`` back to the client.

    The upstream status code and ``Content-Type`` are passed through. A
    missing ``Content-Type`` no longer raises ``KeyError``; the response is
    then sent without a media type.
    """
    timeout = aiohttp.ClientTimeout(total=config.TIMEOUT)
    async with aiohttp.ClientSession(timeout=timeout) as client:
        # The context manager releases the connection even if reading fails.
        async with client.get(
            f"https://miro.medium.com/{miro_data}",
            headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"},
        ) as upstream:
            upstream_status = upstream.status
            content_type = upstream.headers.get("Content-Type")
            payload = await upstream.read()

    return Response(content=payload, status_code=upstream_status, media_type=content_type)
|
||||
77
server/main.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
from math import ceil
|
||||
import sentry_sdk
|
||||
import asyncio
|
||||
from contextlib import suppress
|
||||
from fastapi.exceptions import HTTPException
|
||||
from fastapi import FastAPI, Depends, APIRouter
|
||||
from loguru import logger
|
||||
from fastapi_limiter import FastAPILimiter
|
||||
from fastapi_limiter.depends import RateLimiter
|
||||
from server import config, redis_storage
|
||||
from server.exceptions.main import register_main_error_handler
|
||||
from server.handlers.main import register_main_router
|
||||
from server.middlewares import register_middlewares
|
||||
from server.utils.utils import safe_check_redis_connection
|
||||
|
||||
# Application identity used for the FastAPI title and version.
NAME = "Freedium"
VERSION = "1.0"

APP_TITLE = f"{NAME}'s REST API"
APP_VERSION = VERSION

FASTAPI_APPLICATION_CONFIG = {"title": APP_TITLE, "version": APP_VERSION}

if config.DISABLE_EXTERNAL_DOCS:
    # Setting all three URLs to None turns off the OpenAPI schema and both doc UIs.
    FASTAPI_APPLICATION_CONFIG.update(dict.fromkeys(("openapi_url", "docs_url", "redoc_url")))

if config.SENTRY_SDK_DSN:
    sentry_sdk.init(dsn=config.SENTRY_SDK_DSN, traces_sample_rate=1.0)
|
||||
|
||||
|
||||
async def limiter_callback(request, response, pexpire: int):
    """Reject a rate-limited request with HTTP 429.

    ``pexpire`` is divided by 1000 and rounded up to fill ``Retry-After``.
    """
    retry_after = str(ceil(pexpire / 1000))
    detail = {"error": "Too many requests. Probably you use Freedium to train own AI moodel, hmm? :/"}
    raise HTTPException(429, detail, headers={"Retry-After": retry_after})
|
||||
|
||||
|
||||
async def limiter_identifier(request):
    """Return the string used to bucket a request for rate limiting.

    Sources, in order of preference:

    1. the first entry of ``X-Forwarded-For`` (surrounding whitespace removed),
    2. the ``ip`` request header,
    3. the peer address reported on ``request.client``,
    4. ``"127.0.0.1"`` when nothing else is available.

    Step 3 is new: previously every client that sent neither header shared the
    single ``"127.0.0.1"`` bucket.
    """
    forwarded_for = request.headers.get("X-Forwarded-For")
    if forwarded_for:
        first_hop = forwarded_for.split(",")[0].strip()
        if first_hop:
            return first_hop

    header_ip = request.headers.get("ip")
    if header_ip:
        return str(header_ip)

    # ``client`` can be missing or None, so look it up defensively.
    client = getattr(request, "client", None)
    if client is not None and getattr(client, "host", None):
        return str(client.host)

    return "127.0.0.1"
|
||||
|
||||
|
||||
app = FastAPI(**FASTAPI_APPLICATION_CONFIG)

# The rate-limiting dependency is attached to the router only when the limiter is enabled.
if not config.DISABLE_RATE_LIMITER:
    router = APIRouter(
        dependencies=[
            Depends(RateLimiter(times=5, seconds=2, identifier=limiter_identifier, callback=limiter_callback)),
        ],
    )
else:
    router = APIRouter()
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup():
    """Initialise the rate limiter when it is enabled and Redis is reachable."""
    if config.DISABLE_RATE_LIMITER:
        return

    if await safe_check_redis_connection(redis_storage):
        await FastAPILimiter.init(redis_storage)
    else:
        # Previously skipped silently. The router still carries the RateLimiter
        # dependency, so make the missing initialisation visible in the logs.
        logger.warning("Rate limiter is enabled but Redis is not reachable; FastAPILimiter was not initialised")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown():
    """Close the Redis connection and, when Sentry is configured, flush its queue."""
    logger.debug("Close Redis connection")
    await redis_storage.close()

    if not config.SENTRY_SDK_DSN:
        return
    logger.debug("Flush Sentry messages")
    sentry_sdk.flush()
|
||||
|
||||
|
||||
# Routes are added to `router` here; the router is mounted on the app at the end.
register_main_router(router)
|
||||
register_main_error_handler(app)
|
||||
|
||||
register_middlewares(app)
|
||||
|
||||
# Mount the router (with all routes registered above) on the application.
app.include_router(router)
|
||||
15
server/middlewares/__init__.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
from starlette.middleware.cors import CORSMiddleware
|
||||
from server.middlewares.logger import LoggerMiddleware
|
||||
|
||||
|
||||
def register_middlewares(app):
    """Attach the request logger and a fully permissive CORS policy to ``app``."""
    app.add_middleware(LoggerMiddleware)

    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)
|
||||
|
||||
92
server/middlewares/logger.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import asyncio
|
||||
import time
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from loguru import logger
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response, StreamingResponse
|
||||
from starlette.types import Message
|
||||
|
||||
from server import transponder_code_correlation, url_correlation, xkcd_passwd, xp, config
|
||||
from server.utils.error import generate_error
|
||||
from server.utils.utils import string_to_number_ascii
|
||||
|
||||
|
||||
async def set_body(request: Request, body: bytes):
    """Replace ``request._receive`` with a coroutine that yields ``body`` as an ``http.request`` message."""

    async def receive() -> Message:
        # A fresh message dict is built on every call.
        return dict(type="http.request", body=body)

    request._receive = receive
|
||||
|
||||
|
||||
async def get_body(request: Request) -> bytes:
    """Read the request body and re-arm the request so the body can be read again.

    Returns the body bytes.
    """
    payload = await request.body()

    # Same replay hook that set_body() installs, written inline.
    async def receive() -> Message:
        return {"type": "http.request", "body": payload}

    request._receive = receive
    return payload
|
||||
|
||||
|
||||
class LoggerMiddleware(BaseHTTPMiddleware):
    """Log each request/response pair under a generated per-request ID.

    The ID is also published through the ``transponder_code_correlation`` and
    ``url_correlation`` context variables and returned to the client in the
    ``X-Request-ID`` header. ``X-Process-Time`` carries the handling time in
    seconds.
    """

    async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[StreamingResponse]]) -> Response:  # type: ignore
        """Handle one request.

        The downstream call is limited to ``config.REQUEST_TIMEOUT`` seconds.
        Any exception it raises (including the timeout) is logged and replaced
        by the generic error page.
        """
        start_time = time.time()
        generated_id = xp.generate_xkcdpassword(xkcd_passwd, delimiter="-", numwords=3)
        transponder_code = string_to_number_ascii(generated_id)
        transponder_code_correlation.set(transponder_code)
        url_correlation.set(request.url)
        with logger.contextualize(id=generated_id):
            logger.trace(f"Current ID '{generated_id}' transponder code is '{transponder_code}'")
            logger.trace(request.__dict__)

            await request.body()

            self._log_request(request)

            # Workaround for stupid Starlette bug: https://github.com/tiangolo/fastapi/issues/394
            await get_body(request)

            try:
                response = await asyncio.wait_for(call_next(request), timeout=config.REQUEST_TIMEOUT)
            except Exception as ex:
                logger.exception(ex)
                response = await generate_error()

            logger.trace(response.__dict__)

            response.headers["X-Request-ID"] = generated_id
            # response.headers["Access-Control-Expose-Headers"] = "X-Request-ID, Origin, X-Requested-With, Content-Type, Accept"

            self._log_response(request, response)

            process_time = time.time() - start_time
            response.headers["X-Process-Time"] = str(process_time)

            return response

    def _log_request(self, request: Request) -> None:
        """Write the request line, client address, path params, headers and cookies to the debug log."""
        logger.debug(f"< HTTP/{request['http_version']} {request.method} {request.url}")

        # ``request.client`` is None when the server supplies no peer address.
        # Dereferencing it unguarded used to raise before the handler ran.
        client_host = request.client.host if request.client else "unknown"
        logger.debug(f"< IP host origin: {client_host}")

        logger.debug("< Params:")
        for name, value in request.path_params.items():
            logger.debug(f"\t< {name}: {value}")

        logger.debug("< Headers:")
        for name, value in request.headers.items():
            value = self._sanitize_header_value(name, value)
            logger.debug(f"\t< {name}: {value}")

        if hasattr(request, "cookies") and request.cookies:
            logger.debug("< Coockies:")
            for name, value in request.cookies.items():
                logger.debug(f"\t< {name}: {value}")

    def _log_response(self, request: Request, response: Response) -> None:
        """Write the response status line, headers and (when present) cookies to the debug log."""
        logger.debug(f"> HTTP/{request['http_version']} {response.status_code}")

        logger.debug("> Headers:")
        for name, value in response.headers.items():
            value = self._sanitize_header_value(name, value)
            logger.debug(f"\t> {name}: {value}")

        if hasattr(response, "cookies"):
            logger.debug("> Coockies:")
            for name, value in response.cookies.items():
                logger.debug(f"\t> {name}: {value}")

    def _sanitize_header_value(self, name, value):
        """Return ``value`` for logging; ``Authorization`` is cut to its first 25 characters plus ``******``."""
        if name.lower() == "authorization":
            value = f"{value[:25]}******"
        return value
|
||||
365
server/templates/base.html
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
|
||||
<title>{{ title or "Breaking Medium paywall!" }} - Freedium</title>
|
||||
{% if creator %}<meta name="author" content="{{ creator.name }}" />{% endif %}
|
||||
<meta name="description" content="{{ description or 'Your paywall breakthrough for Medium!' }}" />
|
||||
<meta name="keywords" content="medium, paywall, medium.com, paywall breakthrough" />
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
<script src="https://cdn.tailwindcss.com?plugins=forms,typography,aspect-ratio"></script>
|
||||
<link href="https://glyph.medium.com/css/unbound.css" rel="stylesheet">
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
|
||||
<link rel="manifest" href="/site.webmanifest">
|
||||
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#00aba9">
|
||||
<meta name="msapplication-TileColor" content="#00aba9">
|
||||
<meta name="theme-color" content="#ffffff">
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/highlight.min.js"></script>
|
||||
<link rel="stylesheet" href="https://unpkg.com/@highlightjs/cdn-assets@11.8.0/styles/atom-one-dark.min.css">
|
||||
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@17.8.4/dist/lazyload.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/lightense-images@1.0.17/dist/lightense.min.js"></script>
|
||||
<script>
|
||||
if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
|
||||
document.documentElement.classList.add('dark');
|
||||
//document.getElementById('darkIcon').classList.remove('hidden');
|
||||
//document.getElementById('lightIcon').classList.add('hidden')
|
||||
} else {
|
||||
document.documentElement.classList.remove('dark')
|
||||
//document.getElementById('lightIcon').classList.remove('hidden');
|
||||
//document.getElementById('darkIcon').classList.add('hidden');
|
||||
}
|
||||
</script>
|
||||
<style>
|
||||
.shadow-lf {
|
||||
box-shadow: inset 3px 0 0 0 rgb(209 207 239 / var(--tw-bg-opacity));
|
||||
}
|
||||
</style>
|
||||
<style>
|
||||
.notification-container {
|
||||
display: none;
|
||||
position: fixed;
|
||||
top: 20px;
|
||||
padding: 2%;
|
||||
z-index: 1000;
|
||||
}
|
||||
|
||||
.notification-card {
|
||||
background-color: #fff;
|
||||
border: 1px solid #ccc;
|
||||
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
padding: 10px 20px;
|
||||
border-radius: 5px;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
// Called with { iframe, height }: applies the height to that iframe, then
// re-syncs the width of every iframe on the page.
window._resizeIframe = function (iframeData) {
  iframeData.iframe.height = iframeData.height;
  _resizeIframeWidth();
};

// Sets the width of every iframe to the current width of ".main-content"
// and registers itself as the window resize handler.
function _resizeIframeWidth() {
  var element = document.querySelector(".main-content");
  // Pages without a ".main-content" element used to throw here.
  if (!element) {
    return;
  }
  var width = element.offsetWidth;

  // Declared locally; the original leaked `iframes` as an implicit global.
  var iframes = document.getElementsByTagName("iframe");
  for (var i = 0; i < iframes.length; i++) {
    iframes[i].width = width;
  }

  window.onresize = _resizeIframeWidth;
}
|
||||
</script>
|
||||
</head>
|
||||
<div class="fixed bottom-4 left-4" style="z-index: 999999;">
|
||||
<button id="openProblemModal"
|
||||
class="m-1.5 flex items-center bg-red-500 text-white py-2 px-4 rounded-full shadow-lg hover:bg-red-600 focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
|
||||
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. --><style>svg{fill:#ffffff}</style>
|
||||
<path d="M256 32c14.2 0 27.3 7.5 34.5 19.8l216 368c7.3 12.4 7.3 27.7 .2 40.1S486.3 480 472 480H40c-14.3 0-27.6-7.7-34.7-20.1s-7-27.8 .2-40.1l216-368C228.7 39.5 241.8 32 256 32zm0 128c-13.3 0-24 10.7-24 24V296c0 13.3 10.7 24 24 24s24-10.7 24-24V184c0-13.3-10.7-24-24-24zm32 224a32 32 0 1 0 -64 0 32 32 0 1 0 64 0z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button id="darkModeToggle"
|
||||
class="m-1.5 flex items-center bg-blue-500 text-white py-2 px-4 rounded-full shadow-lg hover:bg-blue-600 focus:outline-none">
|
||||
<svg id="darkIcon" xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 384 512">
|
||||
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
|
||||
<path d="M223.5 32C100 32 0 132.3 0 256S100 480 223.5 480c60.6 0 115.5-24.2 155.8-63.4c5-4.9 6.3-12.5 3.1-18.7s-10.1-9.7-17-8.5c-9.8 1.7-19.8 2.6-30.1 2.6c-96.9 0-175.5-78.8-175.5-176c0-65.8 36-123.1 89.3-153.3c6.1-3.5 9.2-10.5 7.7-17.3s-7.3-11.9-14.3-12.5c-6.3-.5-12.6-.8-19-.8z" />
|
||||
</svg>
|
||||
<!-- SVG icon for light mode (e.g., a sun) -->
|
||||
<svg class="hidden" id="lightIcon" xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
|
||||
<!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
|
||||
<path d="M361.5 1.2c5 2.1 8.6 6.6 9.6 11.9L391 121l107.9 19.8c5.3 1 9.8 4.6 11.9 9.6s1.5 10.7-1.6 15.2L446.9 256l62.3 90.3c3.1 4.5 3.7 10.2 1.6 15.2s-6.6 8.6-11.9 9.6L391 391 371.1 498.9c-1 5.3-4.6 9.8-9.6 11.9s-10.7 1.5-15.2-1.6L256 446.9l-90.3 62.3c-4.5 3.1-10.2 3.7-15.2 1.6s-8.6-6.6-9.6-11.9L121 391 13.1 371.1c-5.3-1-9.8-4.6-11.9-9.6s-1.5-10.7 1.6-15.2L65.1 256 2.8 165.7c-3.1-4.5-3.7-10.2-1.6-15.2s6.6-8.6 11.9-9.6L121 121 140.9 13.1c1-5.3 4.6-9.8 9.6-11.9s10.7-1.5 15.2 1.6L256 65.1 346.3 2.8c4.5-3.1 10.2-3.7 15.2-1.6zM160 256a96 96 0 1 1 192 0 96 96 0 1 1 -192 0zm224 0a128 128 0 1 0 -256 0 128 128 0 1 0 256 0z" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
<nav id="header" class="fixed w-full z-9 top-0 dark:bg-gray-800 dark:text-white bg-white shadow">
|
||||
<div class="notification-container">
|
||||
<div class="notification-card dark:bg-gray-800 bg-white">
|
||||
<p class="text-2xl pb-5 text-black dark:text-white">Achtung !!!</p>
|
||||
<p class="pb-3 text-black dark:text-white">Sooo, it was going to take a while, but now we have it. Our whole Github organization is not public for now. Reddit community, that was beginning all of that also gone - reddit.com/r/paywall/comments/15jsr6z/bypass_mediumcom_paywall</br></br>We have moved to Codeberg - codeberg.org/Freedium-cfd</br></br>Medium, thank you >.</p>
|
||||
<a href="https://patreon.com/Freedium" target="_blank" title="Patreon">
|
||||
<button class="bg-red-400 mx-1 text-white hover:bg-red-500 font-semibold py-1 px-2 rounded mt-2">
|
||||
Patreon
|
||||
</button>
|
||||
</a>
|
||||
<a href="https://www.buymeacoffee.com/zhymabekroman" target="_blank" title="Buy me a coffee">
|
||||
<button class="bg-orange-500 hover:bg-blue-700 mx-1 text-white font-semibold py-1 px-2 rounded mt-2">
|
||||
Buy me a coffee
|
||||
</button>
|
||||
</a>
|
||||
|
||||
<button class="bg-gray-300 mx-1 hover:bg-gray-400 text-gray-800 font-semibold py-1 px-2 rounded mt-2 close-button">
|
||||
Close
|
||||
</button>
|
||||
<a href="https://codeberg.org/Freedium-cfd/web" target="_blank" title="Codeberg">
|
||||
<button class="bg-gray-700 hover:bg-gray-600 mx-1 text-white font-semibold py-1 px-2 rounded mt-2">
|
||||
Source code - Codeberg
|
||||
</button>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if enable_ads_header %}
|
||||
<div class="w-full bg-yellow-400 text-center py-1 px-4"><p class="text-yellow-900">Place your advertisement here! Contact us at advertise@freedium.com</p></div>
|
||||
{% endif %}
|
||||
|
||||
<div id="progress" class="h-1 z-20 top-0" style="background:linear-gradient(to right, #4dc0b5 var(--scroll), transparent 0)"></div>
|
||||
<div class="w-full md:max-w-4xl mx-auto flex flex-wrap items-center justify-between mt-0 py-3">
|
||||
<div class="pl-4">
|
||||
<a class="text-green-500 text-base no-underline hover:no-underline font-extrabold text-xl"
|
||||
href="/"
|
||||
onclick="navigateToOrigin()">Freedium</a>
|
||||
</div>
|
||||
<div class="block lg:hidden pr-4">
|
||||
<button id="nav-toggle"
|
||||
class="flex items-center px-3 py-2 border rounded text-gray-500 dark:text-white border-gray-600 hover:text-gray-900 dark:hover:text-white hover:border-green-500 appearance-none focus:outline-none">
|
||||
<svg class="fill-current h-3 w-3" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
|
||||
<title>Menu</title>
|
||||
<path d="M0 3h20v2H0V3zm0 6h20v2H0V9zm0 6h20v2H0v-2z" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
<div class="w-full flex-grow lg:flex lg:items-center lg:w-auto hidden lg:block mt-2 lg:mt-0 dark:bg-gray-800 bg-white"
|
||||
id="nav-content">
|
||||
<ul class="list-reset lg:flex justify-end flex-1 items-center">
|
||||
<li class="mr-3">
|
||||
<a class="inline-block text-gray-600 dark:text-white no-underline hover:text-gray-900 dark:hover:text-white hover:text-underline py-2 px-4"
|
||||
href="https://medium.com/">Medium.com</a>
|
||||
</li>
|
||||
<li class="mr-3">
|
||||
<a class="inline-block text-gray-600 dark:text-white no-underline hover:text-gray-900 dark:hover:text-white hover:text-underline py-2 px-4"
|
||||
href="https://codeberg.org/Freedium-cfd/web">Source code - Codeberg</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<body class="dark:bg-gray-800 bg-white">{{ body_template }}</body>
|
||||
<div id="problemModal"
|
||||
class="modal hidden fixed inset-0 w-full h-full flex items-center justify-center overflow-y-auto z-10 bg-black bg-opacity-50">
|
||||
<div class="modal-container w-11/12 md:max-w-xl mx-auto rounded shadow-lg max-h-screen">
|
||||
<div class="modal-content bg-white dark:bg-gray-800 dark:text-white my-8 py-4 text-left px-6">
|
||||
<h1 class="text-3xl font-bold">Reporting a Problem</h1>
|
||||
<div class="mt-3">
|
||||
<p>Sometimes we have problems displaying some Medium posts.</br></br></p>
|
||||
<p>If you have a problem that some images aren't loading - try using VPN. Probably you have problem with access to Medium CDN (or fucking Cloudflare's bot detection algorithms are blocking you).</p>
|
||||
</div>
|
||||
<form action="#" method="POST" class="mt-4" id="problem-form">
|
||||
<div class="mb-4">
|
||||
<label for="problem-description" class="block text-gray-700 dark:text-white font-bold mb-2">Problem Description</label>
|
||||
<textarea id="problem-description"
|
||||
name="problem-description"
|
||||
placeholder="Describe your problem here..."
|
||||
class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
|
||||
rows="4"
|
||||
required></textarea>
|
||||
</div>
|
||||
<div>
|
||||
<button type="submit"
|
||||
class="m-2 bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">Submit</button>
|
||||
<button type="button"
|
||||
class="m-2 modal-close bg-gray-500 hover:bg-gray-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">Cancel</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
tailwind.config = {
|
||||
darkMode: 'class',
|
||||
}
|
||||
|
||||
function navigateToOrigin() {
|
||||
window.location.href = window.location.origin;
|
||||
}
|
||||
if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
|
||||
// document.documentElement.classList.add('dark');
|
||||
document.getElementById('darkIcon').classList.remove('hidden');
|
||||
document.getElementById('lightIcon').classList.add('hidden')
|
||||
} else {
|
||||
// document.documentElement.classList.remove('dark')
|
||||
document.getElementById('lightIcon').classList.remove('hidden');
|
||||
document.getElementById('darkIcon').classList.add('hidden');
|
||||
}
|
||||
|
||||
// Flip between the light and dark theme and remember the choice in localStorage.
document.getElementById('darkModeToggle').addEventListener('click', function() {
    const root = document.documentElement;
    // Dark is active when stored explicitly, or when nothing is stored and the OS prefers dark.
    const darkIsActive = localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches);
    const switchToDark = !darkIsActive;

    root.classList.toggle('dark', switchToDark);
    // Exactly one of the two icons is visible: the one matching the new theme.
    document.getElementById('darkIcon').classList.toggle('hidden', !switchToDark);
    document.getElementById('lightIcon').classList.toggle('hidden', switchToDark);
    // Backdrop colour used by the Lightense image zoom.
    root.style.cssText = switchToDark ? "--lightense-backdrop: black;" : "--lightense-backdrop: white;";
    localStorage.setItem("theme", switchToDark ? "dark" : "light");
});
|
||||
</script>
|
||||
<script>
|
||||
const openModalButton = document.getElementById('openProblemModal');
|
||||
const closeModalButton = document.querySelector('.modal-close');
|
||||
const modal = document.getElementById('problemModal');
|
||||
const problemDescriptionInput = document.getElementById('problem-description');
|
||||
const submitButton = document.querySelector('form button');
|
||||
const body = document.body;
|
||||
|
||||
openModalButton.addEventListener('click', () => {
|
||||
body.classList.add('overflow-hidden'); // Prevent scrolling on the body
|
||||
modal.classList.remove('hidden');
|
||||
});
|
||||
|
||||
closeModalButton.addEventListener('click', () => {
|
||||
body.classList.remove('overflow-hidden'); // Re-enable scrolling on the body
|
||||
modal.classList.add('hidden');
|
||||
});
|
||||
|
||||
modal.addEventListener('click', (e) => {
|
||||
if (e.target === modal) {
|
||||
modal.classList.add('hidden');
|
||||
body.classList.remove('overflow-hidden');
|
||||
}
|
||||
});
|
||||
|
||||
function navigateNoCache() {
|
||||
window.location.href = `/render-no-cache${window.location.pathname}`;
|
||||
}
|
||||
|
||||
/**
 * Submit the "report a problem" form without reloading the page.
 *
 * Sends the description and the current page URL as JSON to `/report-problem`.
 * On success the modal is closed and page scrolling is restored; on an HTTP or
 * network failure the submit button is re-enabled so the user can retry.
 *
 * @param {Event} event - submit event of the problem form.
 */
const submitForm = async (event) => {
    event.preventDefault();

    console.log('Form submiting is started!');
    // Prevent double submits while the request is in flight.
    submitButton.disabled = true;

    // Get the problem description from the input field
    const problemDescription = problemDescriptionInput.value;
    const currentPage = window.location.href;

    try {
        // Send a POST request to the "report-problem" API endpoint
        const response = await fetch('/report-problem', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ description: problemDescription, page: currentPage }),
        });

        if (response.ok) {
            console.log('Problem report submitted successfully.');
            modal.classList.add('hidden'); // Close the modal
            // Undo the scroll lock set when the modal was opened; without this
            // the page stays unscrollable after a successful report.
            body.classList.remove('overflow-hidden');
            // The submit button is intentionally left disabled, as before.
        } else {
            // Handle errors, such as non-200 responses
            console.error('Failed to submit problem report.');
            submitButton.disabled = false;
        }
    } catch (error) {
        // Handle network errors or other exceptions
        console.error('An error occurred:', error);
        submitButton.disabled = false;
    }
};
|
||||
|
||||
document.getElementById('problem-form').onsubmit = submitForm;
|
||||
</script>
|
||||
<script>
|
||||
const h = document.documentElement, b = document.body;
|
||||
const st = 'scrollTop';
|
||||
const sh = 'scrollHeight';
|
||||
const progress = document.getElementById('progress');
|
||||
const header = document.getElementById('header');
|
||||
const navcontent = document.getElementById('nav-content');
|
||||
|
||||
// Keep the reading-progress bar (CSS variable --scroll) in sync with the scroll position.
document.addEventListener('scroll', function () {
    /* Refresh scroll % width */
    const scrollableHeight = (h[sh] || b[sh]) - h.clientHeight;
    // When the document fits in the viewport there is nothing to scroll;
    // report 0% instead of dividing by zero (which produced "NaN%"/"Infinity%").
    const scroll = scrollableHeight > 0 ? (h[st] || b[st]) / scrollableHeight * 100 : 0;
    progress.style.setProperty('--scroll', scroll + '%');
});
|
||||
|
||||
document.getElementById('nav-toggle').onclick = function() {
|
||||
document.getElementById("nav-content").classList.toggle("hidden");
|
||||
}
|
||||
|
||||
window.addEventListener('load', function () {
|
||||
Lightense('img:not(.no-lightense)');
|
||||
}, false);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
<script>
|
||||
var lazyLoadInstance = new LazyLoad({
|
||||
callback_loaded: function(element) {
|
||||
Lightense(element);
|
||||
},
|
||||
callback_error: (img) => {
|
||||
console.log(img);
|
||||
if (img.hasAttribute("data-src")) {
|
||||
if (img.attributes["data-src"].value.startsWith("https://miro.medium.com/v2/")) {
|
||||
img.setAttribute("src", img.attributes["data-src"].value.replace("https://miro.medium.com/v2/", "{{HOST_ADDRESS}}/@miro/v2/" ));
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<script>
|
||||
function navigateToOrigin() {
|
||||
window.location.href = window.location.origin;
|
||||
}
|
||||
</script>
|
||||
<script>
|
||||
// One-time notification banner: shown until the user dismisses it, then
// suppressed through a localStorage flag.
document.addEventListener('DOMContentLoaded', () => {
    const notificationContainer = document.querySelector('.notification-container');
    const closeButton = document.querySelector('.close-button');

    // Remember the dismissal and hide the banner.
    const hideNotification = () => {
        localStorage.setItem('showNotification-github-block', 'false');
        notificationContainer.style.display = 'none';
    };

    // Reveal the banner only when it has never been dismissed.
    const showNotification = () => {
        if (localStorage.getItem('showNotification-github-block')) {
            return;
        }
        notificationContainer.style.display = 'block';
    };

    // Close button functionality
    closeButton.addEventListener('click', () => hideNotification());

    showNotification();
});
|
||||
</script>
|
||||
|
||||
21
server/templates/error.html
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
<div class="container w-full md:max-w-3xl mx-auto pt-40"></div>
|
||||
<div class="container w-full mx-auto pt-20 pb-20 break-words">
|
||||
<div class="flex flex-col items-center justify-center h-90">
|
||||
<div class="bg-green-500 text-white text-6xl font-bold p-6 rounded-lg shadow-lg">Oppps!</div>
|
||||
<div class="flex items-center justify-center mt-12">
|
||||
<p class="dark:text-gray-200 text-gray-700 text-lg">{{ error_msg }}</p>
|
||||
</div>
|
||||
{{ url_line }}
|
||||
<div class="flex md:max-w-2xl items-center justify-center mt-8">
|
||||
<p class="dark:text-gray-200 text-gray-700">We are aware of this error. Please try again later if this was an error on our part, we will fix it as soon as possible.</p>
|
||||
</div>
|
||||
<div class="flex items-center justify-center mt-28">
|
||||
<p class="p-5 bg-black text-white" style="font-family: monospace;">Your emergency transponder code: {{ transponder_code }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
const urlMedium = document.getElementById('medium-link-input');
|
||||
urlMedium.value = window.location.pathname.slice(1)
|
||||
urlMedium.value += window.location.search
|
||||
</script>
|
||||
9
server/templates/main.html
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
<div class="container w-full md:max-w-3xl mx-auto pt-40"></div>
|
||||
<div class="container w-full mx-auto pt-20 py-20 break-words">
|
||||
<div class="flex flex-col items-center justify-center h-60">
|
||||
<h1 class="md:max-w-3xl text-4xl font-bold text-center text-green-500 mt-8">Freedium: Your paywall breakthrough for Medium!</h1>
|
||||
{{ url_line }}
|
||||
</div>
|
||||
</div>
|
||||
{{ postleter }}
|
||||
<div class="mt-8"></div>
|
||||
118
server/templates/post.html
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
<div class="container w-full md:max-w-3xl mx-auto pt-20 break-words text-gray-900 dark:text-gray-200 bg-white dark:bg-gray-800">
|
||||
<div class="w-full px-4 md:px-6 text-xl text-gray-800 dark:text-gray-100 leading-normal" style="font-family:Georgia,serif;">
|
||||
<div class="font-sans">
|
||||
<p class="text-base md:text-sm text-green-500 font-bold pb-3">
|
||||
<a href="{{ url }}#bypass" class="text-sm md:text-sm text-green-500 font-bold no-underline hover:underline ">&lt; Go to the original</a>
|
||||
</p>
|
||||
{% if previewImageId %}
|
||||
<img alt="Preview image"
|
||||
style="max-height: 65vh;
|
||||
width: auto;
|
||||
margin: auto"
|
||||
loading="eager"
|
||||
role="presentation"
|
||||
src="https://miro.medium.com/v2/resize:fit:700/{{ previewImageId }}">
|
||||
{% endif %}
|
||||
<h1 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 pt-6 pb-2 text-3xl md:text-4xl">{{ title }}</h1>
|
||||
{% if subtitle %}<h2 class="font-medium font-sans break-normal text-gray-600 dark:text-gray-200 pt-1 text-1xl md:text-1xl">{{ subtitle }}</h2>{% endif %}
|
||||
</div>
|
||||
<div class="bg-gray-100 dark:bg-gray-600 border border-gray-300 m-2 mt-5">
|
||||
<div class="flex items-center space-x-4 p-4">
|
||||
<div class="flex-shrink-0">
|
||||
<a href="https://medium.com/@{{ creator.username }}" target="_blank" title="{{ creator.bio }}" class="block relative">
|
||||
<img src="https://miro.medium.com/v2/resize:fill:88:88/{{ creator.imageId or '1*dmbNkD5D-u45r44go_cf0g.png' }}"
|
||||
alt="{{ creator.name }}"
|
||||
class="rounded-full h-11 w-11 no-lightense">
|
||||
<div class="absolute bottom-0 right-0 h-3 w-3 border-2 border-white bg-green-500 rounded-full"></div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="flex-grow">
|
||||
<a href="https://medium.com/@{{ creator.username }}"
|
||||
target="_blank"
|
||||
title="{{ creator.bio }}"
|
||||
class="block font-semibold text-gray-900 dark:text-white">{{ creator.name }}</a>
|
||||
<button class="text-sm text-white bg-green-500 px-3 py-1 rounded-lg mt-1 dark:bg-green-700">
|
||||
<a href="https://medium.com/@{{ creator.username }}"
|
||||
target="_blank"
|
||||
title="{{ creator.bio }}"
|
||||
class="block text-sm text-white">Follow</a>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="px-4 pb-2">
|
||||
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500 dark:text-white">
|
||||
{% if collection %}
|
||||
<a href="https://medium.com/{{ collection.slug }}"
|
||||
title="{{ collection.shortDescription }}"
|
||||
target="_blank"
|
||||
class="flex items-center space-x-1">
|
||||
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ collection.avatar.id }}"
|
||||
alt="{{ collection.name }}"
|
||||
class="h-4 w-4 rounded-full no-lightense">
|
||||
<p>{{ collection.name }}</p>
|
||||
</a>
|
||||
<span>·</span>
|
||||
{% endif %}
|
||||
<span class="text-gray-500 dark:text-white">~{{ readingTime }} min read</span>
|
||||
<span class="md:inline">·</span>
|
||||
<span class="text-gray-500 dark:text-white">{{ firstPublishedAt }} (Updated: {{ updatedAt }})</span>
|
||||
<span class="md:inline">·</span>
|
||||
<span class="text-yellow-500 dark:text-yellow-400">Free: {{ freeAccess }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="main-content mt-8">
|
||||
{% for paragraph in content %}{{ paragraph }}{% endfor %}
|
||||
</div>
|
||||
<div class="flex flex-wrap gap-2 mt-5">
|
||||
{% for tag in tags %}<a title="{{ tag.displayTitle }}" target="_blank" href="https://medium.com/tag/{{ tag.normalizedTagSlug }}"><span class="text-green-500 bg-green-100 px-2 py-1 rounded-full text-xs dark:bg-green-800 dark:text-gray-100">#{{ tag.normalizedTagSlug }}</span></a>{% endfor %}
|
||||
</div>
|
||||
<div class="container w-full md:max-w-3xl mx-auto pt-12"></div>
|
||||
</div>
|
||||
<style>
|
||||
.main-content {
|
||||
letter-spacing: -0.06px;
|
||||
font-family: source-serif-pro, Georgia, Cambria, "Times New Roman", Times, serif;
|
||||
}
|
||||
code {
|
||||
background-color: #e3e2e2;
|
||||
}
|
||||
pre {
|
||||
font-size: 75%;
|
||||
background-color: #e3e2e2;
|
||||
}
|
||||
p code, ul code, li code {
|
||||
font-size: 75%;
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
// Highlight all code blocks and add a "Copy" button to each one.
document.addEventListener('DOMContentLoaded', (event) => {
    hljs.highlightAll();

    document.querySelectorAll('pre code').forEach((codeEl) => {
        // Capture the raw text before the markup is rewritten below.
        const code = codeEl.textContent;
        const container = codeEl.parentElement;
        container.innerHTML = '<button class="hljs-copy p-1 bg-gray-300 dark:bg-black">Copy</button>' + container.innerHTML; // prepend copy button

        const copyButton = container.getElementsByClassName('hljs-copy')[0];
        copyButton.contentCopy = code;
        copyButton.addEventListener("click", async function () {
            // Per-click binding: a shared global here let a second click steal
            // the label reset of the first button.
            const button = this;
            button.innerText = 'Copying..';
            try {
                // Feature-detect instead of sniffing the user agent: Chrome and
                // Edge user agents also contain "Safari".
                if (!navigator.clipboard || !navigator.clipboard.writeText) {
                    throw new Error('Clipboard API is not available');
                }
                await navigator.clipboard.writeText(button.contentCopy);
            } catch (error) {
                // Manual fallback when the Clipboard API is missing or denied.
                prompt("Clipboard (Select: ⌘+a > Copy:⌘+c)", button.contentCopy);
            }
            button.innerText = 'Copied!';
            setTimeout(function () {
                button.innerText = 'Copy';
            }, 1500);
        });
    });
});
|
||||
</script>
|
||||
<style>
|
||||
.hljs-copy {
|
||||
float: right;
|
||||
cursor: pointer;
|
||||
}
|
||||
</style>
|
||||
48
server/templates/postleter.html
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
<div class="p-2 grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4 w-full">
|
||||
{% for post in post_list %}
|
||||
<div class="p-6 bg-white dark:bg-gray-600 rounded-xl">
|
||||
<a class="group post_view" post_id="{{ post.post_id }}">
|
||||
<div class="max-h-72 items-center d-flex overflow-hidden">
|
||||
{% if post.preview_image_id %}
|
||||
<img data-src="https://miro.medium.com/v2/resize:fit:700/{{ post.preview_image_id }}"
|
||||
class="lazy w-full h-auto hover:scale-105 transition transition-all duration-200 ease-in-out">
|
||||
</div>
|
||||
{% else %}
|
||||
<img data-src="https://images.unsplash.com/photo-1636467204130-edf8ee206dce?ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&ixlib=rb-1.2.1&auto=format&fit=crop&w=600&q=80"
|
||||
class="lazy w-full h-auto hover:scale-105 transition transition-all duration-200 ease-in-out">
|
||||
</div>
|
||||
{% endif %}
|
||||
<h3 class="mt-6 leading-normal text-gray-800 dark:text-gray-100 group-hover:text-purple-400 font-semibold text-xl lg:text-2xl line-clamp-3 transition translation-all duration-200 ease-in-out">{{ post.title }}</h3>
|
||||
</a>
|
||||
<div class="mt-6">
|
||||
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-500 dark:text-white">
|
||||
{% if post.collection %}
|
||||
<a href="https://medium.com/{{ post.collection.slug }}"
|
||||
title="{{ post.collection.shortDescription }}"
|
||||
target="_blank"
|
||||
class="flex items-center space-x-1">
|
||||
<img src="https://miro.medium.com/v2/resize:fill:48:48/{{ post.collection.avatar.id }}"
|
||||
alt="{{ post.collection.name }}"
|
||||
loading="eager"
|
||||
class="h-4 w-4 rounded-full no-lightense">
|
||||
<p>{{ post.collection.name }}</p>
|
||||
</a>
|
||||
<span>·</span>
|
||||
{% endif %}
|
||||
<span class="text-gray-500 dark:text-white">~{{ post.reading_time }} min read</span>
|
||||
<span class="md:inline dark:text-white">·</span>
|
||||
<span class="text-gray-500 dark:text-white">{{ post.first_published_at }} (Updated: {{ post.updated_at }})</span>
|
||||
<span class="md:inline dark:text-white">·</span>
|
||||
<span class="text-yellow-500 dark:text-white">Free: {{ post.free_access }}</span>
|
||||
</div>
|
||||
<p class="mt-6 leading-normal line-clamp-3 text-gray-600 dark:text-gray-200">{{ post.description }}</p>
|
||||
</div>
|
||||
<a post_id="{{ post.post_id }}" class="inline-block mt-6 text-purple-500 hover:text-purple-400 post_view">Read More</a>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<script>
|
||||
for (let post_a_el of document.getElementsByClassName("post_view")) {
|
||||
post_a_el.href = `${window.location.origin}/${post_a_el.attributes["post_id"].value}`
|
||||
}
|
||||
</script>
|
||||
25
server/templates/url_line.html
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
<div class="md:max-w-6xl bg-white dark:bg-gray-600 w-full shadow-md rounded-md p-8 mt-8">
|
||||
<div class="flex items-center border rounded-md border-gray-300 px-4 py-2">
|
||||
<svg class="h-5 w-5 text-gray-500 dark:text-gray-100 mr-2"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
xmlns="http://www.w3.org/2000/svg">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 6v6m0 0v6m0-6h6m-6 0H6"></path>
|
||||
</svg>
|
||||
<input id="medium-link-input"
|
||||
type="text"
|
||||
placeholder="Enter Medium post link"
|
||||
class="w-full focus:outline-none text-green-500 dark:bg-gray-600 border-gray-300"
|
||||
onkeydown="if (event.keyCode == 13) document.getElementById('go-button').click()">
|
||||
<button id="go-button" class="ml-2 bg-green-500 text-white px-4 py-2 rounded-md hover:bg-green-600 focus:outline-none">Go</button>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
const goButton = document.getElementById('go-button');
|
||||
|
||||
goButton.addEventListener('click', function() {
|
||||
const mediumLinkInput = document.getElementById('medium-link-input');
|
||||
window.location.href = `${window.location.origin}/${mediumLinkInput.value}`;
|
||||
});
|
||||
</script>
|
||||
44
server/utils/cache.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import pickle
|
||||
from server import redis_storage
|
||||
from functools import wraps
|
||||
from loguru import logger
|
||||
from server.utils.utils import safe_check_redis_connection
|
||||
|
||||
|
||||
def aio_redis_cache(expire_time: int = 60 * 10):
    """Cache the pickled result of an async function in Redis.

    Args:
        expire_time: Lifetime of a cache entry in seconds (passed to ``setex``).

    Returns:
        A decorator for coroutine functions. When Redis does not answer the
        health check, the wrapped function is called directly and nothing is
        cached.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            if not await safe_check_redis_connection(redis_storage):
                logger.error("REDIS is not available!")
                return await func(*args, **kwargs)

            logger.trace(f"{expire_time=}")

            # The key is the function name plus the stringified arguments.
            # Positional-only calls keep their historical key format; keyword
            # arguments are appended (sorted by name) so that calls differing
            # only in kwargs no longer share one cache entry.
            key = "{}-{}".format(func.__name__, ",".join(str(arg) for arg in args))
            if kwargs:
                key += "," + ",".join(f"{name}={value}" for name, value in sorted(kwargs.items()))
            logger.trace(f"REDIS key: {key}")

            cached = await redis_storage.get(key)
            if cached is not None:
                # NOTE(security): pickle.loads executes arbitrary code from its
                # input; this is only safe while the Redis instance is trusted.
                result_raw = pickle.loads(cached)
                logger.trace("Result found in REDIS")
                return result_raw

            logger.trace("Result not found in REDIS")
            result_raw = await func(*args, **kwargs)
            # Store the result in Redis with an expiration time
            await redis_storage.setex(key, expire_time, pickle.dumps(result_raw))
            return result_raw

        return wrapper

    return decorator
|
||||
55
server/utils/error.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
import random
|
||||
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from server import (
|
||||
base_template,
|
||||
config,
|
||||
error_template,
|
||||
transponder_code_correlation,
|
||||
url_correlation
|
||||
)
|
||||
from server.utils.logger_trace import trace
|
||||
from server.utils.notify import send_message
|
||||
|
||||
# ChatGPT prompt: Make this text more Humoristic in one sentenced text, 15 different with emojies as Python list: Sorry to hear that but we have some problem
|
||||
ERROR_MSG_LIST = [
|
||||
"Sorry to hear that, but we've got a problem that's bigger than my inability to resist a donut! 🍩",
|
||||
"Apologies for the inconvenience, but we've hit a snag - it's not as funny as my cat chasing its tail, but it's a problem nonetheless! 🐱",
|
||||
"Sorry to hear that, but we've encountered a problem - it's not as entertaining as a clown at a circus, but it's there! 🎪",
|
||||
"Oops! We've stumbled upon a problem, but don't worry, it's not as disastrous as my cooking! 🍳",
|
||||
"Sorry to hear that, but we've got a problem that's more stubborn than a mule on a Monday morning! 🐴",
|
||||
"Apologies, but we've run into a problem - it's not as amusing as my grandma's dance moves, but it's a problem! 👵💃",
|
||||
"Sorry to hear that, but we've got a problem that's more tangled than my headphone wires! 🎧",
|
||||
"Oops! We've hit a problem, but don't worry, it's not as catastrophic as my last blind date! 💔",
|
||||
"Sorry to hear that, but we've got a problem that's more elusive than a sock in a washing machine! 🧦",
|
||||
"Apologies, but we've run into a problem - it's not as hilarious as my attempt at yoga, but it's a problem! 🧘♂️",
|
||||
"Sorry to hear that, but we've got a problem that's more confusing than a chameleon in a bag of Skittles! 🦎🌈",
|
||||
"Oops! We've encountered a problem, but don't worry, it's not as disastrous as my attempt at karaoke! 🎤",
|
||||
"Sorry to hear that, but we've got a problem that's more stubborn than a toddler refusing to eat their veggies! 👶🥦",
|
||||
"Apologies, but we've run into a problem - it's not as amusing as my dog trying to catch its tail, but it's a problem! 🐶",
|
||||
"Sorry to hear that, but we've got a problem that's more elusive than the end of a rainbow! 🌈"
|
||||
]
|
||||
|
||||
|
||||
@trace
async def generate_error(error_msg: str = None, title: str = "Error", status_code: int = 500, quiet: bool = False):
    """Render the error page and wrap it in an ``HTMLResponse``.

    Args:
        error_msg: Text shown on the page; a random entry of
            ``ERROR_MSG_LIST`` is used when it is empty or ``None``.
        title: Value handed to the base template as ``title``.
        status_code: HTTP status code of the returned response.
        quiet: Currently unused - the admin notification it used to
            suppress (``send_message``) is disabled.

    Returns:
        ``HTMLResponse`` containing the base template with the rendered
        error template as its body.
    """
    message = error_msg or random.choice(ERROR_MSG_LIST)

    # Disabled: unless `quiet`, this used to call send_message() with the url,
    # the transponder code and the error text.

    body_html = await error_template.render_async(
        error_msg=message,
        transponder_code=transponder_code_correlation.get(),
    )
    page_html = await base_template.render_async(
        {
            "enable_ads_header": config.ENABLE_ADS_BANNER,
            "body_template": body_html,
            "title": title,
        },
        HOST_ADDRESS=config.HOST_ADDRESS,
    )
    return HTMLResponse(page_html, status_code=status_code)
|
||||
|
||||
117
server/utils/logger.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
# Source: https://pawamoy.github.io/posts/unify-logging-for-a-gunicorn-uvicorn-app/
|
||||
# This code taken from comment by GroverChouT
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
from gunicorn.glogging import Logger
|
||||
from loguru import logger
|
||||
from loguru._datetime import datetime as loguru_datetime
|
||||
|
||||
from server import START_TIME, config
|
||||
|
||||
# Passed as `enqueue` to every loguru sink configured in this module.
ENQUEUE = True

# Python's logging module is not supporting TRACE level
# https://bugs.python.org/issue31732
# https://betterstack.com/community/guides/logging/how-to-start-logging-with-python/
# logging.addLevelName() takes (level, levelName): the numeric level comes
# first. With the arguments swapped, records of level 5 were never named "TRACE".
logging.addLevelName(5, "TRACE")

# Passed as `backtrace` / `diagnose` to the stdout sink.
BACKTRACE = True
DIAGNOSE = True
# Level used for the stdout sink, the standard file sink and the stdlib loggers,
# looked up from the configured level name.
LOG_LEVEL = logging.getLevelName(config.LOG_LEVEL_NAME)

LOG_FORMAT = "[{process.id}] | <green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>[{extra[id]}] - {message}</level>"
# The doubled braces keep a literal `{time:YYYY-MM-DD}` placeholder in the path.
LOG_FOLDER_PATH = f"server/user_data/logs/{{time:YYYY-MM-DD}}/{START_TIME}"
LOG_FOLDER_PATH_FORMATED = LOG_FOLDER_PATH.format(time=loguru_datetime.now())
|
||||
|
||||
|
||||
def logger_register():
    """Configure loguru with a stdout sink and per-process log files.

    Always installs a stdout sink and a "standart" file sink at ``LOG_LEVEL``;
    when ``config.MORE_LOGS`` is truthy, TRACE and DEBUG file sinks are added.
    File names contain the current process id. Every record gets a default
    ``extra["id"]`` of ``None``.
    """
    pid = os.getpid()

    def file_handler(sink_path, level):
        # All file sinks share format and queueing; only path and level vary.
        return {
            "sink": sink_path,
            "level": level,
            "format": LOG_FORMAT,
            "enqueue": ENQUEUE,
        }

    console_handler = {
        "sink": sys.stdout,
        "level": LOG_LEVEL,
        "format": LOG_FORMAT,
        "enqueue": ENQUEUE,
        "backtrace": BACKTRACE,
        "diagnose": DIAGNOSE,
    }
    handlers = [
        console_handler,
        file_handler(f"{LOG_FOLDER_PATH}/standart_{pid}_log_server", LOG_LEVEL),
    ]
    if config.MORE_LOGS:
        handlers.append(file_handler(f"{LOG_FOLDER_PATH}/trace_{pid}_log_server", "TRACE"))
        handlers.append(file_handler(f"{LOG_FOLDER_PATH}/debug_{pid}_log_server", "DEBUG"))

    logger.configure(handlers=handlers, extra={"id": None})
|
||||
|
||||
|
||||
class InterceptHandler(logging.Handler):
    """Logging handler that forwards stdlib ``logging`` records to loguru."""

    def emit(self, record):
        """Re-emit *record* through loguru at the matching level.

        Raises:
            Exception: whatever loguru raises while logging; the raw message
                is printed first so it is not lost.
        """
        # Get corresponding Loguru level if it exists
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            # Unknown level name: fall back to the numeric level.
            level = record.levelno

        # Find caller from where originated the logged message
        frame, depth = logging.currentframe(), 2
        # The `frame` guard stops the walk at the top of the stack instead of
        # failing with AttributeError on `None.f_code`.
        while frame and frame.f_code.co_filename == logging.__file__:
            frame = frame.f_back
            depth += 1

        raw_message = record.getMessage()

        try:
            logger.opt(depth=depth, exception=record.exc_info).log(level, raw_message)
        except Exception:
            pprint(raw_message)
            print(raw_message)
            # Bare raise keeps the original traceback intact.
            raise
|
||||
|
||||
|
||||
class GunicornLogger(Logger):
    """Gunicorn logger class that routes gunicorn's logs into loguru."""

    def setup(self, cfg) -> None:
        """Attach one ``InterceptHandler`` to the error and access logs.

        Both loggers are set to ``LOG_LEVEL``; afterwards the loguru sinks
        are (re)configured through ``logger_register()``.
        """
        intercept = InterceptHandler()
        # Same handler instance for both loggers, error log first.
        for gunicorn_log in (self.error_log, self.access_log):
            gunicorn_log.addHandler(intercept)
            gunicorn_log.setLevel(LOG_LEVEL)

        # Configure logger before gunicorn starts logging
        logger_register()
|
||||
|
||||
|
||||
def configure_logger() -> None:
    """Route every stdlib logger through loguru.

    The root logger gets a single ``InterceptHandler`` at ``LOG_LEVEL``; all
    loggers already registered lose their own handlers and propagate to the
    root instead. Finally the loguru sinks are configured.
    """
    root = logging.root
    root.handlers = [InterceptHandler()]
    root.setLevel(LOG_LEVEL)

    # Remove all log handlers and propagate to root logger
    for logger_name in root.manager.loggerDict:
        registered = logging.getLogger(logger_name)
        registered.handlers = []
        registered.propagate = True

    # Configure logger (again) if gunicorn is not used
    logger_register()
|
||||
39
server/utils/logger_trace.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import asyncio
|
||||
import time
|
||||
from functools import wraps
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def _shorten_for_log(value):
    """Return *value* cut to 42 characters plus "..." when it is str/bytes."""
    if isinstance(value, (str, bytes)):
        return f"{value[:42]}..."
    return value


def trace(func):
    """Decorator that logs calls, results and duration at TRACE level.

    Works for both plain functions and coroutine functions; the wrapped
    callable's return value is passed through unchanged.

    Args:
        func: Function or coroutine function to wrap.

    Returns:
        A wrapper with the same call signature as *func*.
    """
    if asyncio.iscoroutinefunction(func):
        logger.trace(f"{func.__name__!r} function is a coroutine")

        @wraps(func)
        async def wrapper(*args, **kwargs):
            # perf_counter is monotonic, so the duration cannot go negative
            # when the wall clock is adjusted.
            start_ts = time.perf_counter()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            original_result = await func(*args, **kwargs)
            logger.trace(f"Result: {original_result}")
            logger.trace(f"Result type: {type(original_result)}")
            duration_ts = time.perf_counter() - start_ts
            result = _shorten_for_log(original_result)
            # ".2f" = two decimals; the former ".2" meant two significant
            # digits and printed e.g. "1.2e+01" for 12 seconds.
            logger.trace(f"{func.__name__!r}() returned {result!r} in {duration_ts:.2f} seconds")
            return original_result

    else:
        logger.trace(f"{func.__name__!r} is not a coroutine")

        @wraps(func)
        def wrapper(*args, **kwargs):
            start_ts = time.perf_counter()
            logger.trace(f"Calling {func.__name__}() with {args}, {kwargs}")
            original_result = func(*args, **kwargs)
            logger.trace(f"Result: {original_result}")
            logger.trace(f"Result type: {type(original_result)}")
            duration_ts = time.perf_counter() - start_ts
            result = _shorten_for_log(original_result)
            logger.trace(f"{func.__name__!r}() returned {result!r} in {duration_ts:.2f} seconds")
            return original_result

    return wrapper
|
||||
30
server/utils/loguru_handler.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# Based on: https://stackoverflow.com/a/72735401/13452914
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class InterceptHandler(logging.Handler):
    """
    Add logging handler to augment python stdlib logging.

    Logs which would otherwise go to stdlib logging are redirected through
    loguru.
    """

    # Exceptions raised inside emit() go to loguru's catch decorator; its
    # onerror callback exits the process with status 1.
    @logger.catch(default=True, onerror=lambda _: sys.exit(1))
    def emit(self, record: logging.LogRecord):
        # Get corresponding Loguru level if it exists.
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            # No loguru level with that name: use the numeric level instead.
            level = record.levelno

        # Find caller from where originated the logged message.
        # NOTE(review): the search starts 6 frames up - presumably to skip the
        # logging internals plus the catch wrapper; confirm before changing
        # the call path of this method, since the constant depends on it.
        frame, depth = sys._getframe(6), 6
        # Skip every frame that still belongs to the stdlib logging module.
        while frame and frame.f_code.co_filename == logging.__file__:
            frame = frame.f_back
            depth += 1

        logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
|
||||
44
server/utils/notify.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import asyncio
|
||||
import aiohttp
|
||||
from enum import Enum
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from server import config
|
||||
|
||||
|
||||
class MessageStatus(str, Enum):
    """Status tag attached to an admin notification.

    The ``str`` mixin makes members compare equal to their raw values
    (``MessageStatus.GOOD == "GOOD"``), so the plain-string defaults used by
    the senders in this module and enum members are interchangeable.
    """

    ERROR = "ERROR"
    GOOD = "GOOD"
|
||||
|
||||
|
||||
# Strong references to in-flight notification tasks. The event loop only keeps
# weak references, so an unreferenced task may be garbage-collected before it
# finishes.
_PENDING_MESSAGE_TASKS = set()


async def send_message(text: str, silent: bool = False, status: MessageStatus = "ERROR") -> None:
    """Schedule an admin notification without waiting for its delivery.

    Args:
        text: Message body.
        silent: Forwarded to the sender as its ``silent`` flag.
        status: Message status, forwarded to the sender unchanged.
    """
    task = asyncio.create_task(task_send_message(text, silent, status))
    _PENDING_MESSAGE_TASKS.add(task)
    # Drop the reference as soon as the task is done.
    task.add_done_callback(_PENDING_MESSAGE_TASKS.discard)
|
||||
|
||||
|
||||
async def task_send_message(text: str, silent: bool = False, status: MessageStatus = "ERROR") -> "bool | None":
    """Send *text* to the configured Telegram admin chat.

    Args:
        text: Message body, sent with ``parse_mode`` "HTML"; cut to 4000
            characters when longer.
        silent: Sent as Telegram's ``disable_notification`` field.
        status: ``MessageStatus`` member or its raw string value. Messages
            with status GOOD are not sent.

    Returns:
        ``True`` when the message is skipped because its status is GOOD,
        otherwise ``None`` (also when sending failed - failures are logged).
    """
    if not config.TELEGRAM_BOT_TOKEN or not config.TELEGRAM_ADMIN_ID:
        logger.warning("Can't send log messages, because of lack of some informations. Ignore....")
        return None

    # Accept both the enum member and the raw string; comparing only against
    # `.value` silently ignored callers that passed MessageStatus.GOOD.
    if status in (MessageStatus.GOOD, MessageStatus.GOOD.value):
        return True

    if len(text) > 4000:
        logger.warning(f"Message is too long ({len(text)}): {text}")
        # NOTE(review): a cut in the middle of an HTML tag may make Telegram
        # reject the message - confirm whether that matters here.
        text = text[:4000]

    url = f"https://api.telegram.org/bot{config.TELEGRAM_BOT_TOKEN}/sendMessage"
    data = {
        "chat_id": config.TELEGRAM_ADMIN_ID,
        "text": text,
        "parse_mode": "HTML",
        "disable_notification": silent
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data=data) as response:
                if response.status == 200:
                    logger.info("Message sent successfully")
                else:
                    logger.warning(f"Failed to send message. Status: {response.status}")
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        # This usually runs as a fire-and-forget task: log network failures
        # here instead of leaving a "Task exception was never retrieved".
        logger.warning(f"Failed to send message: {exc!r}")
    return None
|
||||
49
server/utils/utils.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import random
|
||||
import re
|
||||
import socket
|
||||
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
DEFAULT_PROTOCOL = "https://"
|
||||
|
||||
|
||||
@trace
def correct_url(url: str) -> str:
    """Normalize the protocol prefixes found in *url*.

    Every ``http:/``, ``http://``, ``https:/`` or ``https://`` occurrence is
    replaced with ``DEFAULT_PROTOCOL``; a url without any of them is returned
    unchanged.
    """
    # Workaround for Safari bug
    protocol_pattern = r"https?://?"
    return re.sub(protocol_pattern, DEFAULT_PROTOCOL, url)
|
||||
|
||||
|
||||
def string_to_number_ascii(input_str: str, key_number: int = None):
    """Turn a string into a number derived from its character codes.

    The code points of the upper-cased string are summed and the sum is
    multiplied by *key_number*.

    Args:
        input_str: Text to convert; case is ignored.
        key_number: Multiplier. When ``None``, a random integer in
            ``[0, 100]`` is used, so the result is not reproducible.

    Returns:
        The multiplied code-point sum as an ``int``.
    """
    # Check for None explicitly: `not key_number` also replaced an explicit 0
    # with a random multiplier.
    if key_number is None:
        key_number = random.randint(0, 100)
    return sum(ord(char) for char in input_str.upper()) * key_number
|
||||
|
||||
|
||||
def is_negative(num: int) -> bool:
    """Return ``True`` when *num* is strictly below zero."""
    below_zero = num < 0
    return below_zero
|
||||
|
||||
|
||||
async def safe_check_redis_connection(connection):
    """Ping *connection* without letting a failure propagate.

    Args:
        connection: Object exposing an awaitable ``ping()`` method.

    Returns:
        Whatever ``ping()`` returned, or ``False`` if it raised any
        ``Exception``.
    """
    try:
        return await connection.ping()
    except Exception:
        # Any failure of the ping is reported as "not connected".
        return False
|
||||
|
||||
|
||||
def is_port_in_use(port: int) -> bool:
    """Report whether a TCP connection to ``localhost:port`` succeeds.

    Args:
        port: TCP port number to probe.

    Returns:
        True when ``connect_ex`` returns 0 (the connection was established),
        False for any other error code.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        error_code = probe.connect_ex(("localhost", port))
    return error_code == 0
|
||||
67
server/worker.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import atexit
|
||||
import multiprocessing
|
||||
from multiprocessing.util import _exit_function
|
||||
|
||||
import gunicorn.app.base
|
||||
import uvicorn
|
||||
from loguru import logger
|
||||
|
||||
from server import config, ban_db
|
||||
from server.main import app
|
||||
from server.utils.logger import GunicornLogger
|
||||
from server.utils.logger_trace import trace
|
||||
|
||||
# Record the installed uvicorn version at trace level when this module is imported.
logger.trace(f"Uvicorn version: {uvicorn.__version__}")
|
||||
|
||||
|
||||
def post_worker_init(worker):
    """Remove the atexit handler set up by the parent process.

    Passed to gunicorn as the ``post_worker_init`` setting. Background:
    https://github.com/benoitc/gunicorn/issues/1391#issuecomment-467010209

    Args:
        worker: The worker object supplied by gunicorn; not used here.
    """
    logger.trace("Removing atexit handler")
    inherited_handler = _exit_function
    atexit.unregister(inherited_handler)
|
||||
|
||||
|
||||
def on_exit():
    """Log the shutdown at debug level, then call ``ban_db.dump()``."""
    shutdown_note = "GUNICORN: On exit"
    logger.debug(shutdown_note)
    ban_db.dump()
|
||||
|
||||
|
||||
@trace
def number_of_workers():
    """Choose the worker count from the CPU count.

    Returns:
        The CPU count itself when there are 8 or more CPUs, otherwise twice
        the CPU count. The chosen value is also logged at debug level.
    """
    cpu_total = multiprocessing.cpu_count()
    # Alternative formula kept for reference: (cores * 2) + 2
    workers = cpu_total if cpu_total >= 8 else cpu_total * 2
    logger.debug(f"Number of workers: {workers}")
    return workers
|
||||
|
||||
|
||||
class GunicornStandaloneApplication(gunicorn.app.base.BaseApplication):
    """Gunicorn application wrapping an in-process app object and an options dict."""

    def __init__(self, app, options=None):
        """Store *app* and *options* (an empty dict when falsy), then run the base initializer."""
        # Both attributes are assigned before the base initializer runs.
        # NOTE(review): presumably BaseApplication.__init__ triggers
        # load_config(), which reads self.options - confirm against gunicorn.
        self.options = options if options else {}
        self.application = app
        super().__init__()

    def load_config(self):
        """Apply each option that names a known setting and whose value is not None."""
        accepted = []
        for name, setting in self.options.items():
            if name in self.cfg.settings and setting is not None:
                accepted.append((name, setting))

        for name, setting in accepted:
            self.cfg.set(name.lower(), setting)

    def load(self):
        """Return the application object given to the constructor."""
        return self.application
|
||||
|
||||
|
||||
def execute_server_worker(host: str, port: int):
    """Build the gunicorn options and run the application under gunicorn.

    Args:
        host: Host part of the bind address.
        port: Port part of the bind address.
    """
    bind_address = f"{host}:{port}"
    options = {
        "bind": bind_address,
        "workers": number_of_workers(),
        "logger_class": GunicornLogger,
        "worker_class": "uvicorn.workers.UvicornWorker",
        "preload_app": True,
        "post_worker_init": post_worker_init,
        "timeout": config.WORKER_TIMEOUT,
        # "on_exit": on_exit,
    }
    server = GunicornStandaloneApplication(app, options)
    server.run()
|
||||
BIN
static/android-chrome-192x192.png
Normal file
|
After Width: | Height: | Size: 9.4 KiB |
BIN
static/android-chrome-512x512.png
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
static/apple-touch-icon.png
Normal file
|
After Width: | Height: | Size: 6.9 KiB |
9
static/browserconfig.xml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<browserconfig>
|
||||
<msapplication>
|
||||
<tile>
|
||||
<square150x150logo src="/mstile-150x150.png"/>
|
||||
<TileColor>#00aba9</TileColor>
|
||||
</tile>
|
||||
</msapplication>
|
||||
</browserconfig>
|
||||
BIN
static/favicon-16x16.png
Normal file
|
After Width: | Height: | Size: 1.1 KiB |
BIN
static/favicon-32x32.png
Normal file
|
After Width: | Height: | Size: 1.6 KiB |
BIN
static/favicon.ico
Normal file
|
After Width: | Height: | Size: 15 KiB |
1
static/humans.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
Me in touch: https://github.com/ZhymabekRoman
|
||||
BIN
static/mstile-144x144.png
Normal file
|
After Width: | Height: | Size: 7 KiB |
BIN
static/mstile-150x150.png
Normal file
|
After Width: | Height: | Size: 6.8 KiB |
BIN
static/mstile-310x150.png
Normal file
|
After Width: | Height: | Size: 7.4 KiB |