diff --git a/docker-compose/docker-compose.main.yml b/docker-compose/docker-compose.main.yml index 5ddb3fc..85025e9 100644 --- a/docker-compose/docker-compose.main.yml +++ b/docker-compose/docker-compose.main.yml @@ -38,7 +38,7 @@ services: - dante_1 - dante_2 # volumes: - # - ./web:/app/web + # - ../web:/app/web # - ./core/medium_parser/:/app/medium_parser # - ./core/rl_string_helper/:/app/rl_string_helper ports: diff --git a/medium-parser/medium_parser/core.py b/medium-parser/medium_parser/core.py index ed12b83..663d9e3 100644 --- a/medium-parser/medium_parser/core.py +++ b/medium-parser/medium_parser/core.py @@ -220,8 +220,11 @@ class MediumParser: def parse_paragraph_text( text: str, markups: list, is_code: bool = False ) -> RLStringHelper: - if is_code: - quote_html_type = ["minimal"] + # Hotfix, workaround for code block + has_code_block = any(markup["type"] == "CODE" for markup in markups) + if is_code or has_code_block: + # quote_html_type = ["minimal"] + quote_html_type = None else: quote_html_type = ["full"] text_formater = RLStringHelper(text, quote_html_type=quote_html_type) @@ -315,10 +318,12 @@ class MediumParser: if out_paragraphs: css_class.append("pt-12") header_template = jinja_env.from_string( - '
{{text}}"
)
else:
continue
- template = jinja_env_debug.from_string(template)
+ template = jinja_env_debug.from_string(template_rendered)
markup["template"] = template
markups_out.append(markup)
diff --git a/rl_string_helper/rl_string_helper/__init__.py b/rl_string_helper/rl_string_helper/__init__.py
index c22dee0..8476adb 100644
--- a/rl_string_helper/rl_string_helper/__init__.py
+++ b/rl_string_helper/rl_string_helper/__init__.py
@@ -1,2 +1,7 @@
-from .string_helper import RLStringHelper, split_overlapping_ranges
+from .string_helper import (
+ RLStringHelper,
+ UTF16Handler,
+ StringAssignmentMixin,
+ split_overlapping_ranges,
+)
from .utils import quote_html, quote_symbol
diff --git a/rl_string_helper/test.txt b/rl_string_helper/test.txt
new file mode 100644
index 0000000..c18b1df
--- /dev/null
+++ b/rl_string_helper/test.txt
@@ -0,0 +1 @@
+Noah dragged his two printers out from Settings ⚙️ < Printers & Scanners 🖨️ and dropped them in Dock or Desktop, I don't remember — but you can drag to both the places.
diff --git a/rl_string_helper/tests/test_rl_string_helper.py b/rl_string_helper/tests/test_rl_string_helper.py
index e56adca..27e766d 100644
--- a/rl_string_helper/tests/test_rl_string_helper.py
+++ b/rl_string_helper/tests/test_rl_string_helper.py
@@ -3,6 +3,8 @@ import re
from loguru import logger
from rl_string_helper import (
RLStringHelper,
+ UTF16Handler,
+ StringAssignmentMixin,
quote_html,
split_overlapping_ranges,
)
@@ -133,52 +135,37 @@ class TestRLStringHelper:
for markup in parsed_markups:
helper.set_template(markup["start"], markup["end"], markup["template"])
- expected_output = (
- "The quick (brown) fox jumps "
- 'over 13 lazy dogs!'
- )
+ expected_output = """The quick (brown) fox jumps over 13 lazy dogs!"""
+
assert str(helper) == expected_output
def test_nmultibyte_emoji(self):
- from medium_parser.markups import parse_markups
+ text = "Noah dragged his two printers out from Settings ⚙️ < Printers & Scanners 🖨️ and dropped them in Dock or Desktop, I don’t remember — but you can drag to both the places."
+ string_helper = StringAssignmentMixin(text)
+ utf_handler = UTF16Handler()
+ string_pos_matrix = list(range(len(text)))
+ updated_text, string_pos_matrix, utf_16_bang_list = utf_handler.pre_utf_16_bang(
+ string_helper, string_pos_matrix
+ )
+ updated_text, string_pos_matrix = utf_handler.post_utf_16_bang(
+ updated_text, string_pos_matrix, utf_16_bang_list
+ )
+ assert str(updated_text) == text
+ from icecream import ic
- data = {
- "__typename": "Paragraph",
- "id": "236e7049b537_33",
- "name": "ba8c",
- "href": None,
- "text": "Noah dragged his two printers out from Settings ⚙️ < Printers & Scanners \ud83d\udda8️ and dropped them in Dock or Desktop, I don’t remember — but you can drag to both the places.",
- "iframe": None,
- "layout": None,
- "markups": [
- {
- "__typename": "Markup",
- "name": None,
- "type": "CODE",
- "start": 39,
- "end": 76,
- "href": None,
- "title": None,
- "rel": None,
- "anchorType": None,
- "userId": None,
- "creatorIds": None,
- }
- ],
- "metadata": None,
- "mixtapeMetadata": None,
- "type": "P",
- "hasDropCap": None,
- "dropCapImage": None,
- "codeBlockMetadata": None,
- }
- helper = RLStringHelper(data["text"])
- parsed_markups = split_overlapping_ranges(parse_markups(data["markups"]))
- for markup in parsed_markups:
- helper.set_template(markup["start"], markup["end"], markup["template"])
- print(str(helper))
+ string_helper = RLStringHelper(text, None)
+ string_helper.set_template(0, 100000, "{{text}}")
+ assert str(string_helper) == f"{text}".replace("’", "'")
- assert str(helper) == data["text"]
+ string_helper = RLStringHelper(text, ["None"])
+ string_helper.set_template(39, 76, "{{text}}")
+ with open("test.txt", "w") as f:
+ f.write(str(string_helper) + "\n")
+ assert str(
+ string_helper
+ ) == f"{text[:39]}Settings ⚙️ < Printers & Scanners 🖨️{text[76:]}".replace(
+ "’", "'"
+ )
def test_basic_replace(self):
# Replace A to B - ONE to ONE char
diff --git a/test.py b/test.py
index 76acb3a..f716178 100644
--- a/test.py
+++ b/test.py
@@ -1,6 +1,8 @@
import requests
-server_url = input("Enter your Freedium instance server URL (for example: http://localhost:6752/ ): ")
+server_url = input(
+ "Enter your Freedium instance server URL (for example: http://localhost:6752/ ): "
+)
# List of some problematic Medium posts
url_for_test = {
@@ -9,18 +11,20 @@ url_for_test = {
"https://levelup.gitconnected.com/some-linux-commands-that-can-boost-your-work-efficiency-dramatically-9dc802a10618",
"https://leshchuk.medium.com/http-cache-on-rails-nginx-stack-950fee2f8eef",
"https://medium.com/swlh/35-actionable-tips-to-grow-your-medium-blog-4e4017b89905",
- "https://valeman.medium.com/benchmarking-neural-prophet-part-i-neural-prophet-vs-prophet-252990763468",
+ "https://valeman.medium.com/benchmarking-neural-prophet-part-i-neural-prophet-vs-prophet-252990763468", # Still has problems
"https://valeman.medium.com/python-vs-r-for-time-series-forecasting-395390432598",
"https://medium.com/@aleb/how-to-generate-random-user-agents-with-an-api-22aad3d232cb",
"https://medium.com/angular-in-depth/the-best-way-to-unsubscribe-rxjs-observable-in-the-angular-applications-d8f9aa42f6a0",
- "515dd5a43948", # full address: https://medium.com/macoclock/12-macos-apps-so-good-you-will-wonder-how-they-are-free-515dd5a43948
+ "515dd5a43948", # full address: https://medium.com/macoclock/12-macos-apps-so-good-you-will-wonder-how-they-are-free-515dd5a43948
"https://anudeep-vysyaraju.medium.com/how-any-gitamite-can-get-free-linkedin-premium-membership-d4222bd1a0b3", # <--- Check for non properly aligned emojies
- "http://freedium.cfd/https://johndanielraines.medium.com/be-an-engineer-not-a-frameworker-c58fe28d0c88",
- "https://medium.com/coding-beauty/parseint-strange-behavior-cdff5e1f9ff7" # <---- Title renderer some weird characters
+ "https://johndanielraines.medium.com/be-an-engineer-not-a-frameworker-c58fe28d0c88",
+ "https://medium.com/coding-beauty/parseint-strange-behavior-cdff5e1f9ff7", # <---- Title renderer some weird characters
+ "https://medium.com/macoclock/the-11-craziest-and-most-advanced-macos-tips-tricks-ive-ever-seen-cd842ce3f0a3", # Still has some problems on paragraph_id 236e7049b537_43
}
blacklist_url = {"51e23c5a2aac"}
+
def main():
for url in url_for_test:
print(f"Processing: {url}")
@@ -29,5 +33,5 @@ def main():
raise ValueError(f"Can't process URL: {url}")
-if __name__ == '__main__':
+if __name__ == "__main__":
main()