diff --git a/Dockerfile b/Dockerfile index 4bd3453..8030d34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,8 +15,8 @@ RUN pip3 install --no-cache-dir ./rl_string_helper COPY ./database-lib ./database-lib RUN pip3 install --no-cache-dir ./database-lib -COPY ./core ./core -RUN pip3 install --no-cache-dir ./core +COPY ./medium-parser ./medium-parser +RUN pip3 install --no-cache-dir ./medium-parser COPY ./web ./web diff --git a/core/.gitmodules b/core/.gitmodules deleted file mode 100644 index ab3db11..0000000 --- a/core/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "medium_parser/toolkits/rl_string_helper"] - path = medium_parser/toolkits/rl_string_helper - url = https://github.com/Freedium-cfd/rl-string-helper \ No newline at end of file diff --git a/database-lib/database_lib/main.py b/database-lib/database_lib/main.py index bd14ebc..5052f5a 100644 --- a/database-lib/database_lib/main.py +++ b/database-lib/database_lib/main.py @@ -32,6 +32,8 @@ class CacheData: def __str__(self): return self.data + def has_data(self): + return self.data is not None and self.data != "" class CacheResponse: __slots__ = ("key", "data") diff --git a/core/.gitignore b/medium-parser/.gitignore similarity index 100% rename from core/.gitignore rename to medium-parser/.gitignore diff --git a/core/README.md b/medium-parser/README.md similarity index 100% rename from core/README.md rename to medium-parser/README.md diff --git a/core/medium_parser/__init__.py b/medium-parser/medium_parser/__init__.py similarity index 100% rename from core/medium_parser/__init__.py rename to medium-parser/medium_parser/__init__.py diff --git a/core/medium_parser/api.py b/medium-parser/medium_parser/api.py similarity index 100% rename from core/medium_parser/api.py rename to medium-parser/medium_parser/api.py diff --git a/core/medium_parser/bypass_cloudflare.py b/medium-parser/medium_parser/bypass_cloudflare.py similarity index 100% rename from core/medium_parser/bypass_cloudflare.py rename to medium-parser/medium_parser/bypass_cloudflare.py diff --git a/core/medium_parser/core.py b/medium-parser/medium_parser/core.py similarity index 66% rename from core/medium_parser/core.py rename to medium-parser/medium_parser/core.py index a0e8d83..87af8e4 100644 --- a/core/medium_parser/core.py +++ b/medium-parser/medium_parser/core.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import asyncio import math import textwrap @@ -10,28 +12,61 @@ import tld from asyncer import asyncify from loguru import logger -from rl_string_helper import RLStringHelper, parse_markups, split_overlapping_ranges +from rl_string_helper import RLStringHelper, split_overlapping_ranges from . import jinja_env -from .exceptions import InvalidMediumPostID, InvalidMediumPostURL, InvalidURL, MediumParserException, MediumPostQueryError +from .exceptions import ( + InvalidMediumPostID, + InvalidMediumPostURL, + InvalidURL, + MediumParserException, + MediumPostQueryError, +) from .api import MediumApi from .models.html_result import HtmlResult from .time import convert_datetime_to_human_readable -from .utils import correct_url, extract_hex_string, getting_percontage_of_match, is_has_valid_medium_post_id, is_valid_medium_url, is_valid_url, resolve_medium_url +from .utils import ( + correct_url, + extract_hex_string, + getting_percontage_of_match, + is_has_valid_medium_post_id, + is_valid_medium_url, + is_valid_url, + resolve_medium_url, +) +from .markups import parse_markups if typing.TYPE_CHECKING: from database_lib import AbstractCacheBackend class MediumParser: - __slots__ = ("cache", "host_address", "jinja_template", "post_template", "timeout", "medium_api") + __slots__ = ( + "cache", + "host_address", + "jinja_template", + "post_template", + "timeout", + "medium_api", + ) - def __init__(self, cache: "AbstractCacheBackend", medium_api: MediumApi, timeout: int, host_address: str, template_folder: str = "./templates"): + def __init__( + self, + cache: AbstractCacheBackend, + medium_api: MediumApi, + timeout: int, + host_address: str, + template_folder: str = "./templates", + ): self.timeout: int = timeout self.cache: AbstractCacheBackend = cache self.host_address: str = host_address - self.jinja_template: jinja2.Environment = jinja2.Environment(loader=jinja2.FileSystemLoader(template_folder)) - self.post_template: jinja2.Template = self.jinja_template.get_template("post.html") + self.jinja_template: jinja2.Environment = jinja2.Environment( + loader=jinja2.FileSystemLoader(template_folder) + ) + self.post_template: jinja2.Template = self.jinja_template.get_template( + "post.html" + ) self.medium_api: MediumApi = medium_api async def resolve(self, unknown: str) -> str: @@ -52,7 +87,9 @@ class MediumParser: post_id = await resolve_medium_url(sanitized_url, self.timeout) if not post_id: - raise InvalidMediumPostURL(f"Could not find Medium post ID for URL: {sanitized_url}") + raise InvalidMediumPostURL( + f"Could not find Medium post ID for URL: {sanitized_url}" + ) return post_id @@ -64,7 +101,8 @@ class MediumParser: async def _get_from_cache(): logger.debug("Using cache backend") post_data = self.cache.pull(post_id) - if post_data: + logger.info(f"Found data in cache: {post_data[:10]}") + if post_data and post_data.data.has_data(): logger.debug("post query was found on cache") return post_data.json() logger.debug(f"No data found in cache by {post_id}") @@ -76,6 +114,7 @@ class MediumParser: logger.debug("Timeout while waiting for cache") return None except Exception as e: + logger.exception(e) logger.error(f"Error while waiting for cache: {e}") return None @@ -109,7 +148,13 @@ class MediumParser: return post_data, cache_used - async def query(self, post_id: str, use_cache: bool = True, retry: int = 2, force_cache: bool = False): + async def query( + self, + post_id: str, + use_cache: bool = True, + retry: int = 2, + force_cache: bool = False, + ): logger.debug(f"Medium QUERY: {use_cache=}, {retry=}, {force_cache=}") post_data, is_cache_used = None, False @@ -118,7 +163,9 @@ class MediumParser: reason = None while not post_data and attempt < retry: try: - post_data, is_cache_used = await self.query_get(post_id, use_cache, force_cache) + post_data, is_cache_used = await self.query_get( + post_id, use_cache, force_cache + ) if not post_data: reason = "No post data returned" @@ -144,7 +191,9 @@ class MediumParser: if not reason: reason = "Unknown" - raise MediumPostQueryError(f"Could not query post by ID from API: {post_id}. Reason: {reason}") + raise MediumPostQueryError( + f"Could not query post by ID from API: {post_id}. Reason: {reason}" + ) if not is_cache_used: logger.debug("Pushing post data to cache") @@ -153,13 +202,23 @@ class MediumParser: logger.trace(f"Query: done") return post_data - def _parse_and_render_content_html_post(self, content: dict, title: str, subtitle: str, preview_image_id: str, highlights: list, tags: list) -> tuple[list, str, str]: + def _parse_and_render_content_html_post( + self, + content: dict, + title: str, + subtitle: str, + preview_image_id: str, + highlights: list, + tags: list, + ) -> tuple[list, str, str]: paragraphs = content["bodyModel"]["paragraphs"] tags_list = [tag["displayTitle"] for tag in tags] out_paragraphs: list[str] = [] current_pos = 0 - def parse_paragraph_text(text: str, markups: list, is_code: bool = False) -> str: + def parse_paragraph_text( + text: str, markups: list, is_code: bool = False + ) -> RLStringHelper: if is_code: quote_html_type = ["minimal"] else: @@ -170,7 +229,9 @@ class MediumParser: fixed_markups = split_overlapping_ranges(parsed_markups) for markup in fixed_markups: - text_formater.set_template(markup["start"], markup["end"], markup["template"]) + text_formater.set_template( + markup["start"], markup["end"], markup["template"] + ) return text_formater @@ -199,33 +260,48 @@ class MediumParser: current_pos += 1 continue if paragraph["type"] in ["H4", "P"]: - is_paragraph_subtitle = getting_percontage_of_match(paragraph["text"], subtitle) > 80 + is_paragraph_subtitle = ( + getting_percontage_of_match(paragraph["text"], subtitle) > 80 + ) if is_paragraph_subtitle and not subtitle.endswith("…"): logger.trace("Subtitle was detected, ignore...") subtitle = paragraph["text"] current_pos += 1 continue - elif subtitle and subtitle.endswith("…") and len(paragraph["text"]) > 100: + elif ( + subtitle + and subtitle.endswith("…") + and len(paragraph["text"]) > 100 + ): subtitle = "" elif paragraph["type"] == "IMG": - if paragraph["metadata"] and paragraph["metadata"]["id"] == preview_image_id: + if ( + paragraph["metadata"] + and paragraph["metadata"]["id"] == preview_image_id + ): logger.trace("Preview image was detected, ignore...") current_pos += 1 continue if paragraph["text"] is not None: - text_formater = parse_paragraph_text(paragraph["text"], paragraph["markups"]) + text_formater = parse_paragraph_text( + paragraph["text"], paragraph["markups"] + ) else: - text_formater = None + text_formater = parse_paragraph_text("", []) for highlight in highlights: for highlight_paragraph in highlight["paragraphs"]: if highlight_paragraph["name"] == paragraph["name"]: logger.trace("Apply highlight to this paragraph") if highlight_paragraph["text"] != text_formater.get_text(): - logger.warning("Highlighted text and paragraph text are not the same! Skip...") + logger.warning( + "Highlighted text and paragraph text are not the same! Skip..." + ) break - quote_markup_template = '{{ text }}' + quote_markup_template = ( + '{{ text }}' + ) text_formater.set_template( highlight["startOffset"], highlight["endOffset"], @@ -237,45 +313,65 @@ class MediumParser: css_class = [] if out_paragraphs: css_class.append("pt-12") - header_template = jinja_env.from_string('
{{ text }}
') + paragraph_template = jinja_env.from_string( + '{{ text }}
' + ) if paragraphs[current_pos - 1]["type"] in ["H4", "H3"]: css_class.append("mt-3") else: css_class.append("mt-7") - paragraph_template_rendered = paragraph_template.render(text=text_formater.get_text(), css_class=" ".join(css_class)) + paragraph_template_rendered = paragraph_template.render( + text=text_formater.get_text(), css_class=" ".join(css_class) + ) out_paragraphs.append(paragraph_template_rendered) elif paragraph["type"] == "ULI": - uli_template = jinja_env.from_string('{{code_block}}')
- code_block_template = jinja_env.from_string('{{ text }}')
+ pre_template = jinja_env.from_string(
+ '{{code_block}}'
+ )
+ code_block_template = jinja_env.from_string(
+ '{{ text }}'
+ )
code_css_class = []
- if paragraph["codeBlockMetadata"] and paragraph["codeBlockMetadata"]["lang"] is not None:
- code_css_class.append(f'language-{paragraph["codeBlockMetadata"]["lang"]}')
+ if (
+ paragraph["codeBlockMetadata"]
+ and paragraph["codeBlockMetadata"]["lang"] is not None
+ ):
+ code_css_class.append(
+ f'language-{paragraph["codeBlockMetadata"]["lang"]}'
+ )
else:
code_css_class.append("nohighlight")
# code_css_class.append("auto")
@@ -355,15 +478,21 @@ class MediumParser:
while len(paragraphs) > _tmp_current_pos:
_paragraph = paragraphs[_tmp_current_pos]
if _paragraph["type"] == "PRE":
- text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"], is_code=True)
+ text_formater = parse_paragraph_text(
+ _paragraph["text"], _paragraph["markups"], is_code=True
+ )
code_list.append(text_formater.get_text())
else:
break
_tmp_current_pos += 1
- code_block_template_rendered = code_block_template.render(text="\n".join(code_list), code_css_class=" ".join(code_css_class))
- pre_template_rendered = pre_template.render(code_block=code_block_template_rendered)
+ code_block_template_rendered = code_block_template.render(
+ text="\n".join(code_list), code_css_class=" ".join(code_css_class)
+ )
+ pre_template_rendered = pre_template.render(
+ code_block=code_block_template_rendered
+ )
out_paragraphs.append(pre_template_rendered)
current_pos = _tmp_current_pos - 1
@@ -375,7 +504,9 @@ class MediumParser:
logger.trace(bq_template_rendered)
out_paragraphs.append(bq_template_rendered)
elif paragraph["type"] == "PQ":
- pq_template = jinja_env.from_string('') + pq_template = jinja_env.from_string( + '{{ text }}
' + ) pq_template_rendered = pq_template.render(text=text_formater.get_text()) logger.trace(pq_template_rendered) out_paragraphs.append(pq_template_rendered) @@ -387,14 +518,18 @@ class MediumParser: if paragraph.get("mixtapeMetadata") is not None: url = paragraph["mixtapeMetadata"]["href"] else: - logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't get url") + logger.warning( + "Ignore MIXTAPE_EMBED paragraph type, since we can't get url" + ) current_pos += 1 continue text_raw = paragraph["text"] if len(paragraph["markups"]) != 3: - logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't split text") + logger.warning( + "Ignore MIXTAPE_EMBED paragraph type, since we can't split text" + ) current_pos += 1 continue @@ -405,7 +540,9 @@ class MediumParser: logger.trace(f"{description_range=}") embed_title = text_raw[title_range["start"] : title_range["end"]] - embed_description = text_raw[description_range["start"] : description_range["end"]] + embed_description = text_raw[ + description_range["start"] : description_range["end"] + ] logger.trace(f"{embed_title=}") logger.trace(f"{embed_description=}") @@ -413,19 +550,30 @@ class MediumParser: try: embed_site = tld.get_fld(url) except Exception as ex: - logger.warning(f"Can't get embed site fld: {ex}. Using custom logic...") + logger.warning( + f"Can't get embed site fld: {ex}. Using custom logic..." + ) parsed_url = urllib.parse.urlparse(url) embed_site = parsed_url.hostname logger.trace(f"{embed_site=}") - embed_template_rendered = embed_template.render(paragraph=paragraph, url=url, embed_title=embed_title, embed_description=embed_description, embed_site=embed_site) + embed_template_rendered = embed_template.render( + paragraph=paragraph, + url=url, + embed_title=embed_title, + embed_description=embed_description, + embed_site=embed_site, + ) out_paragraphs.append(embed_template_rendered) elif paragraph["type"] == "IFRAME": iframe_template = jinja_env.from_string( '' ) - iframe_template_rendered = iframe_template.render(host_address=self.host_address, iframe_id=paragraph["iframe"]["mediaResource"]["id"]) + iframe_template_rendered = iframe_template.render( + host_address=self.host_address, + iframe_id=paragraph["iframe"]["mediaResource"]["id"], + ) out_paragraphs.append(iframe_template_rendered) else: @@ -444,10 +592,18 @@ class MediumParser: else: return result - async def generate_metadata(self, post_data: dict, post_id: str, as_dict: bool = False) -> tuple: - title = RLStringHelper(post_data["data"]["post"]["title"], ["minimal"]).get_text() - subtitle = RLStringHelper(post_data["data"]["post"]["previewContent"]["subtitle"]).get_text() - description = RLStringHelper(textwrap.shorten(subtitle, width=100, placeholder="...")).get_text() + async def generate_metadata( + self, post_data: dict, post_id: str, as_dict: bool = False + ) -> tuple | dict[str, str]: + title = RLStringHelper( + post_data["data"]["post"]["title"], ["minimal"] + ).get_text() + subtitle = RLStringHelper( + post_data["data"]["post"]["previewContent"]["subtitle"] + ).get_text() + description = RLStringHelper( + textwrap.shorten(subtitle, width=100, placeholder="...") + ).get_text() preview_image_id = post_data["data"]["post"]["previewImage"]["id"] creator = post_data["data"]["post"]["creator"] collection = post_data["data"]["post"]["collection"] @@ -455,8 +611,12 @@ class MediumParser: reading_time = math.ceil(post_data["data"]["post"]["readingTime"]) free_access = "No" if post_data["data"]["post"]["isLocked"] else "Yes" - updated_at = convert_datetime_to_human_readable(post_data["data"]["post"]["updatedAt"]) - first_published_at = convert_datetime_to_human_readable(post_data["data"]["post"]["firstPublishedAt"]) + updated_at = convert_datetime_to_human_readable( + post_data["data"]["post"]["updatedAt"] + ) + first_published_at = convert_datetime_to_human_readable( + post_data["data"]["post"]["firstPublishedAt"] + ) tags = post_data["data"]["post"]["tags"] if as_dict: @@ -476,14 +636,29 @@ class MediumParser: "tags": tags, } - return title, subtitle, description, url, creator, collection, reading_time, free_access, updated_at, first_published_at, preview_image_id, tags + return ( + title, + subtitle, + description, + url, + creator, + collection, + reading_time, + free_access, + updated_at, + first_published_at, + preview_image_id, + tags, + ) async def _render_as_html(self, post_data: dict, post_id: str) -> "HtmlResult": # Generate metadata in parallel metadata_task = asyncio.create_task(self.generate_metadata(post_data, post_id)) # Parse and render content in parallel - content, title, subtitle = await asyncify(self._parse_and_render_content_html_post)( + content, title, subtitle = await asyncify( + self._parse_and_render_content_html_post + )( post_data["data"]["post"]["content"], post_data["data"]["post"]["title"], post_data["data"]["post"]["previewContent"]["subtitle"], @@ -493,13 +668,28 @@ class MediumParser: ) # Await metadata - title, subtitle, description, url, creator, collection, reading_time, free_access, updated_at, first_published_at, preview_image_id, tags = await metadata_task + ( + title, + subtitle, + description, + url, + creator, + collection, + reading_time, + free_access, + updated_at, + first_published_at, + preview_image_id, + tags, + ) = await metadata_task post_page_title_raw = "{{ title }} | by {{ creator.name }}" if collection: post_page_title_raw += " | in {{ collection.name }}" post_page_title = jinja_env.from_string(post_page_title_raw) - post_page_title_rendered = post_page_title.render(title=title, creator=creator, collection=collection) + post_page_title_rendered = post_page_title.render( + title=title, creator=creator, collection=collection + ) post_context = { "subtitle": subtitle, @@ -517,7 +707,11 @@ class MediumParser: } post_template_rendered = self.post_template.render(post_context) - return HtmlResult(post_page_title_rendered, description, url, post_template_rendered) + return HtmlResult( + post_page_title_rendered, description, url, post_template_rendered + ) async def render_as_markdown(self) -> str: - raise NotImplementedError("Markdown rendering is not implemented. Please use HTML rendering instead") + raise NotImplementedError( + "Markdown rendering is not implemented. Please use HTML rendering instead" + ) diff --git a/core/medium_parser/exceptions.py b/medium-parser/medium_parser/exceptions.py similarity index 100% rename from core/medium_parser/exceptions.py rename to medium-parser/medium_parser/exceptions.py diff --git a/medium-parser/medium_parser/markups.py b/medium-parser/medium_parser/markups.py new file mode 100644 index 0000000..7aac76d --- /dev/null +++ b/medium-parser/medium_parser/markups.py @@ -0,0 +1,50 @@ +from medium_parser import jinja_env + + +def raw_render(**kwargs): + for key, value in kwargs.items(): + if isinstance(value, str): + kwargs[key] = f"{{% raw %}}{value}{{% endraw %}}" + + return kwargs + + +def parse_markups(markups: list[str]): + markups_out = [] + + for markup in markups: + if markup["type"] == "A": + if markup["anchorType"] == "LINK": + template = jinja_env.from_string( + '{{text}}' + ) + template = template.render( + raw_render( + rel=markup.get("rel", ""), + title=markup.get("title", ""), + href=markup["href"], + ) + ) + elif markup["anchorType"] == "USER": + template = jinja_env.from_string( + '{{text}}' + ) + template = template.render(userId=markup["userId"]) + else: + continue + elif markup["type"] == "STRONG": + template = "{{text}}" + elif markup["type"] == "EM": + template = "{{text}}" + elif markup["type"] == "CODE": + template = ( + "{{ text }}
{{text}}"
+ )
+ else:
+ continue
+
+ template = jinja_env.from_string(template)
+ markup["template"] = template
+ markups_out.append(markup)
+
+ return markups_out
diff --git a/core/medium_parser/models/__init__.py b/medium-parser/medium_parser/models/__init__.py
similarity index 100%
rename from core/medium_parser/models/__init__.py
rename to medium-parser/medium_parser/models/__init__.py
diff --git a/core/medium_parser/models/html_result.py b/medium-parser/medium_parser/models/html_result.py
similarity index 100%
rename from core/medium_parser/models/html_result.py
rename to medium-parser/medium_parser/models/html_result.py
diff --git a/core/medium_parser/time.py b/medium-parser/medium_parser/time.py
similarity index 100%
rename from core/medium_parser/time.py
rename to medium-parser/medium_parser/time.py
diff --git a/core/medium_parser/utils.py b/medium-parser/medium_parser/utils.py
similarity index 100%
rename from core/medium_parser/utils.py
rename to medium-parser/medium_parser/utils.py
diff --git a/core/requirements.txt b/medium-parser/requirements.txt
similarity index 100%
rename from core/requirements.txt
rename to medium-parser/requirements.txt
diff --git a/core/setup.py b/medium-parser/setup.py
similarity index 100%
rename from core/setup.py
rename to medium-parser/setup.py
diff --git a/core/tests/example_base_template.html b/medium-parser/tests/example_base_template.html
similarity index 100%
rename from core/tests/example_base_template.html
rename to medium-parser/tests/example_base_template.html
diff --git a/core/tests/example_test.py b/medium-parser/tests/example_test.py
similarity index 100%
rename from core/tests/example_test.py
rename to medium-parser/tests/example_test.py
diff --git a/core/tests/templates/post.html b/medium-parser/tests/templates/post.html
similarity index 100%
rename from core/tests/templates/post.html
rename to medium-parser/tests/templates/post.html
diff --git a/rl_string_helper/rl_string_helper/__init__.py b/rl_string_helper/rl_string_helper/__init__.py
index 910abf7..c22dee0 100644
--- a/rl_string_helper/rl_string_helper/__init__.py
+++ b/rl_string_helper/rl_string_helper/__init__.py
@@ -1,2 +1,2 @@
-from .string_helper import RLStringHelper, parse_markups, split_overlapping_ranges
+from .string_helper import RLStringHelper, split_overlapping_ranges
from .utils import quote_html, quote_symbol
diff --git a/rl_string_helper/rl_string_helper/string_helper.py b/rl_string_helper/rl_string_helper/string_helper.py
index 1953358..1a8960f 100644
--- a/rl_string_helper/rl_string_helper/string_helper.py
+++ b/rl_string_helper/rl_string_helper/string_helper.py
@@ -16,6 +16,7 @@ In UTF-16, each Unicode character may be encoded as one or two code units (byte)
the value returned by length might not match the actual number of Unicode characters in the string.
Python uses UTF-8 encoding, which each character is encoded as one byte. So here is a workaround to get the actual number of characters and manipulate them in string as in UTF-16 encoding. See pre_utf_16_bang and post_utf_16_bang function.
+More info to read: https://habr.com/ru/articles/769256/
"""
@@ -38,14 +39,16 @@ class RLStringHelper:
_default_bang_char: str = "R",
):
self.string: str = quote_symbol(string)
- self.templates = []
- self.quote_replaces = []
- self.replaces = []
+ self.templates: list[tuple[tuple[int, int], Template]] = []
+ self.quote_replaces: list[tuple[tuple[int, int], str]] = []
+ self.replaces: list[tuple[tuple[int, int], str]] = []
self.quote_html_type = quote_html_type
self._default_bang_char = _default_bang_char
- def pre_utf_16_bang(self, string: str, string_pos_matrix: list):
- utf_16_bang_list = []
+ def pre_utf_16_bang(
+ self, string: str, string_pos_matrix: list
+ ) -> tuple[str, list, list[tuple[int, int, int]]]:
+ utf_16_bang_list: list[tuple[int, int, int]] = []
string_len_utf_16 = len(string.encode("utf-16-le")) // 2
if string_len_utf_16 == len(string):
logger.trace("String doesn't contain multibyte characters")
@@ -68,7 +71,13 @@ class RLStringHelper:
return string, string_pos_matrix, utf_16_bang_list
- def _paste_char(self, string: str, string_pos_matrix: list, pos: int, char: str):
+ def _paste_char(
+ self,
+ string: StringAssignmentMixin,
+ string_pos_matrix: list,
+ pos: int,
+ char: str,
+ ) -> tuple[StringAssignmentMixin, list]:
char_len = len(char)
string_pos_matrix.insert(pos, string_pos_matrix[pos])
for matrix_i in range(pos + 1, len(string_pos_matrix)):
@@ -78,7 +87,7 @@ class RLStringHelper:
def _delete_char(
self,
- string: str,
+ string: StringAssignmentMixin,
string_pos_matrix: list,
pos: int,
char_len: int,
@@ -115,7 +124,7 @@ class RLStringHelper:
post_transbang += char_len
return string, string_pos_matrix
- def set_template(self, start: int, end: int, template: str):
+ def set_template(self, start: int, end: int, template: str | Template):
if not isinstance(template, Template):
template = jinja_env.from_string(template)
self.templates.append(((start, end), template))
@@ -334,51 +343,3 @@ def split_overlapping_range_position(positions):
result[-1]["end"] = max(last["end"], pos["end"])
return result
-
-
-def raw_render(**kwargs):
- for key, value in kwargs.items():
- if isinstance(value, str):
- kwargs[key] = f"{{% raw %}}{value}{{% endraw %}}"
- return kwargs
-
-
-def parse_markups(markups: list[str]):
- markups_out = []
-
- for markup in markups:
- if markup["type"] == "A":
- if markup["anchorType"] == "LINK":
- template = jinja_env.from_string(
- '{{text}}'
- )
- template = template.render(
- raw_render(
- rel=markup.get("rel", ""),
- title=markup.get("title", ""),
- href=markup["href"],
- )
- )
- elif markup["anchorType"] == "USER":
- template = jinja_env.from_string(
- '{{text}}'
- )
- template = template.render(userId=markup["userId"])
- else:
- continue
- elif markup["type"] == "STRONG":
- template = "{{text}}"
- elif markup["type"] == "EM":
- template = "{{text}}"
- elif markup["type"] == "CODE":
- template = (
- "{{text}}"
- )
- else:
- continue
-
- template = jinja_env.from_string(template)
- markup["template"] = template
- markups_out.append(markup)
-
- return markups_out
diff --git a/rl_string_helper/tests/test_rl_string_helper.py b/rl_string_helper/tests/test_rl_string_helper.py
index d4c0cba..5685f54 100644
--- a/rl_string_helper/tests/test_rl_string_helper.py
+++ b/rl_string_helper/tests/test_rl_string_helper.py
@@ -1,7 +1,11 @@
import sys
import re
from loguru import logger
-from rl_string_helper import RLStringHelper, quote_html, parse_markups, split_overlapping_ranges
+from rl_string_helper import (
+ RLStringHelper,
+ quote_html,
+ split_overlapping_ranges,
+)
class TestRLStringHelper:
@@ -16,19 +20,46 @@ class TestRLStringHelper:
# Test with standard HTML characters
html = ']*>Note: The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers\."
- assert re.match(expected_pattern, str(helper))
+ # expected_pattern = r"]*>Note: The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers\."
+ # assert re.match(expected_pattern, str(helper))
def test_basic_replace(self):
# Replace A to B - ONE to ONE char
@@ -90,10 +157,15 @@ class TestRLStringHelper:
helper.set_replace(0, 6, "B")
assert helper.get_text() == "B - 📊 - ABC"
- helper = RLStringHelper("Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.")
+ helper = RLStringHelper(
+ "Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏."
+ )
helper.set_template(0, 200, "