mirror of
https://codeberg.org/Freedium-cfd/web.git
synced 2026-03-11 09:04:37 +00:00
chore & refactor: structure changes, moving markup parser from rl_string parser to medium parser logic, type hintings
This commit is contained in:
parent
9adb380a83
commit
8b109ff4ec
23 changed files with 450 additions and 174 deletions
|
|
@ -15,8 +15,8 @@ RUN pip3 install --no-cache-dir ./rl_string_helper
|
|||
COPY ./database-lib ./database-lib
|
||||
RUN pip3 install --no-cache-dir ./database-lib
|
||||
|
||||
COPY ./core ./core
|
||||
RUN pip3 install --no-cache-dir ./core
|
||||
COPY ./medium-parser ./medium-parser
|
||||
RUN pip3 install --no-cache-dir ./medium-parser
|
||||
|
||||
COPY ./web ./web
|
||||
|
||||
|
|
|
|||
3
core/.gitmodules
vendored
3
core/.gitmodules
vendored
|
|
@ -1,3 +0,0 @@
|
|||
[submodule "medium_parser/toolkits/rl_string_helper"]
|
||||
path = medium_parser/toolkits/rl_string_helper
|
||||
url = https://github.com/Freedium-cfd/rl-string-helper
|
||||
|
|
@ -32,6 +32,8 @@ class CacheData:
|
|||
def __str__(self):
|
||||
return self.data
|
||||
|
||||
def has_data(self):
|
||||
return self.data is not None and self.data != ""
|
||||
|
||||
class CacheResponse:
|
||||
__slots__ = ("key", "data")
|
||||
|
|
|
|||
0
core/.gitignore → medium-parser/.gitignore
vendored
0
core/.gitignore → medium-parser/.gitignore
vendored
|
|
@ -1,3 +1,5 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import textwrap
|
||||
|
|
@ -10,28 +12,61 @@ import tld
|
|||
from asyncer import asyncify
|
||||
from loguru import logger
|
||||
|
||||
from rl_string_helper import RLStringHelper, parse_markups, split_overlapping_ranges
|
||||
from rl_string_helper import RLStringHelper, split_overlapping_ranges
|
||||
|
||||
from . import jinja_env
|
||||
from .exceptions import InvalidMediumPostID, InvalidMediumPostURL, InvalidURL, MediumParserException, MediumPostQueryError
|
||||
from .exceptions import (
|
||||
InvalidMediumPostID,
|
||||
InvalidMediumPostURL,
|
||||
InvalidURL,
|
||||
MediumParserException,
|
||||
MediumPostQueryError,
|
||||
)
|
||||
from .api import MediumApi
|
||||
from .models.html_result import HtmlResult
|
||||
from .time import convert_datetime_to_human_readable
|
||||
from .utils import correct_url, extract_hex_string, getting_percontage_of_match, is_has_valid_medium_post_id, is_valid_medium_url, is_valid_url, resolve_medium_url
|
||||
from .utils import (
|
||||
correct_url,
|
||||
extract_hex_string,
|
||||
getting_percontage_of_match,
|
||||
is_has_valid_medium_post_id,
|
||||
is_valid_medium_url,
|
||||
is_valid_url,
|
||||
resolve_medium_url,
|
||||
)
|
||||
from .markups import parse_markups
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from database_lib import AbstractCacheBackend
|
||||
|
||||
|
||||
class MediumParser:
|
||||
__slots__ = ("cache", "host_address", "jinja_template", "post_template", "timeout", "medium_api")
|
||||
__slots__ = (
|
||||
"cache",
|
||||
"host_address",
|
||||
"jinja_template",
|
||||
"post_template",
|
||||
"timeout",
|
||||
"medium_api",
|
||||
)
|
||||
|
||||
def __init__(self, cache: "AbstractCacheBackend", medium_api: MediumApi, timeout: int, host_address: str, template_folder: str = "./templates"):
|
||||
def __init__(
|
||||
self,
|
||||
cache: AbstractCacheBackend,
|
||||
medium_api: MediumApi,
|
||||
timeout: int,
|
||||
host_address: str,
|
||||
template_folder: str = "./templates",
|
||||
):
|
||||
self.timeout: int = timeout
|
||||
self.cache: AbstractCacheBackend = cache
|
||||
self.host_address: str = host_address
|
||||
self.jinja_template: jinja2.Environment = jinja2.Environment(loader=jinja2.FileSystemLoader(template_folder))
|
||||
self.post_template: jinja2.Template = self.jinja_template.get_template("post.html")
|
||||
self.jinja_template: jinja2.Environment = jinja2.Environment(
|
||||
loader=jinja2.FileSystemLoader(template_folder)
|
||||
)
|
||||
self.post_template: jinja2.Template = self.jinja_template.get_template(
|
||||
"post.html"
|
||||
)
|
||||
self.medium_api: MediumApi = medium_api
|
||||
|
||||
async def resolve(self, unknown: str) -> str:
|
||||
|
|
@ -52,7 +87,9 @@ class MediumParser:
|
|||
|
||||
post_id = await resolve_medium_url(sanitized_url, self.timeout)
|
||||
if not post_id:
|
||||
raise InvalidMediumPostURL(f"Could not find Medium post ID for URL: {sanitized_url}")
|
||||
raise InvalidMediumPostURL(
|
||||
f"Could not find Medium post ID for URL: {sanitized_url}"
|
||||
)
|
||||
|
||||
return post_id
|
||||
|
||||
|
|
@ -64,7 +101,8 @@ class MediumParser:
|
|||
async def _get_from_cache():
|
||||
logger.debug("Using cache backend")
|
||||
post_data = self.cache.pull(post_id)
|
||||
if post_data:
|
||||
logger.info(f"Found data in cache: {post_data[:10]}")
|
||||
if post_data and post_data.data.has_data():
|
||||
logger.debug("post query was found on cache")
|
||||
return post_data.json()
|
||||
logger.debug(f"No data found in cache by {post_id}")
|
||||
|
|
@ -76,6 +114,7 @@ class MediumParser:
|
|||
logger.debug("Timeout while waiting for cache")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
logger.error(f"Error while waiting for cache: {e}")
|
||||
return None
|
||||
|
||||
|
|
@ -109,7 +148,13 @@ class MediumParser:
|
|||
|
||||
return post_data, cache_used
|
||||
|
||||
async def query(self, post_id: str, use_cache: bool = True, retry: int = 2, force_cache: bool = False):
|
||||
async def query(
|
||||
self,
|
||||
post_id: str,
|
||||
use_cache: bool = True,
|
||||
retry: int = 2,
|
||||
force_cache: bool = False,
|
||||
):
|
||||
logger.debug(f"Medium QUERY: {use_cache=}, {retry=}, {force_cache=}")
|
||||
|
||||
post_data, is_cache_used = None, False
|
||||
|
|
@ -118,7 +163,9 @@ class MediumParser:
|
|||
reason = None
|
||||
while not post_data and attempt < retry:
|
||||
try:
|
||||
post_data, is_cache_used = await self.query_get(post_id, use_cache, force_cache)
|
||||
post_data, is_cache_used = await self.query_get(
|
||||
post_id, use_cache, force_cache
|
||||
)
|
||||
|
||||
if not post_data:
|
||||
reason = "No post data returned"
|
||||
|
|
@ -144,7 +191,9 @@ class MediumParser:
|
|||
if not reason:
|
||||
reason = "Unknown"
|
||||
|
||||
raise MediumPostQueryError(f"Could not query post by ID from API: {post_id}. Reason: {reason}")
|
||||
raise MediumPostQueryError(
|
||||
f"Could not query post by ID from API: {post_id}. Reason: {reason}"
|
||||
)
|
||||
|
||||
if not is_cache_used:
|
||||
logger.debug("Pushing post data to cache")
|
||||
|
|
@ -153,13 +202,23 @@ class MediumParser:
|
|||
logger.trace(f"Query: done")
|
||||
return post_data
|
||||
|
||||
def _parse_and_render_content_html_post(self, content: dict, title: str, subtitle: str, preview_image_id: str, highlights: list, tags: list) -> tuple[list, str, str]:
|
||||
def _parse_and_render_content_html_post(
|
||||
self,
|
||||
content: dict,
|
||||
title: str,
|
||||
subtitle: str,
|
||||
preview_image_id: str,
|
||||
highlights: list,
|
||||
tags: list,
|
||||
) -> tuple[list, str, str]:
|
||||
paragraphs = content["bodyModel"]["paragraphs"]
|
||||
tags_list = [tag["displayTitle"] for tag in tags]
|
||||
out_paragraphs: list[str] = []
|
||||
current_pos = 0
|
||||
|
||||
def parse_paragraph_text(text: str, markups: list, is_code: bool = False) -> str:
|
||||
def parse_paragraph_text(
|
||||
text: str, markups: list, is_code: bool = False
|
||||
) -> RLStringHelper:
|
||||
if is_code:
|
||||
quote_html_type = ["minimal"]
|
||||
else:
|
||||
|
|
@ -170,7 +229,9 @@ class MediumParser:
|
|||
fixed_markups = split_overlapping_ranges(parsed_markups)
|
||||
|
||||
for markup in fixed_markups:
|
||||
text_formater.set_template(markup["start"], markup["end"], markup["template"])
|
||||
text_formater.set_template(
|
||||
markup["start"], markup["end"], markup["template"]
|
||||
)
|
||||
|
||||
return text_formater
|
||||
|
||||
|
|
@ -199,33 +260,48 @@ class MediumParser:
|
|||
current_pos += 1
|
||||
continue
|
||||
if paragraph["type"] in ["H4", "P"]:
|
||||
is_paragraph_subtitle = getting_percontage_of_match(paragraph["text"], subtitle) > 80
|
||||
is_paragraph_subtitle = (
|
||||
getting_percontage_of_match(paragraph["text"], subtitle) > 80
|
||||
)
|
||||
if is_paragraph_subtitle and not subtitle.endswith("…"):
|
||||
logger.trace("Subtitle was detected, ignore...")
|
||||
subtitle = paragraph["text"]
|
||||
current_pos += 1
|
||||
continue
|
||||
elif subtitle and subtitle.endswith("…") and len(paragraph["text"]) > 100:
|
||||
elif (
|
||||
subtitle
|
||||
and subtitle.endswith("…")
|
||||
and len(paragraph["text"]) > 100
|
||||
):
|
||||
subtitle = ""
|
||||
elif paragraph["type"] == "IMG":
|
||||
if paragraph["metadata"] and paragraph["metadata"]["id"] == preview_image_id:
|
||||
if (
|
||||
paragraph["metadata"]
|
||||
and paragraph["metadata"]["id"] == preview_image_id
|
||||
):
|
||||
logger.trace("Preview image was detected, ignore...")
|
||||
current_pos += 1
|
||||
continue
|
||||
|
||||
if paragraph["text"] is not None:
|
||||
text_formater = parse_paragraph_text(paragraph["text"], paragraph["markups"])
|
||||
text_formater = parse_paragraph_text(
|
||||
paragraph["text"], paragraph["markups"]
|
||||
)
|
||||
else:
|
||||
text_formater = None
|
||||
text_formater = parse_paragraph_text("", [])
|
||||
|
||||
for highlight in highlights:
|
||||
for highlight_paragraph in highlight["paragraphs"]:
|
||||
if highlight_paragraph["name"] == paragraph["name"]:
|
||||
logger.trace("Apply highlight to this paragraph")
|
||||
if highlight_paragraph["text"] != text_formater.get_text():
|
||||
logger.warning("Highlighted text and paragraph text are not the same! Skip...")
|
||||
logger.warning(
|
||||
"Highlighted text and paragraph text are not the same! Skip..."
|
||||
)
|
||||
break
|
||||
quote_markup_template = '<mark class="bg-emerald-300">{{ text }}</mark>'
|
||||
quote_markup_template = (
|
||||
'<mark class="bg-emerald-300">{{ text }}</mark>'
|
||||
)
|
||||
text_formater.set_template(
|
||||
highlight["startOffset"],
|
||||
highlight["endOffset"],
|
||||
|
|
@ -237,45 +313,65 @@ class MediumParser:
|
|||
css_class = []
|
||||
if out_paragraphs:
|
||||
css_class.append("pt-12")
|
||||
header_template = jinja_env.from_string('<h2 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h2>')
|
||||
header_template_rendered = header_template.render(text=text_formater.get_text(), css_class="".join(css_class))
|
||||
header_template = jinja_env.from_string(
|
||||
'<h2 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h2>'
|
||||
)
|
||||
header_template_rendered = header_template.render(
|
||||
text=text_formater.get_text(), css_class="".join(css_class)
|
||||
)
|
||||
out_paragraphs.append(header_template_rendered)
|
||||
elif paragraph["type"] == "H3":
|
||||
css_class = []
|
||||
if out_paragraphs:
|
||||
css_class.append("pt-12")
|
||||
header_template = jinja_env.from_string('<h3 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h3>')
|
||||
header_template_rendered = header_template.render(text=text_formater.get_text(), css_class="".join(css_class))
|
||||
header_template = jinja_env.from_string(
|
||||
'<h3 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-1xl md:text-2xl {{ css_class }}">{{ text }}</h3>'
|
||||
)
|
||||
header_template_rendered = header_template.render(
|
||||
text=text_formater.get_text(), css_class="".join(css_class)
|
||||
)
|
||||
out_paragraphs.append(header_template_rendered)
|
||||
elif paragraph["type"] == "H4":
|
||||
css_class = []
|
||||
if out_paragraphs:
|
||||
css_class.append("pt-8")
|
||||
header_template = jinja_env.from_string('<h4 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-l md:text-xl {{ css_class }}">{{ text }}</h4>')
|
||||
header_template_rendered = header_template.render(text=text_formater.get_text(), css_class="".join(css_class))
|
||||
header_template = jinja_env.from_string(
|
||||
'<h4 class="font-bold font-sans break-normal text-gray-900 dark:text-gray-100 text-l md:text-xl {{ css_class }}">{{ text }}</h4>'
|
||||
)
|
||||
header_template_rendered = header_template.render(
|
||||
text=text_formater.get_text(), css_class="".join(css_class)
|
||||
)
|
||||
out_paragraphs.append(header_template_rendered)
|
||||
elif paragraph["type"] == "IMG":
|
||||
image_template = jinja_env.from_string(
|
||||
'<div class="mt-7"><img alt="{{ paragraph.metadata.alt }}" class="pt-5 lazy m-auto" role="presentation" data-src="https://miro.medium.com/v2/resize:fit:700/{{ paragraph.metadata.id }}"></div>'
|
||||
)
|
||||
image_caption_template = jinja_env.from_string("<figcaption class='mt-3 text-sm text-center text-gray-500 dark:text-gray-200'>{{ text }}</figcaption>")
|
||||
image_caption_template = jinja_env.from_string(
|
||||
"<figcaption class='mt-3 text-sm text-center text-gray-500 dark:text-gray-200'>{{ text }}</figcaption>"
|
||||
)
|
||||
if paragraph["layout"] == "OUTSET_ROW":
|
||||
image_templates_row = []
|
||||
img_row_template = jinja_env.from_string('<div class="mx-5"><div class="flex flex-row justify-center">{{ images }}</div></div>')
|
||||
img_row_template = jinja_env.from_string(
|
||||
'<div class="mx-5"><div class="flex flex-row justify-center">{{ images }}</div></div>'
|
||||
)
|
||||
image_template_rendered = image_template.render(paragraph=paragraph)
|
||||
image_templates_row.append(image_template_rendered)
|
||||
_tmp_current_pos = current_pos + 1
|
||||
while len(paragraphs) > _tmp_current_pos:
|
||||
_paragraph = paragraphs[_tmp_current_pos]
|
||||
if _paragraph["layout"] == "OUTSET_ROW_CONTINUE":
|
||||
image_template_rendered = image_template.render(paragraph=_paragraph)
|
||||
image_template_rendered = image_template.render(
|
||||
paragraph=_paragraph
|
||||
)
|
||||
image_templates_row.append(image_template_rendered)
|
||||
else:
|
||||
break
|
||||
|
||||
_tmp_current_pos += 1
|
||||
|
||||
img_row_template_rendered = img_row_template.render(images="".join(image_templates_row))
|
||||
img_row_template_rendered = img_row_template.render(
|
||||
images="".join(image_templates_row)
|
||||
)
|
||||
out_paragraphs.append(img_row_template_rendered)
|
||||
|
||||
current_pos = _tmp_current_pos - 1
|
||||
|
|
@ -287,18 +383,26 @@ class MediumParser:
|
|||
image_template_rendered = image_template.render(paragraph=paragraph)
|
||||
out_paragraphs.append(image_template_rendered)
|
||||
if paragraph["text"]:
|
||||
out_paragraphs.append(image_caption_template.render(text=text_formater.get_text()))
|
||||
out_paragraphs.append(
|
||||
image_caption_template.render(text=text_formater.get_text())
|
||||
)
|
||||
elif paragraph["type"] == "P":
|
||||
css_class = ["leading-8"]
|
||||
paragraph_template = jinja_env.from_string('<p class="{{ css_class }}">{{ text }}</p>')
|
||||
paragraph_template = jinja_env.from_string(
|
||||
'<p class="{{ css_class }}">{{ text }}</p>'
|
||||
)
|
||||
if paragraphs[current_pos - 1]["type"] in ["H4", "H3"]:
|
||||
css_class.append("mt-3")
|
||||
else:
|
||||
css_class.append("mt-7")
|
||||
paragraph_template_rendered = paragraph_template.render(text=text_formater.get_text(), css_class=" ".join(css_class))
|
||||
paragraph_template_rendered = paragraph_template.render(
|
||||
text=text_formater.get_text(), css_class=" ".join(css_class)
|
||||
)
|
||||
out_paragraphs.append(paragraph_template_rendered)
|
||||
elif paragraph["type"] == "ULI":
|
||||
uli_template = jinja_env.from_string('<ul class="list-disc pl-8 mt-2">{{ li }}</ul>')
|
||||
uli_template = jinja_env.from_string(
|
||||
'<ul class="list-disc pl-8 mt-2">{{ li }}</ul>'
|
||||
)
|
||||
li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
|
||||
li_templates = []
|
||||
|
||||
|
|
@ -306,8 +410,12 @@ class MediumParser:
|
|||
while len(paragraphs) > _tmp_current_pos:
|
||||
_paragraph = paragraphs[_tmp_current_pos]
|
||||
if _paragraph["type"] == "ULI":
|
||||
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
|
||||
li_template_rendered = li_template.render(text=text_formater.get_text())
|
||||
text_formater = parse_paragraph_text(
|
||||
_paragraph["text"], _paragraph["markups"]
|
||||
)
|
||||
li_template_rendered = li_template.render(
|
||||
text=text_formater.get_text()
|
||||
)
|
||||
li_templates.append(li_template_rendered)
|
||||
else:
|
||||
break
|
||||
|
|
@ -319,7 +427,9 @@ class MediumParser:
|
|||
|
||||
current_pos = _tmp_current_pos - 1
|
||||
elif paragraph["type"] == "OLI":
|
||||
ol_template = jinja_env.from_string('<ol class="list-decimal pl-8 mt-2">{{ li }}</ol>')
|
||||
ol_template = jinja_env.from_string(
|
||||
'<ol class="list-decimal pl-8 mt-2">{{ li }}</ol>'
|
||||
)
|
||||
li_template = jinja_env.from_string("<li class='mt-3'>{{ text }}</li>")
|
||||
li_templates = []
|
||||
|
||||
|
|
@ -327,8 +437,12 @@ class MediumParser:
|
|||
while len(paragraphs) > _tmp_current_pos:
|
||||
_paragraph = paragraphs[_tmp_current_pos]
|
||||
if _paragraph["type"] == "OLI":
|
||||
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"])
|
||||
li_template_rendered = li_template.render(text=text_formater.get_text())
|
||||
text_formater = parse_paragraph_text(
|
||||
_paragraph["text"], _paragraph["markups"]
|
||||
)
|
||||
li_template_rendered = li_template.render(
|
||||
text=text_formater.get_text()
|
||||
)
|
||||
li_templates.append(li_template_rendered)
|
||||
else:
|
||||
break
|
||||
|
|
@ -340,12 +454,21 @@ class MediumParser:
|
|||
|
||||
current_pos = _tmp_current_pos - 1
|
||||
elif paragraph["type"] == "PRE":
|
||||
pre_template = jinja_env.from_string('<pre class="mt-7 flex flex-col justify-center border dark:border-gray-700">{{code_block}}</pre>')
|
||||
code_block_template = jinja_env.from_string('<code class="p-2 bg-gray-100 dark:bg-gray-900 overflow-x-auto {{ code_css_class }}">{{ text }}</code>')
|
||||
pre_template = jinja_env.from_string(
|
||||
'<pre class="mt-7 flex flex-col justify-center border dark:border-gray-700">{{code_block}}</pre>'
|
||||
)
|
||||
code_block_template = jinja_env.from_string(
|
||||
'<code class="p-2 bg-gray-100 dark:bg-gray-900 overflow-x-auto {{ code_css_class }}">{{ text }}</code>'
|
||||
)
|
||||
|
||||
code_css_class = []
|
||||
if paragraph["codeBlockMetadata"] and paragraph["codeBlockMetadata"]["lang"] is not None:
|
||||
code_css_class.append(f'language-{paragraph["codeBlockMetadata"]["lang"]}')
|
||||
if (
|
||||
paragraph["codeBlockMetadata"]
|
||||
and paragraph["codeBlockMetadata"]["lang"] is not None
|
||||
):
|
||||
code_css_class.append(
|
||||
f'language-{paragraph["codeBlockMetadata"]["lang"]}'
|
||||
)
|
||||
else:
|
||||
code_css_class.append("nohighlight")
|
||||
# code_css_class.append("auto")
|
||||
|
|
@ -355,15 +478,21 @@ class MediumParser:
|
|||
while len(paragraphs) > _tmp_current_pos:
|
||||
_paragraph = paragraphs[_tmp_current_pos]
|
||||
if _paragraph["type"] == "PRE":
|
||||
text_formater = parse_paragraph_text(_paragraph["text"], _paragraph["markups"], is_code=True)
|
||||
text_formater = parse_paragraph_text(
|
||||
_paragraph["text"], _paragraph["markups"], is_code=True
|
||||
)
|
||||
code_list.append(text_formater.get_text())
|
||||
else:
|
||||
break
|
||||
|
||||
_tmp_current_pos += 1
|
||||
|
||||
code_block_template_rendered = code_block_template.render(text="\n".join(code_list), code_css_class=" ".join(code_css_class))
|
||||
pre_template_rendered = pre_template.render(code_block=code_block_template_rendered)
|
||||
code_block_template_rendered = code_block_template.render(
|
||||
text="\n".join(code_list), code_css_class=" ".join(code_css_class)
|
||||
)
|
||||
pre_template_rendered = pre_template.render(
|
||||
code_block=code_block_template_rendered
|
||||
)
|
||||
|
||||
out_paragraphs.append(pre_template_rendered)
|
||||
current_pos = _tmp_current_pos - 1
|
||||
|
|
@ -375,7 +504,9 @@ class MediumParser:
|
|||
logger.trace(bq_template_rendered)
|
||||
out_paragraphs.append(bq_template_rendered)
|
||||
elif paragraph["type"] == "PQ":
|
||||
pq_template = jinja_env.from_string('<blockquote class="mt-7 text-2xl ml-5 text-gray-600 dark:text-gray-300"><p>{{ text }}</p></blockquote>')
|
||||
pq_template = jinja_env.from_string(
|
||||
'<blockquote class="mt-7 text-2xl ml-5 text-gray-600 dark:text-gray-300"><p>{{ text }}</p></blockquote>'
|
||||
)
|
||||
pq_template_rendered = pq_template.render(text=text_formater.get_text())
|
||||
logger.trace(pq_template_rendered)
|
||||
out_paragraphs.append(pq_template_rendered)
|
||||
|
|
@ -387,14 +518,18 @@ class MediumParser:
|
|||
if paragraph.get("mixtapeMetadata") is not None:
|
||||
url = paragraph["mixtapeMetadata"]["href"]
|
||||
else:
|
||||
logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't get url")
|
||||
logger.warning(
|
||||
"Ignore MIXTAPE_EMBED paragraph type, since we can't get url"
|
||||
)
|
||||
current_pos += 1
|
||||
continue
|
||||
|
||||
text_raw = paragraph["text"]
|
||||
|
||||
if len(paragraph["markups"]) != 3:
|
||||
logger.warning("Ignore MIXTAPE_EMBED paragraph type, since we can't split text")
|
||||
logger.warning(
|
||||
"Ignore MIXTAPE_EMBED paragraph type, since we can't split text"
|
||||
)
|
||||
current_pos += 1
|
||||
continue
|
||||
|
||||
|
|
@ -405,7 +540,9 @@ class MediumParser:
|
|||
logger.trace(f"{description_range=}")
|
||||
|
||||
embed_title = text_raw[title_range["start"] : title_range["end"]]
|
||||
embed_description = text_raw[description_range["start"] : description_range["end"]]
|
||||
embed_description = text_raw[
|
||||
description_range["start"] : description_range["end"]
|
||||
]
|
||||
|
||||
logger.trace(f"{embed_title=}")
|
||||
logger.trace(f"{embed_description=}")
|
||||
|
|
@ -413,19 +550,30 @@ class MediumParser:
|
|||
try:
|
||||
embed_site = tld.get_fld(url)
|
||||
except Exception as ex:
|
||||
logger.warning(f"Can't get embed site fld: {ex}. Using custom logic...")
|
||||
logger.warning(
|
||||
f"Can't get embed site fld: {ex}. Using custom logic..."
|
||||
)
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
embed_site = parsed_url.hostname
|
||||
|
||||
logger.trace(f"{embed_site=}")
|
||||
|
||||
embed_template_rendered = embed_template.render(paragraph=paragraph, url=url, embed_title=embed_title, embed_description=embed_description, embed_site=embed_site)
|
||||
embed_template_rendered = embed_template.render(
|
||||
paragraph=paragraph,
|
||||
url=url,
|
||||
embed_title=embed_title,
|
||||
embed_description=embed_description,
|
||||
embed_site=embed_site,
|
||||
)
|
||||
out_paragraphs.append(embed_template_rendered)
|
||||
elif paragraph["type"] == "IFRAME":
|
||||
iframe_template = jinja_env.from_string(
|
||||
'<div class="mt-7"><iframe class="lazy w-full" data-src="{{ host_address }}/render_iframe/{{ iframe_id }}" allowfullscreen="" frameborder="0" scrolling="no"></iframe></div>'
|
||||
)
|
||||
iframe_template_rendered = iframe_template.render(host_address=self.host_address, iframe_id=paragraph["iframe"]["mediaResource"]["id"])
|
||||
iframe_template_rendered = iframe_template.render(
|
||||
host_address=self.host_address,
|
||||
iframe_id=paragraph["iframe"]["mediaResource"]["id"],
|
||||
)
|
||||
out_paragraphs.append(iframe_template_rendered)
|
||||
|
||||
else:
|
||||
|
|
@ -444,10 +592,18 @@ class MediumParser:
|
|||
else:
|
||||
return result
|
||||
|
||||
async def generate_metadata(self, post_data: dict, post_id: str, as_dict: bool = False) -> tuple:
|
||||
title = RLStringHelper(post_data["data"]["post"]["title"], ["minimal"]).get_text()
|
||||
subtitle = RLStringHelper(post_data["data"]["post"]["previewContent"]["subtitle"]).get_text()
|
||||
description = RLStringHelper(textwrap.shorten(subtitle, width=100, placeholder="...")).get_text()
|
||||
async def generate_metadata(
|
||||
self, post_data: dict, post_id: str, as_dict: bool = False
|
||||
) -> tuple | dict[str, str]:
|
||||
title = RLStringHelper(
|
||||
post_data["data"]["post"]["title"], ["minimal"]
|
||||
).get_text()
|
||||
subtitle = RLStringHelper(
|
||||
post_data["data"]["post"]["previewContent"]["subtitle"]
|
||||
).get_text()
|
||||
description = RLStringHelper(
|
||||
textwrap.shorten(subtitle, width=100, placeholder="...")
|
||||
).get_text()
|
||||
preview_image_id = post_data["data"]["post"]["previewImage"]["id"]
|
||||
creator = post_data["data"]["post"]["creator"]
|
||||
collection = post_data["data"]["post"]["collection"]
|
||||
|
|
@ -455,8 +611,12 @@ class MediumParser:
|
|||
|
||||
reading_time = math.ceil(post_data["data"]["post"]["readingTime"])
|
||||
free_access = "No" if post_data["data"]["post"]["isLocked"] else "Yes"
|
||||
updated_at = convert_datetime_to_human_readable(post_data["data"]["post"]["updatedAt"])
|
||||
first_published_at = convert_datetime_to_human_readable(post_data["data"]["post"]["firstPublishedAt"])
|
||||
updated_at = convert_datetime_to_human_readable(
|
||||
post_data["data"]["post"]["updatedAt"]
|
||||
)
|
||||
first_published_at = convert_datetime_to_human_readable(
|
||||
post_data["data"]["post"]["firstPublishedAt"]
|
||||
)
|
||||
tags = post_data["data"]["post"]["tags"]
|
||||
|
||||
if as_dict:
|
||||
|
|
@ -476,14 +636,29 @@ class MediumParser:
|
|||
"tags": tags,
|
||||
}
|
||||
|
||||
return title, subtitle, description, url, creator, collection, reading_time, free_access, updated_at, first_published_at, preview_image_id, tags
|
||||
return (
|
||||
title,
|
||||
subtitle,
|
||||
description,
|
||||
url,
|
||||
creator,
|
||||
collection,
|
||||
reading_time,
|
||||
free_access,
|
||||
updated_at,
|
||||
first_published_at,
|
||||
preview_image_id,
|
||||
tags,
|
||||
)
|
||||
|
||||
async def _render_as_html(self, post_data: dict, post_id: str) -> "HtmlResult":
|
||||
# Generate metadata in parallel
|
||||
metadata_task = asyncio.create_task(self.generate_metadata(post_data, post_id))
|
||||
|
||||
# Parse and render content in parallel
|
||||
content, title, subtitle = await asyncify(self._parse_and_render_content_html_post)(
|
||||
content, title, subtitle = await asyncify(
|
||||
self._parse_and_render_content_html_post
|
||||
)(
|
||||
post_data["data"]["post"]["content"],
|
||||
post_data["data"]["post"]["title"],
|
||||
post_data["data"]["post"]["previewContent"]["subtitle"],
|
||||
|
|
@ -493,13 +668,28 @@ class MediumParser:
|
|||
)
|
||||
|
||||
# Await metadata
|
||||
title, subtitle, description, url, creator, collection, reading_time, free_access, updated_at, first_published_at, preview_image_id, tags = await metadata_task
|
||||
(
|
||||
title,
|
||||
subtitle,
|
||||
description,
|
||||
url,
|
||||
creator,
|
||||
collection,
|
||||
reading_time,
|
||||
free_access,
|
||||
updated_at,
|
||||
first_published_at,
|
||||
preview_image_id,
|
||||
tags,
|
||||
) = await metadata_task
|
||||
|
||||
post_page_title_raw = "{{ title }} | by {{ creator.name }}"
|
||||
if collection:
|
||||
post_page_title_raw += " | in {{ collection.name }}"
|
||||
post_page_title = jinja_env.from_string(post_page_title_raw)
|
||||
post_page_title_rendered = post_page_title.render(title=title, creator=creator, collection=collection)
|
||||
post_page_title_rendered = post_page_title.render(
|
||||
title=title, creator=creator, collection=collection
|
||||
)
|
||||
|
||||
post_context = {
|
||||
"subtitle": subtitle,
|
||||
|
|
@ -517,7 +707,11 @@ class MediumParser:
|
|||
}
|
||||
post_template_rendered = self.post_template.render(post_context)
|
||||
|
||||
return HtmlResult(post_page_title_rendered, description, url, post_template_rendered)
|
||||
return HtmlResult(
|
||||
post_page_title_rendered, description, url, post_template_rendered
|
||||
)
|
||||
|
||||
async def render_as_markdown(self) -> str:
|
||||
raise NotImplementedError("Markdown rendering is not implemented. Please use HTML rendering instead")
|
||||
raise NotImplementedError(
|
||||
"Markdown rendering is not implemented. Please use HTML rendering instead"
|
||||
)
|
||||
50
medium-parser/medium_parser/markups.py
Normal file
50
medium-parser/medium_parser/markups.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
from medium_parser import jinja_env
|
||||
|
||||
|
||||
def raw_render(**kwargs):
|
||||
for key, value in kwargs.items():
|
||||
if isinstance(value, str):
|
||||
kwargs[key] = f"{{% raw %}}{value}{{% endraw %}}"
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
def parse_markups(markups: list[str]):
|
||||
markups_out = []
|
||||
|
||||
for markup in markups:
|
||||
if markup["type"] == "A":
|
||||
if markup["anchorType"] == "LINK":
|
||||
template = jinja_env.from_string(
|
||||
'<a style="text-decoration: underline;" rel="{{rel}}" title="{{title}}" href="{{href}}" target="_blank">{{text}}</a>'
|
||||
)
|
||||
template = template.render(
|
||||
raw_render(
|
||||
rel=markup.get("rel", ""),
|
||||
title=markup.get("title", ""),
|
||||
href=markup["href"],
|
||||
)
|
||||
)
|
||||
elif markup["anchorType"] == "USER":
|
||||
template = jinja_env.from_string(
|
||||
'<a style="text-decoration: underline;" href="https://medium.com/u/{{userId}}">{{text}}</a>'
|
||||
)
|
||||
template = template.render(userId=markup["userId"])
|
||||
else:
|
||||
continue
|
||||
elif markup["type"] == "STRONG":
|
||||
template = "<strong>{{text}}</strong>"
|
||||
elif markup["type"] == "EM":
|
||||
template = "<em>{{text}}</em>"
|
||||
elif markup["type"] == "CODE":
|
||||
template = (
|
||||
"<code class='p-1.5 bg-gray-300 dark:bg-gray-600'>{{text}}</code>"
|
||||
)
|
||||
else:
|
||||
continue
|
||||
|
||||
template = jinja_env.from_string(template)
|
||||
markup["template"] = template
|
||||
markups_out.append(markup)
|
||||
|
||||
return markups_out
|
||||
|
|
@ -1,2 +1,2 @@
|
|||
from .string_helper import RLStringHelper, parse_markups, split_overlapping_ranges
|
||||
from .string_helper import RLStringHelper, split_overlapping_ranges
|
||||
from .utils import quote_html, quote_symbol
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ In UTF-16, each Unicode character may be encoded as one or two code units (byte)
|
|||
the value returned by length might not match the actual number of Unicode characters in the string.
|
||||
|
||||
Python uses UTF-8 encoding, which each character is encoded as one byte. So here is a workaround to get the actual number of characters and manipulate them in string as in UTF-16 encoding. See pre_utf_16_bang and post_utf_16_bang function.
|
||||
More info to read: https://habr.com/ru/articles/769256/
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -38,14 +39,16 @@ class RLStringHelper:
|
|||
_default_bang_char: str = "R",
|
||||
):
|
||||
self.string: str = quote_symbol(string)
|
||||
self.templates = []
|
||||
self.quote_replaces = []
|
||||
self.replaces = []
|
||||
self.templates: list[tuple[tuple[int, int], Template]] = []
|
||||
self.quote_replaces: list[tuple[tuple[int, int], str]] = []
|
||||
self.replaces: list[tuple[tuple[int, int], str]] = []
|
||||
self.quote_html_type = quote_html_type
|
||||
self._default_bang_char = _default_bang_char
|
||||
|
||||
def pre_utf_16_bang(self, string: str, string_pos_matrix: list):
|
||||
utf_16_bang_list = []
|
||||
def pre_utf_16_bang(
|
||||
self, string: str, string_pos_matrix: list
|
||||
) -> tuple[str, list, list[tuple[int, int, int]]]:
|
||||
utf_16_bang_list: list[tuple[int, int, int]] = []
|
||||
string_len_utf_16 = len(string.encode("utf-16-le")) // 2
|
||||
if string_len_utf_16 == len(string):
|
||||
logger.trace("String doesn't contain multibyte characters")
|
||||
|
|
@ -68,7 +71,13 @@ class RLStringHelper:
|
|||
|
||||
return string, string_pos_matrix, utf_16_bang_list
|
||||
|
||||
def _paste_char(self, string: str, string_pos_matrix: list, pos: int, char: str):
|
||||
def _paste_char(
|
||||
self,
|
||||
string: StringAssignmentMixin,
|
||||
string_pos_matrix: list,
|
||||
pos: int,
|
||||
char: str,
|
||||
) -> tuple[StringAssignmentMixin, list]:
|
||||
char_len = len(char)
|
||||
string_pos_matrix.insert(pos, string_pos_matrix[pos])
|
||||
for matrix_i in range(pos + 1, len(string_pos_matrix)):
|
||||
|
|
@ -78,7 +87,7 @@ class RLStringHelper:
|
|||
|
||||
def _delete_char(
|
||||
self,
|
||||
string: str,
|
||||
string: StringAssignmentMixin,
|
||||
string_pos_matrix: list,
|
||||
pos: int,
|
||||
char_len: int,
|
||||
|
|
@ -115,7 +124,7 @@ class RLStringHelper:
|
|||
post_transbang += char_len
|
||||
return string, string_pos_matrix
|
||||
|
||||
def set_template(self, start: int, end: int, template: str):
|
||||
def set_template(self, start: int, end: int, template: str | Template):
|
||||
if not isinstance(template, Template):
|
||||
template = jinja_env.from_string(template)
|
||||
self.templates.append(((start, end), template))
|
||||
|
|
@ -334,51 +343,3 @@ def split_overlapping_range_position(positions):
|
|||
result[-1]["end"] = max(last["end"], pos["end"])
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def raw_render(**kwargs):
|
||||
for key, value in kwargs.items():
|
||||
if isinstance(value, str):
|
||||
kwargs[key] = f"{{% raw %}}{value}{{% endraw %}}"
|
||||
return kwargs
|
||||
|
||||
|
||||
def parse_markups(markups: list[str]):
|
||||
markups_out = []
|
||||
|
||||
for markup in markups:
|
||||
if markup["type"] == "A":
|
||||
if markup["anchorType"] == "LINK":
|
||||
template = jinja_env.from_string(
|
||||
'<a style="text-decoration: underline;" rel="{{rel}}" title="{{title}}" href="{{href}}" target="_blank">{{text}}</a>'
|
||||
)
|
||||
template = template.render(
|
||||
raw_render(
|
||||
rel=markup.get("rel", ""),
|
||||
title=markup.get("title", ""),
|
||||
href=markup["href"],
|
||||
)
|
||||
)
|
||||
elif markup["anchorType"] == "USER":
|
||||
template = jinja_env.from_string(
|
||||
'<a style="text-decoration: underline;" href="https://medium.com/u/{{userId}}">{{text}}</a>'
|
||||
)
|
||||
template = template.render(userId=markup["userId"])
|
||||
else:
|
||||
continue
|
||||
elif markup["type"] == "STRONG":
|
||||
template = "<strong>{{text}}</strong>"
|
||||
elif markup["type"] == "EM":
|
||||
template = "<em>{{text}}</em>"
|
||||
elif markup["type"] == "CODE":
|
||||
template = (
|
||||
"<code class='p-1.5 bg-gray-300 dark:bg-gray-600'>{{text}}</code>"
|
||||
)
|
||||
else:
|
||||
continue
|
||||
|
||||
template = jinja_env.from_string(template)
|
||||
markup["template"] = template
|
||||
markups_out.append(markup)
|
||||
|
||||
return markups_out
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import sys
|
||||
import re
|
||||
from loguru import logger
|
||||
from rl_string_helper import RLStringHelper, quote_html, parse_markups, split_overlapping_ranges
|
||||
from rl_string_helper import (
|
||||
RLStringHelper,
|
||||
quote_html,
|
||||
split_overlapping_ranges,
|
||||
)
|
||||
|
||||
|
||||
class TestRLStringHelper:
|
||||
|
|
@ -16,19 +20,46 @@ class TestRLStringHelper:
|
|||
# Test with standard HTML characters
|
||||
html = '<div class="test">Hello & World</div>'
|
||||
result = list(quote_html(html, "full"))
|
||||
expected = [((0, 1), "<"), ((11, 12), """), ((16, 17), """), ((17, 18), ">"), ((24, 25), "&"), ((31, 32), "<"), ((36, 37), ">")]
|
||||
expected = [
|
||||
((0, 1), "<"),
|
||||
((11, 12), """),
|
||||
((16, 17), """),
|
||||
((17, 18), ">"),
|
||||
((24, 25), "&"),
|
||||
((31, 32), "<"),
|
||||
((36, 37), ">"),
|
||||
]
|
||||
assert sorted(result) == sorted(expected)
|
||||
|
||||
# Test with extra characters
|
||||
html = '<div class="test">\nHello & World</div>'
|
||||
result = list(quote_html(html, "extra"))
|
||||
expected = [((0, 1), "<"), ((11, 12), """), ((16, 17), """), ((17, 18), ">"), ((25, 26), "&"), ((32, 33), "<"), ((37, 38), ">"), ((18, 19), "<br />")]
|
||||
expected = [
|
||||
((0, 1), "<"),
|
||||
((11, 12), """),
|
||||
((16, 17), """),
|
||||
((17, 18), ">"),
|
||||
((25, 26), "&"),
|
||||
((32, 33), "<"),
|
||||
((37, 38), ">"),
|
||||
((18, 19), "<br />"),
|
||||
]
|
||||
assert sorted(result) == sorted(expected)
|
||||
|
||||
# Test with quote characters
|
||||
html = '<div class="test">Hello & \'World\'</div>'
|
||||
html = "<div class=\"test\">Hello & 'World'</div>"
|
||||
result = list(quote_html(html, "full"))
|
||||
expected = [((0, 1), "<"), ((11, 12), """), ((16, 17), """), ((17, 18), ">"), ((24, 25), "&"), ((26, 27), "'"), ((32, 33), "'"), ((33, 34), "<"), ((38, 39), ">")]
|
||||
expected = [
|
||||
((0, 1), "<"),
|
||||
((11, 12), """),
|
||||
((16, 17), """),
|
||||
((17, 18), ">"),
|
||||
((24, 25), "&"),
|
||||
((26, 27), "'"),
|
||||
((32, 33), "'"),
|
||||
((33, 34), "<"),
|
||||
((38, 39), ">"),
|
||||
]
|
||||
assert sorted(result) == sorted(expected)
|
||||
|
||||
def test_basic_template(self):
|
||||
|
|
@ -42,26 +73,62 @@ class TestRLStringHelper:
|
|||
helper.set_template(0, 11, "<i>{{text}}</i>")
|
||||
assert str(helper) == "<i><a>Hello</a> <b>world</b></i>"
|
||||
|
||||
def test_super_duper_overlapsing(self):
|
||||
# https://medium.com/google-cloud/implementing-semantic-caching-a-step-by-step-guide-to-faster-cost-effective-genai-workflows-ef85d8e72883#bypass
|
||||
text = "Note: The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers."
|
||||
helper = RLStringHelper(text)
|
||||
markups = (
|
||||
[
|
||||
{"__typename": "Markup", "name": None, "type": "CODE", "start": 0, "end": 5, "href": None, "title": None, "rel": None, "anchorType": None, "userId": None, "creatorIds": None},
|
||||
{"__typename": "Markup", "name": None, "type": "STRONG", "start": 0, "end": 6, "href": None, "title": None, "rel": None, "anchorType": None, "userId": None, "creatorIds": None},
|
||||
{"__typename": "Markup", "name": None, "type": "EM", "start": 0, "end": 6, "href": None, "title": None, "rel": None, "anchorType": None, "userId": None, "creatorIds": None},
|
||||
],
|
||||
)
|
||||
parsed_markups = parse_markups(markups[0])
|
||||
logger.debug(parsed_markups)
|
||||
parsed_markups = split_overlapping_ranges(parsed_markups)
|
||||
logger.debug(parsed_markups)
|
||||
for markup in parsed_markups:
|
||||
helper.set_template(markup["start"], markup["end"], markup["template"])
|
||||
# def test_super_duper_overlapsing(self):
|
||||
# # https://medium.com/google-cloud/implementing-semantic-caching-a-step-by-step-guide-to-faster-cost-effective-genai-workflows-ef85d8e72883#bypass
|
||||
# text = "Note: The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers."
|
||||
# helper = RLStringHelper(text)
|
||||
# markups = (
|
||||
# [
|
||||
# {
|
||||
# "__typename": "Markup",
|
||||
# "name": None,
|
||||
# "type": "CODE",
|
||||
# "start": 0,
|
||||
# "end": 5,
|
||||
# "href": None,
|
||||
# "title": None,
|
||||
# "rel": None,
|
||||
# "anchorType": None,
|
||||
# "userId": None,
|
||||
# "creatorIds": None,
|
||||
# },
|
||||
# {
|
||||
# "__typename": "Markup",
|
||||
# "name": None,
|
||||
# "type": "STRONG",
|
||||
# "start": 0,
|
||||
# "end": 6,
|
||||
# "href": None,
|
||||
# "title": None,
|
||||
# "rel": None,
|
||||
# "anchorType": None,
|
||||
# "userId": None,
|
||||
# "creatorIds": None,
|
||||
# },
|
||||
# {
|
||||
# "__typename": "Markup",
|
||||
# "name": None,
|
||||
# "type": "EM",
|
||||
# "start": 0,
|
||||
# "end": 6,
|
||||
# "href": None,
|
||||
# "title": None,
|
||||
# "rel": None,
|
||||
# "anchorType": None,
|
||||
# "userId": None,
|
||||
# "creatorIds": None,
|
||||
# },
|
||||
# ],
|
||||
# )
|
||||
# parsed_markups = parse_markups(markups[0])
|
||||
# logger.debug(parsed_markups)
|
||||
# parsed_markups = split_overlapping_ranges(parsed_markups)
|
||||
# logger.debug(parsed_markups)
|
||||
# for markup in parsed_markups:
|
||||
# helper.set_template(markup["start"], markup["end"], markup["template"])
|
||||
|
||||
expected_pattern = r"<em><strong><code[^>]*>Note:</code> </strong></em>The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers\."
|
||||
assert re.match(expected_pattern, str(helper))
|
||||
# expected_pattern = r"<em><strong><code[^>]*>Note:</code> </strong></em>The patterns and ideas discussed in this post are broadly applicable and can be adopted for other cloud providers\."
|
||||
# assert re.match(expected_pattern, str(helper))
|
||||
|
||||
def test_basic_replace(self):
|
||||
# Replace A to B - ONE to ONE char
|
||||
|
|
@ -90,10 +157,15 @@ class TestRLStringHelper:
|
|||
helper.set_replace(0, 6, "B")
|
||||
assert helper.get_text() == "B - 📊 - ABC"
|
||||
|
||||
helper = RLStringHelper("Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.")
|
||||
helper = RLStringHelper(
|
||||
"Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏."
|
||||
)
|
||||
helper.set_template(0, 200, "<kr>{{text}}</kr>")
|
||||
helper.set_template(0, 200, "<kz>{{text}}</kz>")
|
||||
assert helper.get_text() == "<kz><kr>Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.</kr></kz>"
|
||||
assert (
|
||||
helper.get_text()
|
||||
== "<kz><kr>Your support means the world to me. If you found this article valuable and insightful, please consider giving it a round of applause by clicking the clapping hands icon 👏.</kr></kz>"
|
||||
)
|
||||
|
||||
helper = RLStringHelper("TESERT ALMACOM - 📊 - ABC")
|
||||
helper.set_replace(0, 14, "B")
|
||||
|
|
@ -116,26 +188,26 @@ class TestRLStringHelper:
|
|||
helper = RLStringHelper(issue_text)
|
||||
assert helper.get_text() == issue_text
|
||||
|
||||
def test_markup_parser(self):
|
||||
href_markup = {
|
||||
"__typename": 'Markup',
|
||||
"anchorType": 'LINK',
|
||||
"end": 12,
|
||||
"href": 'https://readwise.io/bookreview/{{book_id',
|
||||
"name": None,
|
||||
"rel": 'nofollow',
|
||||
"start": 0,
|
||||
"title": '',
|
||||
"type": 'A',
|
||||
"userId": None
|
||||
}
|
||||
# def test_markup_parser(self):
|
||||
# href_markup = {
|
||||
# "__typename": 'Markup',
|
||||
# "anchorType": 'LINK',
|
||||
# "end": 12,
|
||||
# "href": 'https://readwise.io/bookreview/{{book_id',
|
||||
# "name": None,
|
||||
# "rel": 'nofollow',
|
||||
# "start": 0,
|
||||
# "title": '',
|
||||
# "type": 'A',
|
||||
# "userId": None
|
||||
# }
|
||||
|
||||
helper = RLStringHelper("Hello world")
|
||||
markups = parse_markups([href_markup])
|
||||
parsed_markups = split_overlapping_ranges(markups)
|
||||
for markup in parsed_markups:
|
||||
helper.set_template(markup["start"], markup["end"], markup["template"])
|
||||
assert helper.get_text() == '<a style="text-decoration: underline;" rel="nofollow" title="" href="https://readwise.io/bookreview/{{book_id" target="_blank">Hello world</a>'
|
||||
# helper = RLStringHelper("Hello world")
|
||||
# markups = parse_markups([href_markup])
|
||||
# parsed_markups = split_overlapping_ranges(markups)
|
||||
# for markup in parsed_markups:
|
||||
# helper.set_template(markup["start"], markup["end"], markup["template"])
|
||||
# assert helper.get_text() == '<a style="text-decoration: underline;" rel="nofollow" title="" href="https://readwise.io/bookreview/{{book_id" target="_blank">Hello world</a>'
|
||||
|
||||
def test_medium_all(self):
|
||||
helper = RLStringHelper("ABC Hello world")
|
||||
|
|
|
|||
Loading…
Reference in a new issue