diff --git a/medium-parser/medium_parser/api.py b/medium-parser/medium_parser/api.py index ad68f26..a27643d 100644 --- a/medium-parser/medium_parser/api.py +++ b/medium-parser/medium_parser/api.py @@ -56,7 +56,7 @@ class MediumApi: "postId": post_id, "postMeteringOptions": {}, }, - "query": "query FullPostQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) { post(id: $postId) { __typename id ...FullPostData } meterPost(postId: $postId, postMeteringOptions: $postMeteringOptions) { __typename ...MeteringInfoData } } fragment UserFollowData on User { id socialStats { followingCount followerCount } viewerEdge { isFollowing } } fragment NewsletterData on NewsletterV3 { id viewerEdge { id isSubscribed } } fragment UserNewsletterData on User { id newsletterV3 { __typename ...NewsletterData } } fragment ImageMetadataData on ImageMetadata { id originalWidth originalHeight focusPercentX focusPercentY alt } fragment CollectionFollowData on Collection { id subscriberCount viewerEdge { isFollowing } } fragment CollectionNewsletterData on Collection { id newsletterV3 { __typename ...NewsletterData } } fragment BylineData on Post { id readingTime creator { __typename id imageId username name bio tippingLink viewerEdge { isUser } ...UserFollowData ...UserNewsletterData } collection { __typename id name avatar { __typename id ...ImageMetadataData } ...CollectionFollowData ...CollectionNewsletterData } isLocked firstPublishedAt latestPublishedVersion } fragment ResponseCountData on Post { postResponses { count } } fragment InResponseToPost on Post { id title creator { name } clapCount responsesCount isLocked } fragment PostVisibilityData on Post { id collection { viewerEdge { isEditor canEditPosts canEditOwnPosts } } creator { id } isLocked visibility } fragment PostMenuData on Post { id title creator { __typename ...UserFollowData } collection { __typename ...CollectionFollowData } } fragment PostMetaData on Post { __typename id title visibility ...ResponseCountData clapCount viewerEdge { clapCount } detectedLanguage mediumUrl readingTime updatedAt isLocked allowResponses isProxyPost latestPublishedVersion isSeries firstPublishedAt previewImage { id } inResponseToPostResult { __typename ...InResponseToPost } inResponseToMediaResource { mediumQuote { startOffset endOffset paragraphs { text type markups { type start end anchorType } } } } inResponseToEntityType canonicalUrl collection { id slug name shortDescription avatar { __typename id ...ImageMetadataData } viewerEdge { isFollowing isEditor canEditPosts canEditOwnPosts isMuting } } creator { id isFollowing name bio imageId mediumMemberAt twitterScreenName viewerEdge { isBlocking isMuting isUser } } previewContent { subtitle } pinnedByCreatorAt ...PostVisibilityData ...PostMenuData } fragment LinkMetadataList on Post { linkMetadataList { url alts { type url } } } fragment MediaResourceData on MediaResource { id iframeSrc thumbnailUrl } fragment IframeData on Iframe { iframeHeight iframeWidth mediaResource { __typename ...MediaResourceData } } fragment MarkupData on Markup { name type start end href title rel type anchorType userId creatorIds } fragment CatalogSummaryData on Catalog { id name description type visibility predefined responsesLocked creator { id name username imageId bio viewerEdge { isUser } } createdAt version itemsLastInsertedAt postItemsCount } fragment CatalogPreviewData on Catalog { __typename ...CatalogSummaryData id itemsConnection(pagingOptions: { limit: 10 } ) { items { entity { __typename ... on Post { id previewImage { id } } } } paging { count } } } fragment MixtapeMetadataData on MixtapeMetadata { mediaResourceId href thumbnailImageId mediaResource { mediumCatalog { __typename ...CatalogPreviewData } } } fragment ParagraphData on Paragraph { id name href text iframe { __typename ...IframeData } layout markups { __typename ...MarkupData } metadata { __typename ...ImageMetadataData } mixtapeMetadata { __typename ...MixtapeMetadataData } type hasDropCap dropCapImage { __typename ...ImageMetadataData } codeBlockMetadata { lang mode } } fragment QuoteData on Quote { id postId userId startOffset endOffset paragraphs { __typename id ...ParagraphData } quoteType } fragment HighlightsData on Post { id highlights { __typename ...QuoteData } } fragment PostFooterCountData on Post { __typename id clapCount viewerEdge { clapCount } ...ResponseCountData responsesLocked mediumUrl title collection { id viewerEdge { isMuting isFollowing } } creator { id viewerEdge { isMuting isFollowing } } } fragment TagNoViewerEdgeData on Tag { id normalizedTagSlug displayTitle followerCount postCount } fragment VideoMetadataData on VideoMetadata { videoId previewImageId originalWidth originalHeight } fragment SectionData on Section { name startIndex textLayout imageLayout videoLayout backgroundImage { __typename ...ImageMetadataData } backgroundVideo { __typename ...VideoMetadataData } } fragment PostBodyData on RichText { sections { __typename ...SectionData } paragraphs { __typename id ...ParagraphData } } fragment FullPostData on Post { __typename ...BylineData ...PostMetaData ...LinkMetadataList ...HighlightsData ...PostFooterCountData tags { __typename id ...TagNoViewerEdgeData } content(postMeteringOptions: $postMeteringOptions) { bodyModel { __typename ...PostBodyData } validatedShareKey } } fragment MeteringInfoData on MeteringInfo { maxUnlockCount unlocksRemaining postIds }", +"query": "query FullPostQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) { post(id: $postId) { __typename id ...FullPostData } meterPost(postId: $postId, postMeteringOptions: $postMeteringOptions) { __typename ...MeteringInfoData } } fragment UserFollowData on User { id socialStats { followingCount followerCount } viewerEdge { isFollowing } } fragment NewsletterData on NewsletterV3 { id viewerEdge { id isSubscribed } } fragment UserNewsletterData on User { id newsletterV3 { __typename ...NewsletterData } } fragment ImageMetadataData on ImageMetadata { id originalWidth originalHeight focusPercentX focusPercentY alt } fragment CollectionFollowData on Collection { id subscriberCount viewerEdge { isFollowing } } fragment CollectionNewsletterData on Collection { id newsletterV3 { __typename ...NewsletterData } } fragment BylineData on Post { id readingTime creator { __typename id imageId username name bio tippingLink viewerEdge { isUser } ...UserFollowData ...UserNewsletterData } collection { __typename id name avatar { __typename id ...ImageMetadataData } ...CollectionFollowData ...CollectionNewsletterData } isLocked firstPublishedAt latestPublishedVersion } fragment ResponseCountData on Post { postResponses { count } } fragment InResponseToPost on Post { id title creator { name } clapCount responsesCount isLocked } fragment PostVisibilityData on Post { id collection { viewerEdge { isEditor canEditPosts canEditOwnPosts } } creator { id } isLocked visibility } fragment PostMenuData on Post { id title creator { __typename ...UserFollowData } collection { __typename ...CollectionFollowData } } fragment PostMetaData on Post { __typename id title visibility ...ResponseCountData clapCount viewerEdge { clapCount } detectedLanguage mediumUrl readingTime updatedAt isLocked allowResponses isProxyPost latestPublishedVersion isSeries firstPublishedAt previewImage { id } inResponseToPostResult { __typename ...InResponseToPost } inResponseToMediaResource { mediumQuote { startOffset endOffset paragraphs { text type markups { type start end anchorType } } } } inResponseToEntityType canonicalUrl collection { id slug name shortDescription avatar { __typename id ...ImageMetadataData } viewerEdge { isFollowing isEditor canEditPosts canEditOwnPosts isMuting } } creator { id isFollowing name bio imageId mediumMemberAt twitterScreenName viewerEdge { isBlocking isMuting isUser } } previewContent { subtitle } pinnedByCreatorAt ...PostVisibilityData ...PostMenuData } fragment LinkMetadataList on Post { linkMetadataList { url alts { type url } } } fragment MediaResourceData on MediaResource { id iframeSrc thumbnailUrl iframeHeight iframeWidth title } fragment IframeData on Iframe { iframeHeight iframeWidth mediaResource { __typename ...MediaResourceData } } fragment MarkupData on Markup { name type start end href title rel type anchorType userId creatorIds } fragment CatalogSummaryData on Catalog { id name description type visibility predefined responsesLocked creator { id name username imageId bio viewerEdge { isUser } } createdAt version itemsLastInsertedAt postItemsCount } fragment CatalogPreviewData on Catalog { __typename ...CatalogSummaryData id itemsConnection(pagingOptions: { limit: 10 } ) { items { entity { __typename ... on Post { id previewImage { id } } } } paging { count } } } fragment MixtapeMetadataData on MixtapeMetadata { mediaResourceId href thumbnailImageId mediaResource { mediumCatalog { __typename ...CatalogPreviewData } } } fragment ParagraphData on Paragraph { id name href text iframe { __typename ...IframeData } layout markups { __typename ...MarkupData } metadata { __typename ...ImageMetadataData } mixtapeMetadata { __typename ...MixtapeMetadataData } type hasDropCap dropCapImage { __typename ...ImageMetadataData } codeBlockMetadata { lang mode } } fragment QuoteData on Quote { id postId userId startOffset endOffset paragraphs { __typename id ...ParagraphData } quoteType } fragment HighlightsData on Post { id highlights { __typename ...QuoteData } } fragment PostFooterCountData on Post { __typename id clapCount viewerEdge { clapCount } ...ResponseCountData responsesLocked mediumUrl title collection { id viewerEdge { isMuting isFollowing } } creator { id viewerEdge { isMuting isFollowing } } } fragment TagNoViewerEdgeData on Tag { id normalizedTagSlug displayTitle followerCount postCount } fragment VideoMetadataData on VideoMetadata { videoId previewImageId originalWidth originalHeight } fragment SectionData on Section { name startIndex textLayout imageLayout videoLayout backgroundImage { __typename ...ImageMetadataData } backgroundVideo { __typename ...VideoMetadataData } } fragment PostBodyData on RichText { sections { __typename ...SectionData } paragraphs { __typename id ...ParagraphData } } fragment FullPostData on Post { __typename ...BylineData ...PostMetaData ...LinkMetadataList ...HighlightsData ...PostFooterCountData tags { __typename id ...TagNoViewerEdgeData } content(postMeteringOptions: $postMeteringOptions) { bodyModel { __typename ...PostBodyData } validatedShareKey } } fragment MeteringInfoData on MeteringInfo { maxUnlockCount unlocksRemaining postIds }", } response_data = None @@ -82,6 +82,8 @@ class MediumApi: return None response_data = response.json() + with open("/app/web/sidufh.json", "wb") as file: + file.write(response.content) except Exception as ex: logger.debug("Failed to make request or parse response") diff --git a/medium-parser/medium_parser/core.py b/medium-parser/medium_parser/core.py index 9a47174..c438de5 100644 --- a/medium-parser/medium_parser/core.py +++ b/medium-parser/medium_parser/core.py @@ -218,6 +218,7 @@ class MediumParser: preview_image_id: str, highlights: list, tags: list, + post_data: dict, ) -> tuple[list, str, str]: paragraphs = content["bodyModel"]["paragraphs"] tags_list = [tag["displayTitle"] for tag in tags] @@ -604,14 +605,75 @@ class MediumParser: ) out_paragraphs.append(embed_template_rendered) elif paragraph["type"] == "IFRAME": - iframe_template = jinja_env.from_string( - '
' - ) - iframe_template_rendered = iframe_template.render( - host_address=self.host_address, - iframe_id=paragraph["iframe"]["mediaResource"]["id"], - ) - out_paragraphs.append(iframe_template_rendered) + logger.debug(f"Processing IFRAME paragraph") + + # First check if we have direct mediaResource in the iframe + media_resource = paragraph.get("iframe", {}).get("mediaResource", {}) + + # If mediaResource is just a reference, look it up in post_data + media_resource_ref = paragraph.get("iframe", {}).get("mediaResource", {}).get("__ref") + if media_resource_ref and not media_resource.get("id") and not media_resource.get("iframeSrc"): + logger.debug(f"Found media resource reference: {media_resource_ref}") + data_payload = post_data.get("data", {}) + if media_resource_ref in data_payload: + media_resource = data_payload[media_resource_ref] + logger.debug(f"Found media resource for ref: {media_resource_ref}") + else: + logger.warning(f"Could not find media resource for ref: {media_resource_ref}") + + # Get iframe source from mediaResource + iframe_src_val = media_resource.get("iframeSrc") + iframe_id = media_resource.get("id") + + # Determine the source URL for the iframe + src = iframe_src_val + if not src and iframe_id: + logger.debug(f"Using fallback iframe URL with ID: {iframe_id}") + src = f"{self.host_address}/render_iframe/{iframe_id}" + + if not src: + logger.warning("No iframe source found, skipping iframe") + current_pos += 1 + continue + + # Get iframe dimensions + iframe_width = media_resource.get("iframeWidth") + iframe_height = media_resource.get("iframeHeight") + + # If dimensions are available in paragraph.iframe directly, use those + if not iframe_width and paragraph.get("iframe", {}).get("iframeWidth"): + iframe_width = paragraph["iframe"]["iframeWidth"] + if not iframe_height and paragraph.get("iframe", {}).get("iframeHeight"): + iframe_height = paragraph["iframe"]["iframeHeight"] + + logger.debug(f"Iframe dimensions: {iframe_width}x{iframe_height}") + + # Render with aspect ratio if we have valid dimensions + if iframe_width and iframe_height and iframe_width > 0: + ratio = (iframe_height / iframe_width) * 100 + iframe_template = jinja_env.from_string( + """
+ +
""" + ) + iframe_template_rendered = iframe_template.render( + src=src, + ratio=f"{ratio:.4f}", + iframe_width=iframe_width or "100%", + iframe_height=iframe_height or "100%", + ) + out_paragraphs.append(iframe_template_rendered) + else: + # Fallback to responsive iframe without aspect ratio + iframe_template = jinja_env.from_string( + '
' + ) + iframe_template_rendered = iframe_template.render( + src=src, + iframe_width=iframe_width or "100%", + iframe_height=iframe_height or "100%", + ) + out_paragraphs.append(iframe_template_rendered) else: logger.error(f"Unknown {paragraph['type']}: {paragraph}") @@ -702,6 +764,7 @@ class MediumParser: post_data["data"]["post"]["previewImage"]["id"], post_data["data"]["post"]["highlights"], post_data["data"]["post"]["tags"], + post_data, ) # Await metadata diff --git a/web/server/handlers/iframe.py b/web/server/handlers/iframe.py index 1e56447..cc15e44 100644 --- a/web/server/handlers/iframe.py +++ b/web/server/handlers/iframe.py @@ -40,9 +40,5 @@ def patch_iframe_content(content: str) -> str: ) soup = BeautifulSoup(content, "html.parser") - iframe_resizer_script = BeautifulSoup( - '', "html.parser" - ).script - soup.head.append(iframe_resizer_script) return soup.prettify() diff --git a/web/server/templates/base.html b/web/server/templates/base.html index 3b82993..f678a03 100644 --- a/web/server/templates/base.html +++ b/web/server/templates/base.html @@ -26,11 +26,58 @@ - - + + + + +