web/core/medium_parser/medium_api.py

77 lines
7.6 KiB
Python
Raw Normal View History

from typing import Optional
2024-01-31 00:48:20 +00:00
import aiohttp
import orjson
2024-01-31 00:48:20 +00:00
from aiohttp_retry import RetryClient
2024-07-21 15:34:32 +00:00
from aiohttp_socks import ProxyConnector
2024-01-31 00:48:20 +00:00
from loguru import logger
from . import retry_options
from .time import get_unix_ms
from .utils import generate_random_sha256_hash
2024-07-21 15:34:32 +00:00
# SOCKS proxy URL handed to ProxyConnector.from_url() when a request is made
# with use_proxy=True.
# NOTE(review): "wgcf1" looks like a host name that only resolves inside the
# deployment network — confirm before reusing this module elsewhere.
socks_proxy = "socks5h://wgcf1:1080"
2024-01-31 00:48:20 +00:00
2024-07-21 15:34:32 +00:00
async def query_post_by_id(post_id: str, timeout: int = 3, auth_cookies: Optional[str] = None, use_proxy: bool = True):
    """Fetch a Medium post through the ``FullPostQuery`` GraphQL operation.

    Args:
        post_id: Medium post identifier, sent as the ``postId`` query variable.
        timeout: Total time budget for the HTTP request, in seconds.
        auth_cookies: Raw value for the ``Cookie`` header. ``None`` is sent as
            an empty string.
        use_proxy: When true, the request is routed through ``socks_proxy``.

    Returns:
        The decoded JSON response body, or ``None`` when the final response
        status is not 200.

    Raises:
        Exception: Whatever was raised while decoding the response body as
            JSON; it is logged and re-raised after the session is closed.
            Errors raised by the HTTP client itself are not caught here.
    """
    logger.debug(f"Starting request construction for post {post_id}")
    auth_cookies = "" if auth_cookies is None else auth_cookies

    connector = ProxyConnector.from_url(socks_proxy) if use_proxy else None

    headers = {
        "X-APOLLO-OPERATION-ID": generate_random_sha256_hash(),
        "X-APOLLO-OPERATION-NAME": "FullPostQuery",
        "Accept": "multipart/mixed; deferSpec=20220824, application/json, application/json",
        "Accept-Language": "en-US",
        "X-Obvious-CID": "android",
        "X-Xsrf-Token": "1",
        "X-Client-Date": str(get_unix_ms()),
        "User-Agent": "AdsBot-Google-Mobile",  # "donkey/4.5.1187420",  # <---- There is Medium version
        "Cache-Control": "public, max-age=-1",
        "Content-Type": "application/json",
        "Connection": "Keep-Alive",
        "Cookie": auth_cookies,
    }
    graphql_data = {
        "operationName": "FullPostQuery",
        "variables": {
            "postId": post_id,
            "postMeteringOptions": {},
        },
        # NOTE(review): the query text was cut off after `clapCount` in the copy
        # of the file that was reviewed (the last fragment is unclosed and the
        # referenced FullPostData / MeteringInfoData fragments are missing).
        # Restore the remainder verbatim from the original file before shipping.
        "query": "query FullPostQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) { post(id: $postId) { __typename id ...FullPostData } meterPost(postId: $postId, postMeteringOptions: $postMeteringOptions) { __typename ...MeteringInfoData } } fragment UserFollowData on User { id socialStats { followingCount followerCount } viewerEdge { isFollowing } } fragment NewsletterData on NewsletterV3 { id viewerEdge { id isSubscribed } } fragment UserNewsletterData on User { id newsletterV3 { __typename ...NewsletterData } } fragment ImageMetadataData on ImageMetadata { id originalWidth originalHeight focusPercentX focusPercentY alt } fragment CollectionFollowData on Collection { id subscriberCount viewerEdge { isFollowing } } fragment CollectionNewsletterData on Collection { id newsletterV3 { __typename ...NewsletterData } } fragment BylineData on Post { id readingTime creator { __typename id imageId username name bio tippingLink viewerEdge { isUser } ...UserFollowData ...UserNewsletterData } collection { __typename id name avatar { __typename id ...ImageMetadataData } ...CollectionFollowData ...CollectionNewsletterData } isLocked firstPublishedAt latestPublishedVersion } fragment ResponseCountData on Post { postResponses { count } } fragment InResponseToPost on Post { id title creator { name } clapCount responsesCount isLocked } fragment PostVisibilityData on Post { id collection { viewerEdge { isEditor canEditPosts canEditOwnPosts } } creator { id } isLocked visibility } fragment PostMenuData on Post { id title creator { __typename ...UserFollowData } collection { __typename ...CollectionFollowData } } fragment PostMetaData on Post { __typename id title visibility ...ResponseCountData clapCount viewerEdge { clapCount } detectedLanguage mediumUrl readingTime updatedAt isLocked allowResponses isProxyPost latestPublishedVersion isSeries firstPublishedAt previewImage { id } inResponseToPostResult { __typename ...InResponseToPost } inResponseToMediaResource { mediumQuote { startOffset endOffset paragraphs { text type markups { type start end anchorType } } } } inResponseToEntityType canonicalUrl collection { id slug name shortDescription avatar { __typename id ...ImageMetadataData } viewerEdge { isFollowing isEditor canEditPosts canEditOwnPosts isMuting } } creator { id isFollowing name bio imageId mediumMemberAt twitterScreenName viewerEdge { isBlocking isMuting isUser } } previewContent { subtitle } pinnedByCreatorAt ...PostVisibilityData ...PostMenuData } fragment LinkMetadataList on Post { linkMetadataList { url alts { type url } } } fragment MediaResourceData on MediaResource { id iframeSrc thumbnailUrl } fragment IframeData on Iframe { iframeHeight iframeWidth mediaResource { __typename ...MediaResourceData } } fragment MarkupData on Markup { name type start end href title rel type anchorType userId creatorIds } fragment CatalogSummaryData on Catalog { id name description type visibility predefined responsesLocked creator { id name username imageId bio viewerEdge { isUser } } createdAt version itemsLastInsertedAt postItemsCount } fragment CatalogPreviewData on Catalog { __typename ...CatalogSummaryData id itemsConnection(pagingOptions: { limit: 10 } ) { items { entity { __typename ... on Post { id previewImage { id } } } } paging { count } } } fragment MixtapeMetadataData on MixtapeMetadata { mediaResourceId href thumbnailImageId mediaResource { mediumCatalog { __typename ...CatalogPreviewData } } } fragment ParagraphData on Paragraph { id name href text iframe { __typename ...IframeData } layout markups { __typename ...MarkupData } metadata { __typename ...ImageMetadataData } mixtapeMetadata { __typename ...MixtapeMetadataData } type hasDropCap dropCapImage { __typename ...ImageMetadataData } codeBlockMetadata { lang mode } } fragment QuoteData on Quote { id postId userId startOffset endOffset paragraphs { __typename id ...ParagraphData } quoteType } fragment HighlightsData on Post { id highlights { __typename ...QuoteData } } fragment PostFooterCountData on Post { __typename id clapCount",
    }

    response_data = None
    parse_error = None
    # aiohttp documents the request timeout as a ClientTimeout; bare numbers
    # are only accepted for backward compatibility.
    request_timeout = aiohttp.ClientTimeout(total=timeout)

    logger.debug("Request started...")
    async with aiohttp.ClientSession(connector=connector) as session, RetryClient(
        client_session=session, raise_for_status=False, retry_options=retry_options
    ) as retry_client:
        async with retry_client.post(
            "https://medium.com/_/graphql",
            headers=headers,
            json=graphql_data,
            timeout=request_timeout,
        ) as response:
            if response.status != 200:
                logger.error(f"Failed to fetch post by ID {post_id} with status code: {response.status}")
                return None
            try:
                response_data = await response.json(loads=orjson.loads)
            except Exception as ex:
                # Remember the failure and re-raise it only after the session
                # and connector have been closed by the context managers.
                logger.debug("Failed to parse response data as JSON")
                logger.exception(ex)
                parse_error = ex
    logger.debug("Request finished...")

    if parse_error:
        logger.error(f"Exception occured while fetching post {post_id}, so let's just fuck it up")
        raise parse_error

    return response_data