From 4b8bb415bf92ce1e81c90cd8ceb1cbf020c49f7a Mon Sep 17 00:00:00 2001 From: Christopher Gurnee Date: Fri, 6 May 2022 19:13:18 +0000 Subject: [PATCH] WIP of implementing context w/o PS ID querying This implements the `context` query parameter without querying the Pushshift comment endpoint with `ids`, which until recently only worked for certain time periods. Now that this is fixed for all comments after Oct/23/2017 21:55, it makes sense to use a more simple solution instead. --- src/api/reddit/index.js | 18 +++ src/pages/thread/Comment.js | 4 + src/pages/thread/CommentSection.js | 19 ++- src/pages/thread/index.js | 220 +++++++++++++++++++++-------- src/sass/comment.sass | 5 + 5 files changed, 201 insertions(+), 65 deletions(-) diff --git a/src/api/reddit/index.js b/src/api/reddit/index.js index 76f4967..a025cdc 100644 --- a/src/api/reddit/index.js +++ b/src/api/reddit/index.js @@ -43,3 +43,21 @@ export const getComments = commentIDs => ( .then(results => results.data.children.map(({data}) => data)) .catch(error => errorHandler(error, 'reddit.getComments')) ) + +// Fetch up to 8 of a comment's parents +export const getParentComments = (threadID, commentID, parents) => { + parents = Math.min(parents, 8) + return fetchJson( + `${baseURL}/comments/${threadID}?comment=${commentID}&context=${parents}&limit=${parents}&threaded=false&showmore=false`, + requestSettings + ) + .then(results => { + const { children } = results[1].data + // If there are fewer parents than requested, remove the comments which aren't parents + const idx = children.findIndex(c => c.data.id == commentID) + if (idx >= 0) + children.splice(idx + 1) + return children.map(({data}) => data) + }) + .catch(error => errorHandler(error, 'reddit.getParentComments')) +} diff --git a/src/pages/thread/Comment.js b/src/pages/thread/Comment.js index ca6ef66..76da311 100644 --- a/src/pages/thread/Comment.js +++ b/src/pages/thread/Comment.js @@ -12,6 +12,9 @@ const Comment = (props) => { } else { commentStyle += 
props.depth % 2 === 0 ? 'comment-even' : 'comment-odd' } + if (props.id == props.highlightedID) { + commentStyle += ' highlighted' + } let innerHTML, editedInnerHTML; if (props.removed && isRemoved(props.body)) { @@ -94,6 +97,7 @@ const Comment = (props) => { {...comment} depth={props.depth + 1} postAuthor={props.postAuthor} + highlightedID={props.highlightedID} /> ))} diff --git a/src/pages/thread/CommentSection.js b/src/pages/thread/CommentSection.js index 59ce4d5..571cf84 100644 --- a/src/pages/thread/CommentSection.js +++ b/src/pages/thread/CommentSection.js @@ -6,7 +6,7 @@ import { showRemovedAndDeleted, showRemoved, showDeleted } from '../../utils' -const unflatten = (commentMap, rootID, postID) => { +const unflatten = (commentMap, rootID, context, postID) => { const commentTree = [] commentMap.forEach(comment => { @@ -58,6 +58,12 @@ const unflatten = (commentMap, rootID, postID) => { replies: missingRootReplies } } + let newRoot + while (context && rootComment.parent_id && (newRoot = commentMap.get(rootComment.parent_id))) { + newRoot.replies = [rootComment] + rootComment = newRoot + context-- + } return [rootComment] } } @@ -95,13 +101,13 @@ const filterCommentTree = (comments, filterFunction) => { return hasOkComment } -let commentTree, lastTotal, lastRoot, lastFilter, lastSort, lengthBeforeFiltering +let commentTree, lastTotal, lastRoot, lastContext, lastFilter, lastSort, lengthBeforeFiltering const commentSection = (props) => { console.time('Build comment tree') - const {total, root, commentFilter, commentSort} = props + const {total, root, context, commentFilter, commentSort} = props - const needsRebuild = !(total === lastTotal && root === lastRoot && ( + const needsRebuild = !(total === lastTotal && root === lastRoot && context === lastContext && ( commentFilter === lastFilter || lastFilter === filter.all || lastFilter === filter.removedDeleted && ( @@ -110,7 +116,7 @@ const commentSection = (props) => { ) )) if (needsRebuild) { - commentTree = 
unflatten(props.comments, root, props.postID) + commentTree = unflatten(props.comments, root, context, props.postID) lengthBeforeFiltering = commentTree.length } @@ -142,6 +148,7 @@ const commentSection = (props) => { lastSort = commentSort console.timeEnd('Build comment tree') + props.setMoreContextAvail(commentTree.length > 0 && commentTree[0].parent_id != commentTree[0].link_id) props.setAllCommentsFiltered(commentTree.length == 0 && lengthBeforeFiltering > 0) console.time('Build html tree') @@ -153,6 +160,7 @@ const commentSection = (props) => { {...comment} depth={0} postAuthor={props.postAuthor} + highlightedID={context ? root : null} /> )) :

No comments found

@@ -164,6 +172,7 @@ const commentSection = (props) => { const areEqual = (prevProps, nextProps) => { if (prevProps.commentFilter !== nextProps.commentFilter || prevProps.commentSort !== nextProps.commentSort || + prevProps.context !== nextProps.context || prevProps.root !== nextProps.root) return false if (nextProps.reloadingComments) diff --git a/src/pages/thread/index.js b/src/pages/thread/index.js index cb5f72f..87857b6 100644 --- a/src/pages/thread/index.js +++ b/src/pages/thread/index.js @@ -3,6 +3,7 @@ import { Link } from 'react-router-dom' import { getPost, getComments as getRedditComments, + getParentComments, chunkSize as redditChunkSize } from '../../api/reddit' import { @@ -52,16 +53,22 @@ class ChunkedQueue { const EARLIEST_CREATED = 1 class Thread extends React.Component { + // For state.context: + // undefined - ignore the context query parameter + // an int - the current context; will be updated if query param changes state = { post: {}, pushshiftCommentLookup: new Map(), removed: 0, deleted: 0, + context: undefined, + moreContextAvail: true, allCommentsFiltered: false, loadedAllComments: false, loadingComments: true, reloadingComments: false } + nextMoreContextAvail = true nextAllCommentsFiltered = false // A 'contig' is an object representing a contiguous block of comments currently being downloaded or already @@ -212,8 +219,9 @@ class Thread extends React.Component { }) // The max_comments query parameter can increase the initial comments-to-download - const maxComments = Math.max(this.props.global.maxComments, constrainMaxComments( - parseInt((new URLSearchParams(this.props.location.search)).get('max_comments')))) + const searchParams = new URLSearchParams(this.props.location.search) + const maxComments = Math.max(this.props.global.maxComments, + constrainMaxComments(parseInt(searchParams.get('max_comments')))) // Get comments starting from the earliest available (not a permalink) if (commentID === undefined) { @@ -235,7 +243,13 @@ class Thread 
extends React.Component { return } this.contigs.unshift({firstCreated: comment?.created_utc || EARLIEST_CREATED}) - this.getComments(maxComments, false, comment) + if (parseInt(searchParams.get('context')) > 0) { + this.getComments(maxComments, false, comment, false) + .then(() => this.setState({ context: 0 })) // initial state; will be updated in componentDidUpdate() + } else { + this.getComments(maxComments, false, comment) + this.state.context = 0 + } }) .catch(() => { this.contigs.unshift({firstCreated: EARLIEST_CREATED}) @@ -277,7 +291,10 @@ class Thread extends React.Component { } componentDidUpdate () { - let { loadingComments } = this.state + let { loadingComments, pushshiftCommentLookup } = this.state + const { commentID } = this.props.match.params + const { location } = this.props + const requestedContext = commentID ? parseInt((new URLSearchParams(location.search)).get('context')) : 0 // If the max-to-download Reload button or 'load more comments' was clicked const { loadingMoreComments } = this.props.global.state @@ -289,11 +306,14 @@ class Thread extends React.Component { this.updateCurContig() this.getComments(loadingMoreComments, true) - // Otherwise if we're loading a comment tree we haven't downloaded yet + // If we're loading a comment tree we haven't downloaded yet + // TODO: when switching to an existing contig that's been downloaded via the + // "additional context" code below (which only downloads 100 comments + // per contig), this code branch should download an additional + // global.maxComments-100 comments, in persistent mode, to the contig. 
} else if (!loadingComments && !this.state.reloadingComments && !this.updateCurContig()) { // If we haven't downloaded from the earliest available yet (not a permalink) - const { commentID } = this.props.match.params if (commentID === undefined) { loadingComments = true this.setState({loadingComments}) @@ -306,7 +326,7 @@ class Thread extends React.Component { // If we haven't downloaded this permalink yet } else if (!this.commentIdAttempts.has(commentID)) { this.commentIdAttempts.add(commentID) - this.setState({reloadingComments: true}) + this.setState({reloadingComments: true, context: 0}) this.props.global.setLoading('Loading comments...') console.time('Load comments') let createdUtcNotFound // true if Reddit doesn't have the comment's created_utc @@ -319,6 +339,8 @@ class Thread extends React.Component { insertBefore = this.contigs.length // If comment isn't inside an existing contig, create a new one and start downloading + // TODO: see the TODO just above - add a flag to the contig to + // indicate that the download was for only 100 comments? 
if (insertBefore == 0 || created_utc >= this.contigs[insertBefore - 1].lastCreated) { this.contigs.splice(insertBefore, 0, {firstCreated: created_utc}) this.setCurContig(insertBefore) @@ -327,13 +349,12 @@ class Thread extends React.Component { // Otherwise an earlier attempt to download it from Pushshift turned up nothing, } else { - const { pushshiftCommentLookup } = this.state this.fullnamesToShortIDs(comment) this.useRedditComment(comment) // so use the Reddit comment instead this.setCurContig(insertBefore - 1) // (this was the failed earlier attempt) console.timeEnd('Load comments') this.props.global.setSuccess() - this.setState({pushshiftCommentLookup, loadingComments: false, reloadingComments: false}) + this.setState({loadingComments: false, reloadingComments: false}) } } else createdUtcNotFound = true @@ -352,9 +373,57 @@ class Thread extends React.Component { } }) } + + // If additional context needs to be downloaded + } else if (requestedContext > this.state.context) { + this.state.context = requestedContext + if (!this.state.loadingComments) { + this.setState({reloadingComments: true}) + this.props.global.setLoading('Loading comments...') + console.time('Load comments') + } + const origContigIdx = this.curContigIdx + getParentComments(this.props.match.params.threadID, commentID, requestedContext) + .then(async comments => { + const lastComment = comments[comments.length - 1] + for (let comment of comments) { + if (!pushshiftCommentLookup.has(comment.id)) { + this.redditIdsToPushshift(comment) + const created_utc = comment.created_utc + const insertBefore = this.contigs.findIndex(contig => created_utc < contig.firstCreated) + + // If comment isn't inside an existing contig, create a new one and start downloading + if (insertBefore == 0 || created_utc >= this.contigs[insertBefore - 1].lastCreated) { + this.contigs.splice(insertBefore, 0, {firstCreated: created_utc}) + this.curContigIdx = insertBefore + await this.getComments(pushshiftChunkSize, false, 
comment, comment === lastComment) + if (this.stopLoading) + break + + // Otherwise an earlier attempt to download it from Pushshift turned up nothing, + } else { + this.useRedditComment(comment) // so use the Reddit comment instead + if (comment === lastComment) + this.getComments(0) // wait for pending Reddit comments & update state + } + + } else if (comment === lastComment) + this.getComments(0) // wait for pending Reddit comments & update state + } + }) +// // TODO: Error handling: +// // 1) just download global.maxComments into an existing or new contig? +// // 2) query the parent_id chain from Pushshift (one request per parent)? +// .catch(() => { +// // ... +// this.getComments(this.props.global.maxComments) +// }) + .finally(() => this.curContigIdx = origContigIdx) } - const { location } = this.props + if (!requestedContext && this.state.context || requestedContext < this.state.context) + this.setState({context: requestedContext || 0}) + if (location.state?.scrollBehavior && location.hash.length > 1 && !loadingComments && !this.props.global.isErrored()) { const hashElem = document.getElementById(location.hash.substring(1)) @@ -364,6 +433,8 @@ class Thread extends React.Component { } } + if (this.nextMoreContextAvail != this.state.moreContextAvail) + this.setState({moreContextAvail: this.nextMoreContextAvail}) if (this.nextAllCommentsFiltered != this.state.allCommentsFiltered) this.setState({allCommentsFiltered: this.nextAllCommentsFiltered}) } @@ -374,11 +445,16 @@ class Thread extends React.Component { // been completed and merged with the next contig. 
// commentHint: a Reddit comment for use if Pushshift is missing that same comment; // its ids must have already been updated by fullnamesToShortIDs() - getComments (newCommentCount, persistent = false, commentHint = undefined) { + // setState: if true, will call setState to update the page once completed; + // note that if false, persistent is ignored and treated as false + // Returns: a Promise which resolves after comments have been retrieved and processed + // from Pushshift (but possibly before they've been retrieved from Reddit) + redditIdQueue = new ChunkedQueue(redditChunkSize) + redditPromises = [] + getComments (newCommentCount, persistent = false, commentHint = undefined, setState = true) { const { threadID, commentID } = this.props.match.params const { pushshiftCommentLookup } = this.state - const redditIdQueue = new ChunkedQueue(redditChunkSize) - const pushshiftPromises = [], redditPromises = [] + const pushshiftPromises = [] let doRedditComments // Process a chunk of comments downloaded from Pushshift (called by getPushshiftComments() below) @@ -390,18 +466,18 @@ class Thread extends React.Component { const { id, parent_id } = comment if (!pushshiftCommentLookup.has(id)) { pushshiftCommentLookup.set(id, comment) - redditIdQueue.push(id) + this.redditIdQueue.push(id) count++ // When viewing the full thread (to prevent false positives), if a parent_id is a comment // (not a post/thread) and it's missing from Pushshift, try to get it from Reddit instead. 
if (commentID === undefined && parent_id != threadID && !pushshiftCommentLookup.has(parent_id)) { pushshiftCommentLookup.set(parent_id, undefined) // prevents adding it to the Queue multiple times - redditIdQueue.push(parent_id) + this.redditIdQueue.push(parent_id) } } }) - while (redditIdQueue.hasFullChunk()) - doRedditComments(redditIdQueue.shiftChunk()) + while (this.redditIdQueue.hasFullChunk()) + doRedditComments(this.redditIdQueue.shiftChunk()) return count })) } @@ -409,7 +485,7 @@ class Thread extends React.Component { } // Download a list of comments by id from Reddit, and process them - doRedditComments = ids => redditPromises.push(getRedditComments(ids) + doRedditComments = ids => this.redditPromises.push(getRedditComments(ids) .then(comments => { let removed = 0, deleted = 0 comments.forEach(comment => { @@ -454,28 +530,37 @@ class Thread extends React.Component { // Download comments from Pushshift into the current contig, and process each chunk (above) as it's retrieved const after = this.curContig().lastCreated - 1 || this.curContig().firstCreated - 1 const before = this.nextContig()?.firstCreated + 1 - getPushshiftComments(processPushshiftComments, threadID, newCommentCount, after, before) + return (newCommentCount ? 
+ getPushshiftComments(processPushshiftComments, threadID, newCommentCount, after, before) : + Promise.resolve([undefined, false]) + ) .then(([lastCreatedUtc, curContigLoadedAll]) => { // Update the contigs array - if (curContigLoadedAll) { - if (before) { - this.curContig().lastCreated = before - 1 - this.mergeContigs() - } else { + if (newCommentCount) { + if (curContigLoadedAll) { + if (before) { + this.curContig().lastCreated = before - 1 + this.mergeContigs() + } else { + this.curContig().lastCreated = lastCreatedUtc + this.curContig().loadedAllComments = true + } + } else this.curContig().lastCreated = lastCreatedUtc - this.curContig().loadedAllComments = true - } - } else - this.curContig().lastCreated = lastCreatedUtc - if (this.stopLoading) - return + if (this.stopLoading) + return + } // Finished retrieving comments from Pushshift; wait for processing to finish - this.props.global.setLoading('Comparing comments...') - Promise.all(pushshiftPromises).then(lengths => { - const pushshiftComments = lengths.reduce((a,b) => a+b, 0) - console.log('Pushshift:', pushshiftComments, 'comments') + if (setState) + this.props.global.setLoading('Comparing comments...') + return Promise.all(pushshiftPromises).then(lengths => { // this is the promise that's returned + let pushshiftComments + if (newCommentCount) { + pushshiftComments = lengths.reduce((a,b) => a+b, 0) + console.log('Pushshift:', pushshiftComments, 'comments') + } // If Pushshift didn't find the Reddit commentHint, but should have, use Reddit's comment if (commentHint && !pushshiftCommentLookup.has(commentHint.id) && @@ -487,30 +572,33 @@ class Thread extends React.Component { } // All comments from Pushshift have been processed; wait for Reddit to finish - while (!redditIdQueue.isEmpty()) - doRedditComments(redditIdQueue.shiftChunk()) - Promise.all(redditPromises).then(lengths => { - console.log('Reddit:', lengths.reduce((a,b) => a+b, 0), 'comments') + if (setState) { + while (!this.redditIdQueue.isEmpty()) 
+ doRedditComments(this.redditIdQueue.shiftChunk()) + Promise.all(this.redditPromises).then(lengths => { + console.log('Reddit:', lengths.reduce((a,b) => a+b, 0), 'comments') + this.redditPromises.splice(0) - if (!this.stopLoading) { - const loadedAllComments = Boolean(this.curContig().loadedAllComments) - if (persistent && !loadedAllComments && pushshiftComments <= newCommentCount - pushshiftChunkSize) - this.getComments(newCommentCount - pushshiftComments, true, commentHint) + if (!this.stopLoading) { + const loadedAllComments = Boolean(this.curContig().loadedAllComments) + if (persistent && !loadedAllComments && pushshiftComments <= newCommentCount - pushshiftChunkSize) + this.getComments(newCommentCount - pushshiftComments, true, commentHint) - else { - console.timeEnd('Load comments') - this.props.global.setSuccess() - this.setState({ - pushshiftCommentLookup, - removed: this.state.removed, - deleted: this.state.deleted, - loadedAllComments, - loadingComments: false, - reloadingComments: false - }) + else { + console.timeEnd('Load comments') + this.props.global.setSuccess() + this.setState({ + pushshiftCommentLookup, + removed: this.state.removed, + deleted: this.state.deleted, + loadedAllComments, + loadingComments: false, + reloadingComments: false + }) + } } - } - }) + }) + } }) }) .catch(e => { @@ -557,19 +645,30 @@ class Thread extends React.Component { <> {isSingleComment &&
-
you are viewing a single comment's thread.
+
you are viewing a single comment's thread.
{this.state.reloadingComments ? -
view the rest of the comments
: - ({ + view the rest of the comments → : + ({ pathname: `/r/${subreddit}/comments/${id}/_/`, hash: '#comment-info', state: {scrollBehavior: 'smooth'}} - )}>view the rest of the comments + )}>view the rest of the comments → } -
+ {this.state.moreContextAvail && this.state.context < 8 && <> + + {this.state.reloadingComments ? + view more context → : + ({ + pathname: `/r/${subreddit}/comments/${id}/_/${commentID}/`, + search: `?context=${this.state.context < 4 ? 4 : 8}`} + )}>view more context → + } + } +
} this.nextMoreContextAvail = avail} setAllCommentsFiltered={filtered => this.nextAllCommentsFiltered = filtered} />