Respect Reddit API limits

Pushshift's increase in the maximum request size from 100 to 250
comments also increases the Reddit request rate by 2.5x. This
makes it possible to overrun the Reddit API limits, so start tracking
and respecting them. Also switch back to logging into Reddit, which
raises these limits.
This commit is contained in:
Christopher Gurnee 2022-10-10 17:05:48 +00:00
parent ec0fc925b4
commit be7064124f
3 changed files with 118 additions and 59 deletions

View file

@ -1,47 +1,49 @@
//import { fetchJson } from '../../utils'
//
//// Change this to your own client ID: https://www.reddit.com/prefs/apps
//// The app NEEDS TO BE an installed app and NOT a web apps
//
//// Current using dummy ID from throwaway
//const clientID = 'ZmYXJ5RaSDhF-77YaFulWw'
//
//// Token for reddit API
//let token
//
//const getToken = () => {
// // We have already gotten a token
// if (token !== undefined) {
// return Promise.resolve(token)
// }
//
// // Headers for getting reddit api token
// const tokenInit = {
// headers: {
// Authorization: `Basic ${window.btoa(`${clientID}:`)}`,
// 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8'
// },
// method: 'POST',
// body: `grant_type=${encodeURIComponent('https://oauth.reddit.com/grants/installed_client')}&device_id=DO_NOT_TRACK_THIS_DEVICE`
// }
//
// return fetchJson('https://www.reddit.com/api/v1/access_token', tokenInit)
// .then(response => {
// token = response.access_token
// return token
// })
// .catch(error => {
// console.error('reddit.getToken ->')
// throw error
// })
//}
//
//// Get header for general api calls
//export const getAuth = () => {
// return getToken()
// .then(token => ({
// headers: {
// Authorization: `bearer ${token}`
// }
// }))
//}
import { fetchJson } from '../../utils'
// Change this to your own client ID: https://www.reddit.com/prefs/apps
// The app NEEDS TO BE an installed app and NOT a web app
const clientID = 'NAhiRYXXEFeIXyFazmhGHQ'
// Token for reddit API (undefined until the first successful login)
let token
// Time (ms since epoch) after which the cached token must be refreshed;
// Infinity when the login response did not report a usable expires_in
let tokenExpiresAtMS = Infinity
// The in-flight login request, if any, shared by all concurrent callers
let pendingToken

// TODO: respect login API limits?

// Resolves to an application-only OAuth access token for the Reddit API.
// - A cached token is reused until shortly before its reported expiry.
// - Concurrent callers share a single login request instead of each
//   issuing their own.
// - Rejects with the underlying fetchJson error if the login fails; the
//   next call then starts a fresh attempt.
const getToken = () => {
  // We have already gotten a token, and it is still valid
  if (token !== undefined && Date.now() < tokenExpiresAtMS) {
    return Promise.resolve(token)
  }

  // A login is already running; piggyback on it
  if (pendingToken !== undefined) {
    return pendingToken
  }

  // Headers for getting reddit api token
  const tokenInit = {
    headers: {
      Authorization: `Basic ${window.btoa(`${clientID}:`)}`,
      'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8'
    },
    method: 'POST',
    body: `grant_type=${encodeURIComponent('https://oauth.reddit.com/grants/installed_client')}&device_id=DO_NOT_TRACK_THIS_DEVICE`
  }

  pendingToken = fetchJson('https://www.reddit.com/api/v1/access_token', tokenInit)
    .then(response => {
      pendingToken = undefined
      token = response.access_token
      // expires_in is a lifetime in seconds; refresh a minute early so a
      // token never expires while a request using it is in flight
      const lifetimeS = Number(response.expires_in)
      tokenExpiresAtMS = Number.isFinite(lifetimeS)
        ? Date.now() + Math.max(lifetimeS - 60, 0) * 1000
        : Infinity
      return token
    })
    .catch(error => {
      // Allow the next caller to retry instead of reusing the failed request
      pendingToken = undefined
      console.error('reddit.getToken ->')
      throw error
    })
  return pendingToken
}
// Resolves to a fresh fetch() init object whose headers carry the
// bearer token needed for general API calls
export const getAuth = () => {
  // Wrap an access token in the Authorization header Reddit expects
  const toInit = accessToken => {
    const headers = { Authorization: `bearer ${accessToken}` }
    return { headers }
  }
  return getToken().then(toInit)
}

View file

@ -1,8 +1,59 @@
import { fetchJson } from '../../utils'
import { getAuth } from './auth'
import { fetchJsonAndHeaders, sleep } from '../../utils'
export const chunkSize = 100;
const baseURL = 'https://api.reddit.com'
const requestSettings = {headers: {"Accept-Language": "en"}}
const baseURL = 'https://oauth.reddit.com'
// Rate-limit bookkeeping, kept in sync with the X-Ratelimit-* response
// headers by fetchJson() below.
// Requests permitted per rate-limit window (corrected from the headers)
let limitDefault = 300
// Requests we believe are still available in the current window
let limitRemaining = limitDefault
// Estimated time (ms since epoch) at which the current window ends
let limitResetAtMS = 0

// Fetch JSON results from the Reddit API, respecting the reported API limits.
// Resolves to the parsed JSON body of the response for `url`. If the local
// budget is exhausted, first sleeps until one second past the estimated
// window reset. Errors from getAuth() or fetchJsonAndHeaders() propagate.
const fetchJson = async url => {
  const init = await getAuth()

  if (limitRemaining <= 0) {
    // Out of budget: wait for the window to end, plus 1s of slack
    const waitMS = limitResetAtMS - Date.now() + 1000
    if (waitMS > 0) {
      // TODO: update the UI to notify user of a delay
      console.log(`Waiting ${waitMS}ms for Reddit API`)
      await sleep(waitMS)
    }
    // Another caller may have refilled the budget while we slept
    if (limitRemaining <= 0) {
      limitRemaining = limitDefault
    }
  }
  // Charge this request against the budget before it is sent
  limitRemaining--

  init.headers['Accept-Language'] = 'en'
  const response = await fetchJsonAndHeaders(url, init)
  const headers = response.headers

  // Missing or unparsable headers yield NaN, which fails every comparison
  // below and therefore leaves the local bookkeeping untouched
  const reportedLimitRemaining = Number.parseInt(headers.get('X-Ratelimit-Remaining'), 10)
  const reportedLimitDefault = reportedLimitRemaining + Number.parseInt(headers.get('X-Ratelimit-Used'), 10)
  if (reportedLimitDefault && reportedLimitDefault !== limitDefault) {
    // This should only happen if Reddit changes the API limits
    console.warn('Correcting limitDefault from', limitDefault, 'to', reportedLimitDefault)
    limitDefault = reportedLimitDefault
  }

  // X-Ratelimit-Reset is treated as seconds from now until the window ends
  const reportedLimitResetAtMS = Number.parseInt(headers.get('X-Ratelimit-Reset'), 10) * 1000 + Date.now()
  if (reportedLimitResetAtMS > limitResetAtMS + 30000) {
    // This happens each time the Reddit API resets our limit
    console.debug('Resetting limitResetAtMS from', limitResetAtMS, 'to', reportedLimitResetAtMS)
    limitResetAtMS = reportedLimitResetAtMS
  } else {
    if (reportedLimitResetAtMS < limitResetAtMS) {
      // This happens sporadically due to jitter
      console.debug('Decreasing limitResetAtMS from', limitResetAtMS, 'to', reportedLimitResetAtMS)
      limitResetAtMS = reportedLimitResetAtMS
    }
    if (reportedLimitRemaining < limitRemaining) {
      // This probably shouldn't happen unless Reddit decreases the API limits
      console.warn('Decreasing limitRemaining from', limitRemaining, 'to', reportedLimitRemaining)
      limitRemaining = reportedLimitRemaining
    }
  }

  return response.json
}
const errorHandler = (origError, from) => {
console.error(from + ': ' + origError)
@ -25,7 +76,7 @@ const errorHandler = (origError, from) => {
// Return the post itself
export const getPost = threadID => (
fetchJson(`${baseURL}/comments/${threadID}.json?limit=1`, requestSettings)
fetchJson(`${baseURL}/comments/${threadID}.json?limit=1`)
.then(thread => thread[0].data.children[0].data)
.catch(error => errorHandler(error, 'reddit.getPost'))
)
@ -39,7 +90,7 @@ export const getPost = threadID => (
// Fetch multiple comments by id
export const getComments = commentIDs => (
fetchJson(`${baseURL}/api/info?id=${commentIDs.map(id => `t1_${id}`).join()}`, requestSettings)
fetchJson(`${baseURL}/api/info?id=${commentIDs.map(id => `t1_${id}`).join()}`)
.then(results => results.data.children.map(({data}) => data))
.catch(error => errorHandler(error, 'reddit.getComments'))
)
@ -48,8 +99,7 @@ export const getComments = commentIDs => (
export const getParentComments = (threadID, commentID, parents) => {
parents = Math.min(parents, 8)
return fetchJson(
`${baseURL}/comments/${threadID}?comment=${commentID}&context=${parents}&limit=${parents}&threaded=false&showmore=false`,
requestSettings
`${baseURL}/comments/${threadID}?comment=${commentID}&context=${parents}&limit=${parents}&threaded=false&showmore=false`
)
.then(results => {
const { children } = results[1].data

View file

@ -3,13 +3,20 @@ import SnuOwnd from 'snuownd'
const markdown = SnuOwnd.getParser()
// Fetches JSON at the given url or throws a descriptive Error
export const fetchJson = (url, init = {}) =>
export const fetchJson = (url, init = {}) => fetchJsonAndHeaders(url, init)
.then(response => response.json)
// Fetches JSON, returning an object with a .json and a .headers member
export const fetchJsonAndHeaders = (url, init = {}) =>
window.fetch(url, init)
.then(response => response.ok ?
response.json()
.catch(error => {
throw new Error((response.statusText || response.status) + ', ' + error)
}) :
{
json: response.json()
.catch(error => {
throw new Error((response.statusText || response.status) + ', ' + error)
}),
headers: response.headers
} :
response.text()
.catch(error => {
throw new Error((response.statusText || response.status) + ', ' + error)