diff --git a/.gitignore b/.gitignore index 646c1d68b..74a26db5a 100755 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,11 @@ public/garden public/licenses.txt syndications/cache +syndications/ignored_subreddits.txt syndications/indiekit_token.json syndications/liked_songs.json syndications/media_urls.json +syndications/reddit-export +syndications/reddit_credentials.json syndications/youtube_credentials.json syndications/youtube_token.json diff --git a/bun.lockb b/bun.lockb index 7c5ef3612..efde0dec7 100644 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index a00822f79..106da062a 100644 --- a/package.json +++ b/package.json @@ -26,10 +26,13 @@ "@nuxtjs/sitemap": "latest", "@tresjs/nuxt": "latest", "nuxt": "^3.13.0", + "octokat": "^0.10.0", "sass-embedded": "^1.79.2" }, "devDependencies": { "@types/bun": "^1.1.10", + "@types/user-agents": "^1.0.4", + "csv-parse": "^5.5.6", "dotenv": "^16.4.5", "feed": "^4.2.2", "file-type": "^19.5.0", @@ -37,6 +40,8 @@ "googleapis": "^144.0.0", "open": "^10.1.0", "run-script-os": "^1.1.6", + "snoowrap": "^1.23.0", + "user-agents": "^1.1.325", "word-counting": "^1.1.4" } } diff --git a/syndications/archive_utils.ts b/syndications/archive_utils.ts new file mode 100644 index 000000000..2a095d082 --- /dev/null +++ b/syndications/archive_utils.ts @@ -0,0 +1,15 @@ +export async function getArchiveUrl(url: string, timestamp?: number) { + const trimmedUrl = encodeURIComponent(url.replace(/^https?:\/\/(www\.)?/, '')); + const archiveInfoUrl = + `http://archive.org/wayback/available?url=${trimmedUrl}×tamp=${timestamp}`; + const archiveResponse = await fetch(archiveInfoUrl).then(r => r.text()); + let archiveJson; + try { + archiveJson = JSON.parse(archiveResponse); + } catch (err) { + console.error("Unexpected response from wayback machine:", archiveInfoUrl, archiveResponse, + err); + process.exit(0); + } + return archiveJson.archived_snapshots?.closest?.timestamp as string | undefined; +} diff --git 
a/syndications/custom_data.ts b/syndications/custom_data.ts new file mode 100644 index 000000000..89d12d188 --- /dev/null +++ b/syndications/custom_data.ts @@ -0,0 +1,81 @@ +import { getProperties, sendMessage, uploadMedia } from "./indiekit"; + +async function getInput(prompt: string) { + console.log(prompt); + for await (const code of console as unknown as AsyncIterable) { + return code; + } + throw "Couldn't get input from user; This shouldn't happen"; +} + +async function updatePost(url: string, properties: Record, action = "update") { + await sendMessage(JSON.stringify({ + action, + url, + ...properties + })).then(async res => { + if ([200, 201, 204].includes(res.status)) { + console.log(await res.json()); + } else { + console.warn("Failed to send message to indiekit", res, await res.text()); + throw res; + } + }); +} + +const command = process.argv[2]; +if (command === "list") { + listReplies(); +} else if (command === "add") { + addReply(); +} else if (command === "clear") { + clearReplies(); +} else { + console.log("Call this script with the command you'd like to perform:"); + console.log(`${process.argv.slice(0, 1).join(" ")} [COMMAND]`); + console.log("Available commands:"); + console.log(" list: Print the replies on a post"); + console.log(" add: Add a new reply to a post"); + console.log(" clear: Remove all replies from a post"); +} + +async function listReplies() { + const url = await getInput("Input the url of the post to list the replies of:"); + await getProperties(url, "replies").then(({ replies }) => + console.log(...(replies as unknown[]).map(r => JSON.stringify(r))) + ); +} + +async function addReply() { + const url = await getInput("Input the url of the post add a reply to:"); + const syndication = await getInput("Input the url of the reply:"); + const body = await getInput("Input the html of the reply:"); + const published = await getInput("Input the timestamp of the reply:"); + const author = { + name: await getInput("Input the name of the 
reply's author:"), + url: await getInput("Input the canonical url of the reply's author:"), + image: await uploadMedia(await getInput("Input the address of the reply's author's pfp:")) + }; + + await updatePost(url, { + add: { + replies: [ + { + body, + author, + published: new Date( + Number.isNaN(parseInt(published)) ? published : parseInt(published) + ).getTime(), + syndication + } + ] + } + }); +} + +async function clearReplies() { + const url = await getInput("Input the url of the post to clear the replies of:"); + await updatePost(url, { + delete: "replies" + }); +} diff --git a/syndications/indiekit.ts b/syndications/indiekit.ts index 6ad61fcf2..66f243e2f 100644 --- a/syndications/indiekit.ts +++ b/syndications/indiekit.ts @@ -1,7 +1,8 @@ import fs from "fs"; import open from 'open'; import { fileTypeFromBuffer } from 'file-type'; -import type { Author, Post } from "~/types"; +import { getArchiveUrl } from "./archive_utils"; +import type { Author, Post, Reply } from "~/types"; function getHash(data: Bun.BlobOrStringOrBuffer) { const hasher = new Bun.CryptoHasher("md5"); @@ -94,18 +95,18 @@ async function authenticate() { } let retries = 0; -async function sendPost(body: Record) { +export async function sendMessage(body: string): Promise { if (accessToken == null) { accessToken = await authenticate(); } - await fetch("https://indie.incremental.social/micropub", { + return await fetch("https://indie.incremental.social/micropub", { method: "POST", headers: { 'Authorization': `Bearer ${accessToken}`, 'Content-Type': 'application/json' }, - body: JSON.stringify({ type: "h-entry", properties: body }) + body }).then(async res => { if (res.status === 429) { retries++; @@ -114,22 +115,63 @@ async function sendPost(body: Record) { process.exit(0); } await new Promise(resolve => setTimeout(resolve, 1000)); - return await sendPost(body); + return await sendMessage(body); } retries = 0; - if (res.status === 202) { - console.log(await res.json().then(r => 
r.success_description as string)); - } else { - console.warn("Failed to send post to indiekit", res, await res.text()); - throw res; + return res; + }); +} + +async function sendPost(body: Record) { + await sendMessage(JSON.stringify({ type: "h-entry", properties: body })) + .then(async res => { + if (res.status === 202) { + console.log(await res.json().then(r => r.success_description as string)); + } else { + console.warn("Failed to send message to indiekit", res, await res.text()); + throw res; + } + }); +} + +export async function getProperties(postUrl: string, ...properties: string[]): +Promise> { + if (accessToken == null) { + accessToken = await authenticate(); + } + + let reqUrl = "https://indie.incremental.social/micropub?q=source"; + reqUrl += `&url=${encode(postUrl)}`; + if (properties.length > 0) { + reqUrl += '&' + properties.map(p => `properties[]=${encode(p)}`).join('&'); + } + + return await fetch(reqUrl, { + method: "GET", + headers: { + 'Authorization': `Bearer ${accessToken}`, + 'Content-Type': 'application/json' } + }).then(async res => { + if (res.status === 429) { + retries++; + if (retries > 3) { + console.error("Too many retries! Giving up."); + process.exit(0); + } + await new Promise(resolve => setTimeout(resolve, 1000)); + return await getProperties(postUrl, ...properties); + } + + retries = 0; + return (await res.json()).properties; }); } const MEDIA_URLS_PATH = "./syndications/media_urls.json"; let mediaUrls: Record | undefined = undefined; -async function uploadMedia(url: string) { +export async function uploadMedia(url: string) { if (mediaUrls == null) { mediaUrls = fs.existsSync(MEDIA_URLS_PATH) ? 
JSON.parse(fs.readFileSync(MEDIA_URLS_PATH).toString()) as {} : {}; @@ -139,10 +181,18 @@ async function uploadMedia(url: string) { return mediaUrls[url]; } - let res = await fetch(url); + let res = await fetch(url, { redirect: "follow" }); if (res.status !== 200) { - console.log("Failed to download media", url, res, await res.text()); - return undefined; + console.log("Failed to download media:", res.status, res.statusText); + const archiveUrl = await getArchiveUrl(url); + if (archiveUrl) { + console.log("...but it appears archive.org may have a copy! Download from there..."); + res = await fetch(archiveUrl, { redirect: "follow" }); + if (res.status !== 200) { + console.log("Archive.org failed as well. Giving up."); + return undefined; + } + } } // Check if its already uploaded @@ -195,6 +245,7 @@ export async function addArticle(article: { category: string | string[]; photo?: string; originalUrl?: string; + replies?: Reply[]; }) { const { photo, ...body } = article; const preview = photo == null ? undefined : await uploadMedia(photo); @@ -215,7 +266,8 @@ export async function addBookmark(bookmark: { if (author) { author.image = author.image == null ? undefined : await uploadMedia(author.image); } - await sendPost({ ...body, author, preview }); + const archiveUrl = await getArchiveUrl(bookmark["bookmark-of"], bookmark.published?.getTime()); + await sendPost({ ...body, author, preview, archiveUrl }); } export async function addFavorite(favorite: { @@ -232,17 +284,20 @@ export async function addFavorite(favorite: { if (author) { author.image = author.image == null ? 
undefined : await uploadMedia(author.image); } - await sendPost({ ...body, author, preview }); + const archiveUrl = await getArchiveUrl(favorite["like-of"], favorite.published?.getTime()); + await sendPost({ ...body, author, preview, archiveUrl }); } export async function addReply(reply: { 'in-reply-to': string; + name?: string; content: string; published?: Date; category: string | string[]; photo?: string; originalUrl?: string; - parent: Partial + parent: Partial; + replies?: Reply[]; }) { const { photo, parent, ...body } = reply; const preview = photo == null ? undefined : await uploadMedia(photo); @@ -252,7 +307,8 @@ export async function addReply(reply: { if (parent.author?.image != null) { parent.author.image = await uploadMedia(parent.author.image); } - await sendPost({ ...body, parent, preview }); + const archiveUrl = await getArchiveUrl(reply["in-reply-to"], reply.published?.getTime()); + await sendPost({ ...body, parent, preview, archiveUrl }); } export async function addRepost(repost: { @@ -269,5 +325,6 @@ export async function addRepost(repost: { if (author) { author.image = author.image == null ? 
undefined : await uploadMedia(author.image); } - await sendPost({ ...body, author, preview }); + const archiveUrl = await getArchiveUrl(repost["repost-of"], repost.published?.getTime()); + await sendPost({ ...body, author, preview, archiveUrl }); } diff --git a/syndications/reddit_utils.ts b/syndications/reddit_utils.ts new file mode 100644 index 000000000..b0ae454ce --- /dev/null +++ b/syndications/reddit_utils.ts @@ -0,0 +1,169 @@ +import fs from "fs"; +import UserAgent from "user-agents"; +import snoowrap from "snoowrap"; + +const { clientId, clientSecret, username, password } = + JSON.parse(fs.readFileSync("./syndications/reddit_credentials.json").toString()); + +// The snoowrap typings are bad/inaccurate +interface Comment { + body_html: string; + permalink: string; + created: number; + subreddit?: string | { display_name: string }; + subreddit_name_prefixed: string; + author: string | { name: string }; + link_id: string; + replies?: { + created: number; + body_html: string; + author: string; + permalink: string; + }[]; + // and a bunch more we're not using +} + +interface Submission { + selftext_html: string; + permalink: string; + title: string; + created: number; + subreddit: string | { display_name: string }; + subreddit_name_prefixed: string; + author: string | { name: string }; + url: string; + preview?: { + images?: { + source?: { + url?: string; + } + }[] + }; + replies?: { + created: number; + body_html: string; + author: string; + permalink: string; + }[]; + // and a bunch more we're not using +} + +let r: snoowrap; +function setupReddit() { + try { + r = new snoowrap({ + userAgent: new UserAgent().toString(), + clientId, clientSecret, username, password + }); + return true; + } catch (error) { + console.error("Failed to setup reddit:", error); + process.exit(0); + } +} +setupReddit(); + +let ignoredSubreddits = new Set(); +const IGNORED_SUBREDDITS_PATH = "./syndications/ignored_subreddits.txt"; +if (fs.existsSync(IGNORED_SUBREDDITS_PATH)) { + 
ignoredSubreddits = new Set(fs.readFileSync(IGNORED_SUBREDDITS_PATH).toString().split("\n")); +} + +let subredditTags: Record = {}; +const SUBREDDIT_TAGS_PATH = "./syndications/subreddit_tags.json"; +if (fs.existsSync(SUBREDDIT_TAGS_PATH)) { + subredditTags = JSON.parse(fs.readFileSync(SUBREDDIT_TAGS_PATH).toString()); +} + +// https://stackoverflow.com/a/52171480/4376101 +const cyrb53 = (str: string, seed = 0) => { + let h1 = 0xdeadbeef ^ seed, h2 = 0x41c6ce57 ^ seed; + for(let i = 0, ch; i < str.length; i++) { + ch = str.charCodeAt(i); + h1 = Math.imul(h1 ^ ch, 2654435761); + h2 = Math.imul(h2 ^ ch, 1597334677); + } + h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507); + h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909); + h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507); + h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909); + + return 4294967296 * (2097151 & h2) + (h1 >>> 0); +}; + +export function getAvatar(user: string) { + const index = cyrb53(user) % 8; + return `https://www.redditstatic.com/avatars/defaults/v2/avatar_default_${index}.png`; +} + +export function extractUsername(user: Comment["author"]) { + return typeof user === "string" ? 
user : user.name; +} + +export function extractSubreddit(comment: Comment | Submission) { + if (typeof comment.subreddit === "string") { + return comment.subreddit; + } else if (comment.subreddit != null) { + return comment.subreddit.display_name; + } + return comment.subreddit_name_prefixed.slice(2); +} + +// I believe we've been permanently rate-limited, so we just read comments and posts from cache +export function getComment(id: string) { + let comment: Comment | undefined; + const commentCachePath = `./syndications/cache/${id}.json`; + if (fs.existsSync(commentCachePath)) { + comment = JSON.parse(fs.readFileSync(commentCachePath).toString()); + } + if (comment == undefined) { + return undefined; + } + if (extractUsername(comment.author) === "[deleted]") { + return undefined; + } + if (Object.keys(comment).length === 0) { + return undefined; + } + if (ignoredSubreddits.has(extractSubreddit(comment))) { + return undefined; + } + comment.subreddit = subredditTags[(extractSubreddit(comment)).toLowerCase()]; + comment.permalink = "https://www.reddit.com" + comment.permalink; + return comment; +} + +export function getSubmission(id: string) { + let submission: Submission | undefined; + const submissionCachePath = `./syndications/cache/${id}.json`; + if (fs.existsSync(submissionCachePath)) { + submission = JSON.parse(fs.readFileSync(submissionCachePath).toString()); + } + if (submission == undefined) { + return undefined; + } + if (extractUsername(submission.author) === "[deleted]") { + return undefined; + } + if (Object.keys(submission).length === 0) { + return undefined; + } + if (ignoredSubreddits.has(extractSubreddit(submission))) { + return undefined; + } + submission.subreddit = subredditTags[(extractSubreddit(submission)).toLowerCase()]; + submission.permalink = "https://www.reddit.com" + submission.permalink; + return submission; +} + +export function getPostPreview(submission: Submission) { + return submission.preview?.images?.[0]?.source?.url; +} + +export 
function getAuthorObj(author: string) { + return { + name: author, + url: `https://www.reddit.com/u/${author}`, + image: getAvatar(author) + }; +} diff --git a/syndications/subreddit_tags.json b/syndications/subreddit_tags.json new file mode 100644 index 000000000..5c7607089 --- /dev/null +++ b/syndications/subreddit_tags.json @@ -0,0 +1,752 @@ +{ + "antijokes": "comedy", + "announcements": "reddit", + "animalcrossingmeme": "nintendo", + "androiddev": "android", + "androidapps": "android", + "amadisasters": "ama", + "airz23": "tales", + "talesfromtechsupport": "tales", + "themoddingtree": "incremental", + "195": "comedy", + "bioshockinfinite": "gaming", + "bioshock": "gaming", + "cyanideandhappiness": "comics", + "comedyheaven": "comedy", + "comedycemetery": "comedy", + "collegehumor": "comedy", + "code": "programming", + "clickerheroes": "incremental", + "incremental_games": "incremental", + "incremental_gamedev": "incremental", + "tuxedo_jack": "politics", + "texasforsanders": "politics", + "walkablestreets": "urbanism", + "sandersforpresident": "politics", + "yangforpresidenthq": "politics", + "webdev": "programming", + "undertaleshitposts": "gaming", + "u_srgrafo": "comics", + "videos": "entertainment", + "voidlinux": "linux", + "vrui": "gaming", + "vrphysics": "gaming", + "vive": "gaming", + "voxelgamedev": "gamedev", + "technoblade": "minecraft", + "teslamotors": "entertainment", + "thanksobama": "comedy", + "thebutton": "reddit", + "place": "reddit", + "theoryofreddit": "reddit", + "traaaaaaannnnnnnnnns": "comedy", + "trendingsubreddits": "reddit", + "truereddit": "reddit", + "starterpacks": "comedy", + "subredditdrama": "reddit", + "subredditoftheday": "reddit", + "subredditsimulator": "reddit", + "talesfromtales": "tales", + "talesfromthepizzaguy": "tales", + "talesfromthelaw": "tales", + "talesfromthecustomer": "tales", + "talesfromretail": "tales", + "talesfromnoreply": "tales", + "talesfromlife": "tales", + "tearsofthekingdom": "zelda", + "shadowban": 
"reddit", + "shittyprogramming": "programming", + "snoovatars": "reddit", + "softwaregore": "comedy", + "soccer": "entertainment", + "propagandaposters": "politics", + "prorevenge": "tales", + "psispellcompendium": "minecraft", + "phoenixsc": "comedy", + "pixelart": "art", + "placeapi": "reddit", + "playmindcrack": "minecraft", + "playmygame": "gaming", + "pokemongo": "pokemon", + "pokemongodev": "pokemon", + "pokemongomystic": "pokemon", + "pokemonpicross": "pokemon", + "polandball": "comics", + "politicalhumor": "politics", + "political_revolution": "politics", + "politicalvideo": "politics", + "polymclauncher": "minecraft", + "prequelmemes": "movies", + "pricefield": "lifeisstrange", + "programme_irl": "programming", + "programmerreactions": "programming", + "programmertil": "programming", + "programmerhumor": "programming", + "programminghorror": "programming", + "programmingtools": "programming", + "pewdiepiesubmissions": "comedy", + "oculus": "gaming", + "oneplus": "android", + "ooer": "comedy", + "patches765": "tales", + "pcmasterrace": "gaming", + "pettyrevenge": "tales", + "museumofreddit": "reddit", + "nameaserver": "reddit", + "neutralpolitics": "politics", + "nexus": "android", + "nfl": "entertainment", + "nexus6": "android", + "nonononoyes": "comedy", + "notkenm": "comedy", + "notmyjob": "comedy", + "nottheonion": "comedy", + "nuclearrevenge": "tales", + "nursing": "education", + "mariomaker": "nintendo", + "marvel": "movies", + "marvelstudios": "movies", + "maliciouscompliance": "tales", + "materialdesign": "design", + "maybemaybemaybe": "comedy", + "maydaystrike": "politics", + "meirl": "comedy", + "memes": "comedy", + "memerestoration": "comedy", + "memeyourenthusiasm": "comedy", + "mildlyinfuriating": "tales", + "mindcrack": "minecraft", + "mindcrackcirclejerk": "minecraft", + "mindcrackdiscussion": "minecraft", + "minecraftdungeons": "minecraft", + "minecraft_earth": "minecraft", + "minecraftinventions": "minecraft", + "minecraftsuggestions": 
"minecraft", + "karmacourt": "reddit", + "kenm": "comedy", + "keming": "comedy", + "kidsarefuckingstupid": "comedy", + "lasercleaningporn": "entertainment", + "latestagecapitalism": "politics", + "learnpython": "programming", + "letsplaymygame": "gaming", + "libertarian": "politics", + "linuxcirclejerk": "linux", + "linux_gaming": "linux", + "linuxhardware": "linux", + "linuxmasterrace": "linux", + "linuxmemes": "linux", + "lolphp": "programming", + "ludology": "gaming", + "ludwigahgren": "comedy", + "hyruleengineering": "zelda", + "hyrulewarriors": "zelda", + "iama": "ama", + "iamverybadass": "comedy", + "iamverysmart": "comedy", + "ideasfortheadmins": "reddit", + "idontworkherelady": "tales", + "itsaunixsystem": "linux", + "jakeandamir": "comedy", + "java": "programming", + "javascript": "programming", + "jokes": "comedy", + "justgamedevthings": "gamedev", + "gifbattles": "entertainment", + "gifsound": "entertainment", + "gifs": "entertainment", + "gifsthatkeepongiving": "entertainment", + "goldbenefits": "reddit", + "googlehome": "technology", + "googlepixel": "android", + "grilledcheese": "entertainment", + "hillaryforprison": "politics", + "highqualitygifs": "entertainment", + "hitboxgore": "gaming", + "holdmybeer": "comedy", + "holdmycatnip": "comedy", + "holup": "entertainment", + "hqgstudios": "music", + "humblebundles": "gaming", + "feedthebeast": "minecraft", + "fellowkids": "comedy", + "fixedbytheduet": "entertainment", + "formula1": "entertainment", + "fortnite": "gaming", + "fortnitebr": "gaming", + "financialindependence": "politics", + "funnyandsad": "comedy", + "funnyvideos": "comedy", + "galaxynote5": "android", + "funny": "comedy", + "galaxys7": "android", + "gamedeals": "gaming", + "gamedesign": "gamedev", + "gamedevelopment": "gamedev", + "gamedevscreens": "gamedev", + "gamerporn": "gaming", + "gamers": "gaming", + "games": "gaming", + "gametales": "tales", + "gaming4gamers": "gaming", + "gamingcirclejerk": "gaming", + "dndgreentext": "ttrpg", + 
"dnd": "ttrpg", + "dndhomebrew": "ttrpg", + "dndmemes": "ttrpg", + "eatcheapandhealthy": "politics", + "entitledparents": "tales", + "entitledpeople": "tales", + "fakealbumcovers": "entertainment", + "facepalm": "entertainment", + "circlejerk": "reddit", + "cpp": "programming", + "crappydesign": "design", + "cringe": "entertainment", + "creepyasterisks": "entertainment", + "cringepics": "entertainment", + "animalsbeingjerks": "entertainment", + "writingprompts": "writing", + "tifu": "tales", + "thesilphroad": "pokemon", + "workreform": "politics", + "antiwork": "politics", + "amitheasshole": "tales", + "amifreetogo": "tales", + "amibeingdetained": "tales", + "adviceanimals": "comedy", + "bollywoodrealism": "comedy", + "bettereveryloop": "entertainment", + "beetlejuicing": "reddit", + "changelog": "reddit", + "cgpgrey2": "education", + "casualiama": "ama", + "bulletjournal": "art", + "bujo": "art", + "buildapc": "technology", + "buildapcsales": "technology", + "breath_of_the_wild": "zelda", + "diwhy": "comedy", + "dadjokes": "comedy", + "crackpack": "minecraft", + "gamerghazi": "politics", + "breadtube": "politics", + "murica": "comedy", + "murderedbywords": "comedy", + "modnews": "reddit", + "modcoord": "reddit", + "mirrorsforsale": "comedy", + "militarystories": "tales", + "military": "politics", + "mildlyinteresting": "entertainment", + "me_irl": "comedy", + "pics": "entertainment", + "photoshopbattles": "comedy", + "phijkchu": "entertainment", + "a:t5_3bl3m": "entertainment", + "a:t5_30a68": "minecraft", + "personalfinancecanada": "politics", + "personalfinance": "politics", + "perfecttiming": "entertainment", + "perfectlycutscreams": "entertainment", + "perfectloops": "entertainment", + "peoplefuckingdying": "comedy", + "peoplebeingjerks": "entertainment", + "penspinning": "entertainment", + "penmanshipporn": "art", + "patientgamers": "gaming", + "pathfinder_rpg": "ttrpg", + "pathfinder": "ttrpg", + "palestine": "politics", + "oopsdidntmeanto": "comedy", + 
"self": "reddit", + "askmen": "entertainment", + "askpsychology": "education", + "askhistorians": "education", + "askculinary": "entertainment", + "assholedesign": "design", + "animalcrossing": "nintendo", + "amongus": "gaming", + "unexpectedhogwarts": "entertainment", + "unity3d": "gamedev", + "godot": "gamedev", + "unixporn": "linux", + "yugioh": "television", + "youtubehaiku": "entertainment", + "youtube": "entertainment", + "yesyesyesyesno": "comedy", + "yandere_simulator_b": "gaming", + "yandere_simulator": "gaming", + "xubuntu": "linux", + "wtf": "entertainment", + "worldnews": "politics", + "worldbuilding": "comedy", + "wow": "entertainment", + "wowthanksimcured": "comedy", + "written4reddit": "writing", + "woahdude": "entertainment", + "13reasonswhy": "television", + "3dprinting": "art", + "3ds": "nintendo", + "actlikeyoubelong": "comedy", + "adventuretime": "television", + "advice": "entertainment", + "agario": "gaming", + "antimatterdimensions": "incremental", + "apple": "technology", + "askreddit": "entertainment", + "askredditafterdark": "entertainment", + "askscience": "science", + "asksciencefiction": "entertainment", + "aspergers": "education", + "atheism": "politics", + "atom": "programming", + "autochess": "gaming", + "aww": "entertainment", + "baww": "entertainment", + "baduibattles": "design", + "bananasforscale": "comedy", + "battlefield_one": "gaming", + "battlestations": "technology", + "beamazed": "entertainment", + "beatsaber": "gaming", + "beginnersguide": "gaming", + "bestof": "entertainment", + "bestoflegaladvice": "tales", + "beta": "reddit", + "bikinibottomtwitter": "comedy", + "bitcoin": "technology", + "blackmagicfuckery": "comedy", + "blackops4": "gaming", + "blackpeoplegifs": "entertainment", + "blackpeopletwitter": "entertainment", + "bleachshirts": "art", + "blink182": "music", + "blunderyears": "comedy", + "boardgames": "gaming", + "bojackhorseman": "television", + "bokunoheroacademia": "television", + "books": "entertainment", + 
"bossfight": "comedy", + "botsrights": "entertainment", + "botwatch": "entertainment", + "bravestwarriors": "television", + "bsa": "tales", + "buildingarcadia": "gaming", + "bullshitadvertising": "design", + "bullshit_translator": "writing", + "buyitforlife": "politics", + "captaindisillusion": "entertainment", + "cartoonphysics": "comedy", + "casualconversation": "entertainment", + "catastrophicfailure": "entertainment", + "cats": "entertainment", + "chairsunderwater": "comedy", + "changemyview": "politics", + "chatgpt": "technology", + "chemicalreactiongifs": "science", + "choosingbeggars": "entertainment", + "chromeos": "technology", + "circleoftrust": "reddit", + "college": "entertainment", + "conspiracy": "entertainment", + "convenientcop": "entertainment", + "cookieclicker": "incremental", + "coolgamesinc": "gamedev", + "coronavirus": "politics", + "cosplay": "art", + "creepy": "entertainment", + "crypto": "technology", + "cscareerquestions": "programming", + "curiousvideos": "entertainment", + "cynicalbrit": "gaming", + "damnthatsinteresting": "entertainment", + "dankmemes": "comedy", + "dart": "urbanism", + "datahoarder": "technology", + "dataisbeautiful": "science", + "datascience": "science", + "deadpool": "movies", + "destinythegame": "gaming", + "discordapp": "technology", + "disenchantment": "television", + "diy": "entertainment", + "dncleaks": "politics", + "dota2": "gaming", + "dougdoug": "entertainment", + "elderscrolls": "gaming", + "elderscrollsonline": "gaming", + "eminem": "music", + "engineering": "technology", + "engineeringporn": "technology", + "enzocomics": "comics", + "esist": "politics", + "eve": "gaming", + "expectationvsreality": "entertainment", + "explainbothsides": "politics", + "eyebleach": "entertainment", + "fallout": "gaming", + "fantheories": "entertainment", + "fireemblem": "gaming", + "firewatch": "gaming", + "forager": "gaming", + "freesoftware": "technology", + "frogs": "entertainment", + "fuckcars": "urbanism", + 
"fucklawns": "urbanism", + "fuckthealtright": "politics", + "funkopop": "gaming", + "gamebuildergarage": "gamedev", + "gemcraft": "gaming", + "ghibli": "movies", + "glitch_art": "art", + "godus": "gaming", + "gopro": "entertainment", + "grassrootsselect": "politics", + "gravityfalls": "television", + "gwent": "gaming", + "h3h3productions": "entertainment", + "hadesthegame": "gaming", + "haltandcatchfire": "television", + "harmonquest": "television", + "harrypotter": "movies", + "hasan_piker": "politics", + "headphones": "technology", + "hearthstone": "gaming", + "heathers": "movies", + "hellointernet": "education", + "historyporn": "education", + "houston": "places", + "idiotsincars": "entertainment", + "im14andthisisdeep": "politics", + "india": "places", + "indiegames": "gaming", + "indiegaming": "gaming", + "inscryption": "gaming", + "instant_regret": "entertainment", + "interestingasfuck": "entertainment", + "irc": "technology", + "irleastereggs": "entertainment", + "ironicsigns": "entertainment", + "joinrobin": "politics", + "justfuckmyshitup": "entertainment", + "kakosindustries": "entertainment", + "kurzgesagt": "education", + "lastweektonight": "politics", + "latex": "programming", + "leagueoflegends": "gaming", + "learndota2": "gaming", + "learnuselesstalents": "entertainment", + "london": "places", + "lootcratespoilers": "gaming", + "luna_lovewell": "writing", + "mademesmile": "entertainment", + "magictcg": "gaming", + "mandelaeffect": "entertainment", + "marbleitup": "gaming", + "marblemachinex": "music", + "mario": "nintendo", + "masterofnone": "television", + "masterreturns": "entertainment", + "mechanicalkeyboards": "technology", + "melvoridle": "incremental", + "minimalism": "design", + "minionhate": "comedy", + "mobpsycho100": "television", + "monitors": "technology", + "morbidquestions": "entertainment", + "mrrobot": "television", + "mychemicalromance": "music", + "mythbusters": "television", + "netsec": "technology", + "newjersey": "places", + 
"niceguys": "entertainment", + "nickofstatic": "entertainment", + "nightvale": "entertainment", + "nintendoswitch": "nintendo", + "nolawns": "urbanism", + "nomansskythegame": "gaming", + "nosleep": "writing", + "nostupidquestions": "entertainment", + "notjustbikes": "urbanism", + "nvidia": "technology", + "oddlysatisfying": "entertainment", + "oddlyterrifying": "entertainment", + "oldpeoplefacebook": "entertainment", + "oldschoolcool": "entertainment", + "onepunchman": "television", + "ontario": "places", + "osu": "gaming", + "outerwilds": "gaming", + "outoftheloop": "news", + "outside": "entertainment", + "overwatch": "gaming", + "pax": "gaming", + "perfecttower": "incremental", + "piano": "music", + "pidgin": "technology", + "piracy": "technology", + "plastidip": "entertainment", + "playtemtem": "gaming", + "plex": "technology", + "pointlesslygendered": "comedy", + "privacy": "politics", + "proceduralgeneration": "gamedev", + "ps4": "gaming", + "psych": "television", + "psychonaut": "gaming", + "publicfreakout": "entertainment", + "punkfashion": "art", + "rainmeter": "technology", + "rarepuppers": "entertainment", + "reactiongifs": "entertainment", + "reallifedoodles": "entertainment", + "reallifeshinies": "entertainment", + "redditalternatives": "reddit", + "reddit.com": "reddit", + "redditonwiki": "reddit", + "redditsync": "reddit", + "reddit_themes": "reddit", + "relationship_advice": "tales", + "relayforreddit": "reddit", + "rickandmorty": "television", + "roastmycar": "entertainment", + "rocketjump": "entertainment", + "rocketleague": "gaming", + "roguegenesia": "gaming", + "rpgtables": "ttrpg", + "ruinedmyday": "entertainment", + "sadcringe": "entertainment", + "sandiego": "places", + "scotland": "places", + "scrapbooking": "art", + "selfhosted": "technology", + "shitcosmosays": "comedy", + "shittyaskscience": "science", + "shittyconspiracy": "entertainment", + "shittykickstarters": "entertainment", + "shittylifeprotips": "entertainment", + 
"shittyreactiongifs": "entertainment", + "shittyrobots": "entertainment", + "showerthoughts": "entertainment", + "siliconvalleyhbo": "television", + "simulated": "entertainment", + "sixwordstories": "writing", + "skyrim": "gaming", + "sodadungeon": "incremental", + "soylent": "entertainment", + "speedrun": "gaming", + "stallmanwasright": "technology", + "starcitizen": "gaming", + "stardewvalley": "gaming", + "starwars": "movies", + "steam": "valve", + "steamdeck": "valve", + "stolenseats": "entertainment", + "strangerthings": "television", + "survivor": "television", + "swiggityswootygifs": "entertainment", + "swipeforfacebook": "technology", + "sydney": "places", + "synology": "technology", + "tabletopsimulator": "gaming", + "taptitans": "incremental", + "taptitans2": "incremental", + "tasker": "technology", + "teachers": "education", + "teamscorpion": "television", + "telegram": "technology", + "texas": "places", + "thailand": "places", + "amazon": "technology", + "labor": "politics", + "legaladvice": "tales", + "legaladviceuk": "comedy", + "leopardsatemyface": "entertainment", + "lgv20": "android", + "lifeprotips": "entertainment", + "lilwa_dexel": "writing", + "magic": "entertainment", + "mashgifs": "television", + "masterhacker": "comedy", + "medalmasters": "gaming", + "museumreviews": "reddit", + "namenerdcirclejerk": "entertainment", + "namenerds": "entertainment", + "narcissisticparents": "tales", + "nevertellmetheodds": "entertainment", + "nextfuckinglevel": "politics", + "nycbike": "entertainment", + "offmychest": "tales", + "onemillionwords": "writing", + "osana": "gaming", + "osha": "entertainment", + "placestart": "reddit", + "posterhunt": "television", + "psycho_alpaca": "writing", + "purplelounge": "reddit", + "quityourbullshit": "entertainment", + "raisedbynarcissists": "tales", + "randallcooper": "writing", + "randomkindness": "comedy", + "randomtables": "ttrpg", + "recruitinghell": "politics", + "redditserials": "writing", + "rpg": "ttrpg", + 
"rust": "programming", + "second": "reddit", + "secretsanta": "reddit", + "sequence": "reddit", + "silhouwhat": "design", + "skincareaddiction": "education", + "medicine": "education", + "space": "science", + "spacex": "science", + "squaredcircle": "entertainment", + "squidward_irl": "comedy", + "standupshots": "comedy", + "steamgrid": "valve", + "stoppedworking": "comedy", + "stupidpol": "politics", + "supplychain": "politics", + "survival": "tales", + "sysadmin": "technology", + "tabled": "ama", + "team60s": "reddit", + "breathinginformation": "comedy", + "companybattles": "comedy", + "croatianspy": "writing", + "cyberpunk": "art", + "deepintoyoutube": "reddit", + "digital_manipulation": "politics", + "drumpf": "politics", + "eugene": "places", + "exjw": "politics", + "explainlikeiama": "entertainment", + "explainlikeimfive": "entertainment", + "fuckhoa": "tales", + "futurology": "technology", + "gametheorists": "entertainment", + "geek": "entertainment", + "gnome": "linux", + "helpme": "politics", + "hiphopheads": "music", + "icandrawthat": "art", + "iiiiiiitttttttttttt": "technology", + "ilerminaty": "comedy", + "imaginarygaming": "art", + "imaginarygatekeeping": "comedy", + "infographics": "politics", + "internetisbeautiful": "entertainment", + "isitbullshit": "entertainment", + "istj": "entertainment", + "jobs": "tales", + "joeintransition": "politics", + "joerogan": "politics", + "justiceporn": "tales", + "justiceserved": "tales", + "blog": "reddit", + "blackout2015": "reddit", + "acqr": "nintendo", + "allaccessplaylists": "music", + "thatsinsane": "entertainment", + "the_gaben": "valve", + "thenetherlands": "places", + "therewasanattempt": "comedy", + "thisismylifenow": "comedy", + "thriftstorehauls": "entertainment", + "tiktokcringe": "entertainment", + "tiltshift": "art", + "todayilearned": "entertainment", + "undertale": "gaming", + "unethicallifeprotips": "comedy", + "unexpected": "entertainment", + "unrealengine": "gamedev", + "upliftingnews": "news", 
+ "vampyr": "gaming", + "vegetarian": "politics", + "verticalwallpapers": "art", + "wallpaper": "art", + "wallpapers": "art", + "wallstreetbets": "entertainment", + "wearos": "android", + "web_design": "design", + "wellthatsucks": "comedy", + "wellthatwaspointless": "comedy", + "westworld": "television", + "whatcouldgoright": "comedy", + "whatcouldgowrong": "comedy", + "whatisthisthing": "entertainment", + "wheredidthesodago": "comedy", + "whereisassange": "politics", + "whitepeopletwitter": "entertainment", + "wholesome": "entertainment", + "wholesomecompliance": "tales", + "wholesomegreentext": "tales", + "wholesomememes": "comedy", + "wiiu": "nintendo", + "wikileaks": "politics", + "wikipedia": "education", + "winnipeg": "places", + "witcher": "gaming", + "youdontsurf": "comedy", + "yourjokebutworse": "comedy", + "xboxone": "gaming", + "youshouldknow": "entertainment", + "alberta": "places", + "ashens": "entertainment", + "atlanta": "places", + "austin": "places", + "australia": "places", + "badroommates": "tales", + "blackmirror": "television", + "boston": "places", + "canada": "places", + "cgpgrey": "education", + "chicago": "places", + "china": "places", + "dallas": "places", + "denton": "places", + "doctorwho": "television", + "europe": "places", + "justneckbeardthings": "tales", + "philosophy": "gaming", + "psychology": "education", + "thehunter": "gaming", + "tidtrt": "tales", + "timberwolves": "entertainment", + "tmobile": "technology", + "tooafraidtoask": "entertainment", + "topmindsofreddit": "reddit", + "translator": "politics", + "trashy": "entertainment", + "traumatizethemback": "tales", + "trees": "comedy", + "trexgonewild": "comedy", + "trollxchromosomes": "entertainment", + "trollxfunny": "entertainment", + "trumpgret": "politics", + "tumblr": "entertainment", + "twoxchromosomes": "tales", + "u_bluecoatengineer": "tales", + "u_elpinko": "music", + "u_kermit_defrogg": "tales", + "u__mikebishop": "urbanism", + "underreportednews": "news", + 
"upvoted": "reddit", + "urealms": "entertainment", + "usefulredcircle": "comedy", + "videoessay": "television", + "math": "science", + "visualizedmath": "science", + "vzla": "places", + "weddingshaming": "tales", + "wholesomebpt": "entertainment", + "wigglegrams": "art", + "wtfgaragesale": "entertainment", + "parenting": "education", + "anime": "television", + "food": "entertainment", + "google": "technology", + "sports": "entertainment" +} diff --git a/syndications/update_reddit.ts b/syndications/update_reddit.ts new file mode 100644 index 000000000..c642943dc --- /dev/null +++ b/syndications/update_reddit.ts @@ -0,0 +1,235 @@
import fs from "fs";
import { parse } from "csv-parse";
import { addArticle, addBookmark, addFavorite, addReply, addRepost } from "./indiekit";
import {
    extractSubreddit,
    extractUsername,
    getAuthorObj,
    getComment,
    getPostPreview,
    getSubmission
} from "./reddit_utils";

// I don't use reddit anymore, so I just use the takeout data I exported

/**
 * Directory containing the CSV files of the reddit data export, relative to the
 * working directory the script is started from.
 */
const EXPORT_DIR = "./syndications/reddit-export";

/** A submission as returned by a successful `getSubmission` lookup. */
type Submission = NonNullable<ReturnType<typeof getSubmission>>;

/**
 * Opens one CSV file of the export and streams its rows.
 *
 * @param fileName Name of the file inside {@link EXPORT_DIR}, e.g. `"comments.csv"`.
 * @returns An async-iterable parser; with `columns: true` every row is an object
 *   keyed by the CSV header names.
 */
function readExport(fileName: string) {
    return fs
        .createReadStream(`${EXPORT_DIR}/${fileName}`)
        .pipe(parse({ columns: true }));
}

/** Converts a `created` value given in seconds into a `Date`. */
function toDate(created: number) {
    return new Date(created * 1000);
}

/**
 * Maps the replies attached to a submission into the reply objects passed to indiekit.
 *
 * @returns `undefined` when there is no submission or it carries no replies.
 */
function mapReplies(submission: Submission | undefined) {
    return submission?.replies?.map(reply => ({
        author: getAuthorObj(reply.author),
        published: toDate(reply.created).getTime(),
        body: reply.body_html,
        syndication: reply.permalink
    }));
}

/** Runs every import step in sequence; each step awaits its own posts one at a time. */
async function run() {
    await updateCommentVotes();
    await updateComments();
    await updateSubmissionVotes();
    await updateSubmissions();
    await updateSavedComments();
    await updateSavedSubmissions();
}

/** Adds a repost for every comment that was upvoted. */
async function updateCommentVotes() {
    for await (const { id, permalink, direction } of readExport("comment_votes.csv")) {
        if (direction !== "up") {
            continue;
        }

        const comment = getComment(id);
        if (comment == null) {
            continue;
        }

        await addRepost({
            "repost-of": permalink,
            content: comment.body_html,
            published: toDate(comment.created),
            category: [extractSubreddit(comment)],
            author: getAuthorObj(extractUsername(comment.author))
        });
    }
}

/**
 * Adds a reply post for every comment in the export.
 *
 * A comment without a parent is a reply to its submission; otherwise it is a
 * reply to the parent comment. Rows whose comment, submission or parent comment
 * cannot be looked up are skipped.
 */
async function updateComments() {
    for await (const { id, permalink, parent } of readExport("comments.csv")) {
        const comment = getComment(id);
        if (comment == null) {
            continue;
        }

        const subreddit = extractSubreddit(comment);
        const published = toDate(comment.created);

        // csv-parse yields empty CSV fields as empty strings rather than null,
        // so an empty `parent` column must also count as "no parent".
        const isTopLevel = parent == null || parent === "";

        if (isTopLevel) {
            const submission = getSubmission(comment.link_id.replace(/t._/, ''));
            if (submission == null) {
                continue;
            }

            // Text posts are referenced by their permalink, link posts by the linked url.
            const isTextPost = Boolean(submission.selftext_html);

            await addReply({
                "in-reply-to": isTextPost ? submission.permalink : submission.url,
                category: [subreddit],
                content: comment.body_html,
                parent: {
                    kind: "article",
                    title: submission.title,
                    description: submission.selftext_html ?? undefined,
                    url: isTextPost ? undefined : submission.url,
                    published: toDate(submission.created).getTime(),
                    author: getAuthorObj(extractUsername(submission.author)),
                    image: getPostPreview(submission),
                    tags: [subreddit],
                    syndications: [submission.permalink]
                },
                originalUrl: permalink,
                published,
                // NOTE(review): these are the replies of the submission, not the replies
                // to this comment — confirm that this is what should be attached.
                replies: mapReplies(submission)
            });
        } else {
            const parentComment = getComment(parent);
            if (parentComment == null) {
                continue;
            }

            await addReply({
                "in-reply-to": parentComment.permalink,
                category: [subreddit],
                content: comment.body_html,
                parent: {
                    kind: "reply",
                    title: undefined,
                    description: parentComment.body_html,
                    url: undefined,
                    published: toDate(parentComment.created).getTime(),
                    author: getAuthorObj(extractUsername(parentComment.author)),
                    image: undefined,
                    tags: [subreddit],
                    syndications: [parentComment.permalink]
                },
                originalUrl: permalink,
                published,
                replies: undefined
            });
        }
    }
}

/**
 * Handles every upvoted submission: text posts become reposts, link posts
 * become bookmarks of the linked url.
 */
async function updateSubmissionVotes() {
    for await (const { id, permalink, direction } of readExport("post_votes.csv")) {
        if (direction !== "up") {
            continue;
        }

        const submission = getSubmission(id);
        if (submission == null) {
            continue;
        }

        const subreddit = extractSubreddit(submission);
        const published = toDate(submission.created);

        if (submission.selftext_html) {
            await addRepost({
                "repost-of": permalink,
                category: [subreddit],
                content: submission.selftext_html,
                name: submission.title,
                published,
                author: getAuthorObj(extractUsername(submission.author))
            });
        } else {
            await addBookmark({
                "bookmark-of": submission.url,
                category: [subreddit],
                published
            });
        }
    }
}

/**
 * Handles every submission in the export: posts with a body become articles,
 * posts without one become bookmarks of their url. Deleted posts are skipped.
 */
async function updateSubmissions() {
    for await (const { id, permalink, date, subreddit, title, url, body } of
        readExport("posts.csv")) {
        if (title === "[deleted by user]") {
            continue;
        }

        if (body) {
            const submission = getSubmission(id);
            if (submission == null) {
                continue;
            }

            await addArticle({
                name: title,
                category: [subreddit],
                content: body,
                originalUrl: permalink,
                published: new Date(date),
                replies: mapReplies(submission)
            });
        } else {
            // TODO some of these should probably be replaced with article posts from the linked url
            await addBookmark({
                "bookmark-of": url,
                category: [subreddit],
                published: new Date(date)
            });
        }
    }
}

/** Adds a favorite for every saved comment. */
async function updateSavedComments() {
    for await (const { id, permalink } of readExport("saved_comments.csv")) {
        const comment = getComment(id);
        if (comment == null) {
            continue;
        }

        await addFavorite({
            "like-of": permalink,
            category: [extractSubreddit(comment)],
            content: comment.body_html,
            published: toDate(comment.created)
        });
    }
}

/**
 * Adds a favorite for every saved submission; text posts additionally carry
 * their title and body.
 */
async function updateSavedSubmissions() {
    for await (const { id, permalink } of readExport("saved_posts.csv")) {
        const submission = getSubmission(id);
        if (submission == null) {
            continue;
        }

        const subreddit = extractSubreddit(submission);
        const published = toDate(submission.created);

        if (submission.selftext_html) {
            await addFavorite({
                "like-of": permalink,
                name: submission.title,
                category: [subreddit],
                content: submission.selftext_html,
                published
            });
        } else {
            await addFavorite({
                "like-of": permalink,
                category: [subreddit],
                published
            });
        }
    }
}

// Started last so that the module-level constant above is initialised before the
// first step runs; a failed step is reported instead of becoming an unhandled rejection.
run().catch((err: unknown) => {
    console.error("Failed to import the reddit export:", err);
    process.exitCode = 1;
});