const fs = require("fs");
const path = require("path");
const wordCounting = require("word-counting");
const util = require('node:util');
const exec = util.promisify(require('node:child_process').exec);
const { Feed } = require('feed');
/**
 * Recursively visits every non-directory entry under `dir`, calling `cb` once per entry.
 *
 * Directory listing and stat calls are synchronous; only completion of the
 * callbacks is asynchronous.
 *
 * @param {string} dir - Directory to traverse.
 * @param {(dir: string, file: string, resolve: (value?: unknown) => void) => unknown} cb
 *   Invoked with the containing directory, the resolved path of the entry, and a
 *   `resolve` function the callback must call when it has finished with the entry.
 *   The callback may be `async`; if it throws or its returned promise rejects,
 *   the promise returned by `walk` rejects with that error.
 * @returns {Promise<Array>} Settles once every callback has called `resolve`
 *   (nested directories yield nested arrays).
 */
function walk(dir, cb) {
  const entries = fs.readdirSync(dir);
  return Promise.all(entries.map((entry) => {
    const resolvedFile = path.resolve(dir, entry);
    if (fs.statSync(resolvedFile).isDirectory()) {
      return walk(resolvedFile, cb);
    }
    return new Promise((resolve, reject) => {
      // A synchronous throw from `cb` already rejects via the executor. An async
      // callback reports failure through its returned promise instead, so forward
      // that rejection; otherwise this promise would stay pending forever.
      Promise.resolve(cb(dir, resolvedFile, resolve)).catch(reject);
    });
  }));
}
/**
 * Converts a page name to its slug form: lower-cased, with every space
 * character replaced by a hyphen. No other characters are altered.
 *
 * @param {string} string - The text to convert.
 * @returns {string} The slug.
 */
function toSlug(string) {
  const lowered = string.toLowerCase();
  // Splitting on a single space and re-joining swaps each space for one hyphen.
  return lowered.split(' ').join('-');
}
/**
 * Rewrites the file at `filePath` in place, stripping `../` segments that
 * directly follow a single quote. Each pass turns every `'../` into `'`, so
 * `times` passes remove up to `times` leading `../` segments per quoted path.
 * Presumably used to fix relative import paths after a file moves up the
 * directory tree; confirm against the callers.
 *
 * @param {string} filePath - Path of the file to rewrite.
 * @param {number} [times=1] - Number of `../` segments to strip per quoted path.
 * @returns {void}
 */
function moveImportStatementUp(filePath, times = 1) {
  let data = fs.readFileSync(filePath, 'utf8');
  for (let i = 0; i < times; i++) {
    data = data.replace(/'\.\.\//g, '\'');
  }
  // Compute the new text first, then write it in one call. This avoids holding
  // an already-truncated descriptor that would leak if the write threw, and
  // writeFileSync writes the whole string.
  fs.writeFileSync(filePath, data);
}
(async () => {
const blockRefs = {};
const blockLinks = {};
const indices = [];
await walk("./garden-output/logseq-pages", (dir, file, resolve) => {
const filePath = path.resolve(dir, file);
const data = fs.readFileSync(filePath).toString();
const slug = path.basename(file, ".md").replaceAll('___', '/').replaceAll(/%3F/gi, '').replace('what-is-content-', 'what-is-content');
for (const match of data.matchAll(/(.*)\n\s*id:: (.*)/gm)) {
const text = match[1];
const id = match[2];
const link = `/garden/${slug}/index.md#${id}`;
blockLinks[id] = link;
blockRefs[id] = `[${text}](${link})`;
}
if (data.match(/index: "true"/g)) {
indices.push(slug);
}
resolve();
});
const pageLinks = {};
const taggedBy = {};
const tagged = {};
const referencedBy = {};
// Walk through the pages to make sure we get each page's canonical name (pre-slug).
// The logseq-export README suggests that even the title property is sometimes transformed.
await walk("./Garden/pages", (dir, file, resolve) => {
const filePath = path.resolve(dir, file);
let data = fs.readFileSync(filePath).toString();
if (!data.match(/public::/g)) {
resolve();
return;
}
const startPrivate = data.indexOf("- private");
if (startPrivate > 0) {
data = data.slice(0, startPrivate);
}
const name = path.basename(file, ".md").replaceAll('___', '/');
const slug = toSlug(name).replaceAll(/%3F/gi, '').replaceAll('\'', '-');
const link = `/garden/${slug}/index.md`;
pageLinks[name.replaceAll(/%3F/gi, '?')] = link;
for (const match of data.matchAll(/alias:: (.*)/g)) {
match[1].split(", ").forEach(page => (pageLinks[page] = link));
}
for (const match of data.matchAll(/tags:: (.*)/g)) {
match[1].split(", ").forEach(page => {
const pageSlug = toSlug(page);
taggedBy[pageSlug] = [...(taggedBy[pageSlug] ?? []), name];
tagged[slug] = [...(tagged[slug] ?? []), page];
});
}
if (!indices.includes(slug)) {
for (const match of data.matchAll(/\[\[([^\[\]]*)\]\]/g)) {
const pageSlug = toSlug(match[1]);
referencedBy[pageSlug] = [...(referencedBy[pageSlug] ?? []), name];
}
}
resolve();
});
Object.keys(referencedBy).forEach(page => {
referencedBy[page] = Array.from(new Set(referencedBy[page]));
});
pageLinks["NOW"] = "/now/index";
await walk("./garden-output/logseq-pages", async (dir, file, resolve) => {
const filePath = path.resolve(dir, file);
let data = fs.readFileSync(filePath).toString();
// Count words after applying a set of transformations that should make the count more accurate
const strippedData = data.replace(/---\n[\S\s]*\n---/gm, '').replaceAll(/.*::.*/g, '').replaceAll(/\[([^\]]*)\]\(.*\)/g, '$1');
const wc = wordCounting(strippedData).wordsCount;
// Replace youtube embeds
data = data.replaceAll(
/{{video https:\/\/(?:www\.)?youtube\.com\/watch\?v=(.*)}}/g,
'');
// Replace internal links
data = data.replaceAll(
/]\(\/logseq-pages\/([^\)]*)\)/g,
'](/garden/$1/index.md)');
// Replace block links
data = data.replaceAll(
/\(\((.*)\)\)/g,
(_, id) => blockRefs[id]);
// Remove id:: lines
data = data.replaceAll(
/(#+) (.*)\n\s*id:: (.*)/gm,
(_, h, title, id) => `${title}`);
data = data.replaceAll(
/(.*)\n\s*id:: (.*)/gm,
'$1');
// Fix internal links with spaces not getting mapped
data = data.replaceAll(
/\[\[([^\[\]]*)\]\]/g,
(_, page) => `[${page}](${pageLinks[page]})`);
// Fix internal asset links
data = data.replaceAll(
/\(\/logseq-assets\/([^\)]*)\)/g,
'(/garden/$1)');
// Fix logseq block links
data = data.replaceAll(
/logseq:\/\/graph\/Garden\?block-id=([^\)]*)/g,
(_, block) => `${blockLinks[block]})`);
// Fix logseq page links
data = data.replaceAll(
/logseq:\/\/graph\/Garden\?page=([^\)]*)/g,
(_, page) => `${pageLinks[page.replaceAll('%20', ' ')]})`);
// Wrap images
data = data.replaceAll(
/!\[([^\]]*)\]\(([^\)]*)\)/g,
(_, title, src) => `
`)
// Add tags and references
const title = path.basename(file, ".md");
if (title in tagged) {
data = data.replaceAll(
/---\n\n/gm,
`---\n\n> Tags: ${tagged[title].map(tag => `[${tag}](${pageLinks[tag]})`).join(", ")}\n\n`);
}
if (title in taggedBy) {
data = data.replaceAll(
/---\n\n/gm,
`---\n\n> Tagged by: ${taggedBy[title].map(tag => `[${tag}](${pageLinks[tag]})`).join(", ")}\n\n`);
}
// TODO show context on references? Perhaps in a `::: info` block?
if (title in referencedBy) {
data = data.replaceAll(
/---\n\n/gm,
`---\n\n> Referenced by: ${referencedBy[title].map(tag => `[${tag}](${pageLinks[tag]})`).join(", ")}\n\n`);
}
// Fix links to /now
data = data.replace('NOW', '/now')
// Add header to the top
data = data.replaceAll('___', '/');
const relPath = path.relative("./garden-output/logseq-pages", path.resolve(...filePath.split("___"))).replaceAll(/%3F/gi, '').replace('what-is-content-', 'what-is-content').replace('.md', '/index.md');
data = data.replaceAll(
/---\n\n/gm,
`prev: false
next: false
---