Files
bewcloud/crons/news.ts
Bruno Bernardino 6cfb62d1a2 Refactor data handlers + misc fixes
This refactors the data handlers into a more standard/understood model-like architecture, to prepare for a new, more robust config system.

It also fixes a problem with creating new Notes and uploading new Photos via the web interface (related to #58).

Finally, it speeds up Docker builds by sending in fewer files, leaving out those that aren't necessary or will be built anyway.

This is all in preparation to allow building #13 more robustly.
2025-05-24 08:24:10 +01:00

69 lines
2.3 KiB
TypeScript

import Database, { sql } from '/lib/interfaces/database.ts';
import { NewsFeed } from '/lib/types.ts';
import { concurrentPromises } from '/lib/utils/misc.ts';
import { FeedModel } from '/lib/models/news.ts';
// Module-level database handle shared by the cron functions below.
const db = new Database();
/**
 * Crawls every news feed that has never been crawled or whose last crawl is
 * at or before the cutoff time.
 *
 * The cutoff is four hours in the past by default. When `forceFetch` is true
 * the cutoff is the current time instead.
 *
 * Errors are logged with `console.error` and not rethrown.
 *
 * @param forceFetch Use "now" as the cutoff rather than four hours ago.
 */
export async function fetchNewArticles(forceFetch = false) {
  // Start from "now" and only move the cutoff back when not forcing a fetch.
  const crawlCutoff = new Date();

  if (!forceFetch) {
    crawlCutoff.setUTCHours(crawlCutoff.getUTCHours() - 4);
  }

  try {
    const queryArguments = [crawlCutoff.toISOString()];

    const pendingFeeds = await db.query<NewsFeed>(
      sql`SELECT * FROM "bewcloud_news_feeds" WHERE "last_crawled_at" IS NULL OR "last_crawled_at" <= $1 ORDER BY "last_crawled_at" ASC`,
      queryArguments,
    );

    const totalFeeds = pendingFeeds.length;

    console.info('Will crawl', totalFeeds, 'news feeds');

    // Wrap each crawl in a thunk so nothing starts until the runner invokes it.
    const crawlTasks: (() => ReturnType<typeof FeedModel.crawl>)[] = [];

    for (const pendingFeed of pendingFeeds) {
      crawlTasks.push(() => FeedModel.crawl(pendingFeed));
    }

    await concurrentPromises(crawlTasks, 3);

    console.info('Crawled', totalFeeds, 'news feeds');
  } catch (error) {
    console.error(error);
  }
}
/**
 * Deletes read articles dated one month ago or earlier, except for articles
 * belonging to low-frequency feeds.
 *
 * A feed is treated as low-frequency (and skipped entirely) when it has five
 * or fewer articles dated within the last month.
 *
 * Errors are logged with `console.error` and not rethrown.
 */
export async function cleanupOldArticles() {
  const oneMonthAgo = new Date(new Date().setUTCMonth(new Date().getUTCMonth() - 1));

  // Both queries below compare "article_date" against the YYYY-MM-DD part only.
  const oneMonthAgoDate = oneMonthAgo.toISOString().substring(0, 10);

  try {
    console.info('Will cleanup old articles');

    const feedIdsToSkip = new Set<string>();

    const feeds = await db.query<Pick<NewsFeed, 'id' | 'feed_url'>>(
      sql`SELECT "id", "feed_url" FROM "bewcloud_news_feeds" ORDER BY "last_crawled_at" ASC`,
    );

    for (const feed of feeds) {
      const recentArticlesCount = (await db.query<{ count: number }>(
        sql`SELECT COUNT("id") AS "count" FROM "bewcloud_news_feed_articles" WHERE "feed_id" = $1 AND "article_date" >= $2`,
        [feed.id, oneMonthAgoDate],
      ))[0].count;

      // Don't delete old articles if the feed doesn't have many recent articles (low frequency feeds)
      if (recentArticlesCount <= 5) {
        feedIdsToSkip.add(feed.id);
      }
    }

    // Exclude every feed in the skip list. `NOT (x = ANY(arr))` is true only
    // when x matches none of the elements, and is true for an empty array.
    // The previous `NOT (x != ANY(arr))` matched only rows equal to *all*
    // elements, so it deleted from the one skipped feed or from nothing.
    const result = await db.query<{ count: number }>(
      sql`WITH "deleted" AS (
        DELETE FROM "bewcloud_news_feed_articles" WHERE "is_read" = TRUE AND "article_date" <= $1 AND NOT ("feed_id" = ANY($2)) RETURNING *
      )
      SELECT COUNT(*) FROM "deleted"`,
      [
        oneMonthAgoDate,
        [...feedIdsToSkip],
      ],
    );

    console.info('Deleted', result[0].count, 'old articles');
  } catch (error) {
    console.error(error);
  }
}