From fbcaab53d03a1d0111367f7cea539f27eddaf89d Mon Sep 17 00:00:00 2001
From: Bruno Bernardino
Date: Mon, 14 Oct 2024 15:13:55 +0100
Subject: [PATCH] Add Cron to delete old, read articles. Update Deno.

---
 .dvmrc                            |  2 +-
 Dockerfile                        |  2 +-
 crons/index.ts                    | 10 +++--
 crons/news.ts                     | 61 +++++++++++++++++++++++++++++++
 crons/{cleanup.ts => sessions.ts} |  0
 5 files changed, 70 insertions(+), 5 deletions(-)
 rename crons/{cleanup.ts => sessions.ts} (100%)

diff --git a/.dvmrc b/.dvmrc
index 94ed04f..a82727c 100644
--- a/.dvmrc
+++ b/.dvmrc
@@ -1 +1 @@
-1.46.2
+1.46.3
diff --git a/Dockerfile b/Dockerfile
index e46b810..a00f264 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM denoland/deno:ubuntu-1.46.2
+FROM denoland/deno:ubuntu-1.46.3
 
 EXPOSE 8000
 
diff --git a/crons/index.ts b/crons/index.ts
index f380a6c..9368994 100644
--- a/crons/index.ts
+++ b/crons/index.ts
@@ -1,8 +1,8 @@
-import { Cron } from 'https://deno.land/x/croner@8.0.1/dist/croner.js';
+import { Cron } from 'https://deno.land/x/croner@8.1.2/dist/croner.js';
 
 import { isAppEnabled } from '/lib/config.ts';
-import { cleanupSessions } from './cleanup.ts';
-import { fetchNewArticles } from './news.ts';
+import { cleanupSessions } from './sessions.ts';
+import { cleanupOldArticles, fetchNewArticles } from './news.ts';
 
 export function startCrons() {
   new Cron(
@@ -14,6 +14,10 @@ export function startCrons() {
     },
     async () => {
       await cleanupSessions();
+
+      if (isAppEnabled('news')) {
+        await cleanupOldArticles();
+      }
     },
   );
 
diff --git a/crons/news.ts b/crons/news.ts
index 84f2fcc..3d2cfe0 100644
--- a/crons/news.ts
+++ b/crons/news.ts
@@ -25,3 +25,64 @@ export async function fetchNewArticles(forceFetch = false) {
     console.error(error);
   }
 }
+
+/**
+ * Deletes read articles older than one month, skipping feeds that have 5 or fewer articles dated within the last
+ * month (to avoid emptying feeds with fewer items in total).
+ *
+ * Database errors are logged to the console rather than thrown.
+ */
+export async function cleanupOldArticles() {
+  const oneMonthAgo = new Date();
+  const currentDay = oneMonthAgo.getUTCDate();
+
+  // Go to the 1st before changing the month, so March 31st doesn't become "February 31st" (i.e. early March)
+  oneMonthAgo.setUTCDate(1);
+  oneMonthAgo.setUTCMonth(oneMonthAgo.getUTCMonth() - 1);
+
+  const lastDayOfPreviousMonth = new Date(
+    Date.UTC(oneMonthAgo.getUTCFullYear(), oneMonthAgo.getUTCMonth() + 1, 0),
+  ).getUTCDate();
+
+  oneMonthAgo.setUTCDate(Math.min(currentDay, lastDayOfPreviousMonth));
+
+  try {
+    console.info('Will cleanup old articles');
+
+    // NOTE(review): feed ids are assumed to be strings - confirm against the feeds table schema
+    const feedIdsToSkip = new Set<string>();
+
+    const feeds = await db.query<{ id: string; feed_url: string }>(
+      sql`SELECT "id", "feed_url" FROM "bewcloud_news_feeds" ORDER BY "last_crawled_at" ASC`,
+    );
+
+    for (const feed of feeds) {
+      const recentArticlesCount = (await db.query<{ count: number }>(
+        sql`SELECT COUNT("id") AS "count" FROM "bewcloud_news_feed_articles" WHERE "feed_id" = $1 AND "article_date" >= $2`,
+        [feed.id, oneMonthAgo],
+      ))[0].count;
+
+      // Don't delete old articles if the feed doesn't have recent articles (to skip feeds with less items in total)
+      if (recentArticlesCount <= 5) {
+        feedIdsToSkip.add(feed.id);
+      }
+    }
+
+    // `!= ALL($2)` means "not in the list" (`!= ANY($2)` would only skip feeds properly when exactly one is listed,
+    // and would delete nothing when the list is empty)
+    const result = await db.query<{ count: number }>(
+      sql`WITH "deleted" AS (
+        DELETE FROM "bewcloud_news_feed_articles" WHERE "is_read" = TRUE AND "article_date" <= $1 AND "feed_id" != ALL($2) RETURNING *
+      )
+      SELECT COUNT(*) FROM "deleted"`,
+      [
+        oneMonthAgo.toISOString().substring(0, 10),
+        [...feedIdsToSkip],
+      ],
+    );
+
+    console.info('Deleted', result[0].count, 'old articles');
+  } catch (error) {
+    console.error(error);
+  }
+}
diff --git a/crons/cleanup.ts b/crons/sessions.ts
similarity index 100%
rename from crons/cleanup.ts
rename to crons/sessions.ts