Checkpoint: Moteur RSS : server/rssEngine.ts (parseur RSS/Atom, règles d'automatisme, déduplication), server/scheduledRoutes.ts (endpoint POST /api/scheduled/rss-fetch), montage dans _core/index.ts

This commit is contained in:
Manus
2026-04-28 12:35:58 -04:00
parent 8f2a22e4b1
commit ddf1533d04
7 changed files with 389 additions and 2 deletions

View File

@@ -1,4 +1,4 @@
{ {
"version": "a13a3f60", "version": "f278ae22",
"timestamp": 1777365200860 "timestamp": 1777394158072
} }

View File

@@ -61,6 +61,7 @@
"drizzle-orm": "^0.44.5", "drizzle-orm": "^0.44.5",
"embla-carousel-react": "^8.6.0", "embla-carousel-react": "^8.6.0",
"express": "^4.21.2", "express": "^4.21.2",
"fast-xml-parser": "^5.7.2",
"framer-motion": "^12.23.22", "framer-motion": "^12.23.22",
"input-otp": "^1.4.2", "input-otp": "^1.4.2",
"jose": "6.1.0", "jose": "6.1.0",

37
pnpm-lock.yaml generated
View File

@@ -160,6 +160,9 @@ importers:
express: express:
specifier: ^4.21.2 specifier: ^4.21.2
version: 4.21.2 version: 4.21.2
fast-xml-parser:
specifier: ^5.7.2
version: 5.7.2
framer-motion: framer-motion:
specifier: ^12.23.22 specifier: ^12.23.22
version: 12.23.22(react-dom@19.2.1(react@19.2.1))(react@19.2.1) version: 12.23.22(react-dom@19.2.1(react@19.2.1))(react@19.2.1)
@@ -1079,6 +1082,9 @@ packages:
'@mermaid-js/parser@0.6.3': '@mermaid-js/parser@0.6.3':
resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==} resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==}
'@nodable/entities@2.1.0':
resolution: {integrity: sha512-nyT7T3nbMyBI/lvr6L5TyWbFJAI9FTgVRakNoBqCD+PmID8DzFrrNdLLtHMwMszOtqZa8PAOV24ZqDnQrhQINA==}
'@radix-ui/number@1.1.1': '@radix-ui/number@1.1.1':
resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==} resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==}
@@ -3081,10 +3087,17 @@ packages:
resolution: {integrity: sha512-6rxyATwPCkaFIL3JLqw8qXqMpIZ942pTX/tbQFkRsDGblS8tNGtlUauA/+mt6RUfqn/4MoEr+WDkYoIQbibWuQ==} resolution: {integrity: sha512-6rxyATwPCkaFIL3JLqw8qXqMpIZ942pTX/tbQFkRsDGblS8tNGtlUauA/+mt6RUfqn/4MoEr+WDkYoIQbibWuQ==}
engines: {node: '>=6.0.0'} engines: {node: '>=6.0.0'}
fast-xml-builder@1.1.5:
resolution: {integrity: sha512-4TJn/8FKLeslLAH3dnohXqE3QSoxkhvaMzepOIZytwJXZO69Bfz0HBdDHzOTOon6G59Zrk6VQ2bEiv1t61rfkA==}
fast-xml-parser@5.2.5: fast-xml-parser@5.2.5:
resolution: {integrity: sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ==} resolution: {integrity: sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ==}
hasBin: true hasBin: true
fast-xml-parser@5.7.2:
resolution: {integrity: sha512-P7oW7tLbYnhOLQk/Gv7cZgzgMPP/XN03K02/Jy6Y/NHzyIAIpxuZIM/YqAkfiXFPxA2CTm7NtCijK9EDu09u2w==}
hasBin: true
fdir@6.5.0: fdir@6.5.0:
resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==}
engines: {node: '>=12.0.0'} engines: {node: '>=12.0.0'}
@@ -3734,6 +3747,10 @@ packages:
path-data-parser@0.1.0: path-data-parser@0.1.0:
resolution: {integrity: sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==} resolution: {integrity: sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==}
path-expression-matcher@1.5.0:
resolution: {integrity: sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ==}
engines: {node: '>=14.0.0'}
path-to-regexp@0.1.12: path-to-regexp@0.1.12:
resolution: {integrity: sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==} resolution: {integrity: sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==}
@@ -4076,6 +4093,9 @@ packages:
strnum@2.1.1: strnum@2.1.1:
resolution: {integrity: sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw==} resolution: {integrity: sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw==}
strnum@2.2.3:
resolution: {integrity: sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==}
style-to-js@1.1.18: style-to-js@1.1.18:
resolution: {integrity: sha512-JFPn62D4kJaPTnhFUI244MThx+FEGbi+9dw1b9yBBQ+1CZpV7QAT8kUtJ7b7EUNdHajjF/0x8fT+16oLJoojLg==} resolution: {integrity: sha512-JFPn62D4kJaPTnhFUI244MThx+FEGbi+9dw1b9yBBQ+1CZpV7QAT8kUtJ7b7EUNdHajjF/0x8fT+16oLJoojLg==}
@@ -5366,6 +5386,8 @@ snapshots:
dependencies: dependencies:
langium: 3.3.1 langium: 3.3.1
'@nodable/entities@2.1.0': {}
'@radix-ui/number@1.1.1': {} '@radix-ui/number@1.1.1': {}
'@radix-ui/primitive@1.1.3': {} '@radix-ui/primitive@1.1.3': {}
@@ -7518,10 +7540,21 @@ snapshots:
fast-equals@5.3.2: {} fast-equals@5.3.2: {}
fast-xml-builder@1.1.5:
dependencies:
path-expression-matcher: 1.5.0
fast-xml-parser@5.2.5: fast-xml-parser@5.2.5:
dependencies: dependencies:
strnum: 2.1.1 strnum: 2.1.1
fast-xml-parser@5.7.2:
dependencies:
'@nodable/entities': 2.1.0
fast-xml-builder: 1.1.5
path-expression-matcher: 1.5.0
strnum: 2.2.3
fdir@6.5.0(picomatch@4.0.3): fdir@6.5.0(picomatch@4.0.3):
optionalDependencies: optionalDependencies:
picomatch: 4.0.3 picomatch: 4.0.3
@@ -8423,6 +8456,8 @@ snapshots:
path-data-parser@0.1.0: {} path-data-parser@0.1.0: {}
path-expression-matcher@1.5.0: {}
path-to-regexp@0.1.12: {} path-to-regexp@0.1.12: {}
pathe@1.1.2: {} pathe@1.1.2: {}
@@ -8872,6 +8907,8 @@ snapshots:
strnum@2.1.1: {} strnum@2.1.1: {}
strnum@2.2.3: {}
style-to-js@1.1.18: style-to-js@1.1.18:
dependencies: dependencies:
style-to-object: 1.0.11 style-to-object: 1.0.11

View File

@@ -10,6 +10,7 @@ import { createContext } from "./context";
import { serveStatic, setupVite } from "./vite"; import { serveStatic, setupVite } from "./vite";
import { runFullImport } from "../importer"; import { runFullImport } from "../importer";
import uploadRoutes from "../uploadRoutes"; import uploadRoutes from "../uploadRoutes";
import scheduledRoutes from "../scheduledRoutes";
import { ensureAdminExists } from "../localAuth"; import { ensureAdminExists } from "../localAuth";
import { getSetting } from "../db"; import { getSetting } from "../db";
@@ -65,6 +66,7 @@ async function startServer() {
registerOAuthRoutes(app); registerOAuthRoutes(app);
app.use(uploadRoutes); app.use(uploadRoutes);
app.use(scheduledRoutes);
app.use( app.use(
"/api/trpc", "/api/trpc",

276
server/rssEngine.ts Normal file
View File

@@ -0,0 +1,276 @@
/**
* Moteur de lecture RSS
* Récupère les flux actifs, parse les articles, applique les règles d'automatisme,
* et insère les nouveaux articles dans veille_items ou aap_items.
*/
import { XMLParser } from "fast-xml-parser";
import * as crypto from "crypto";
import { getDb } from "./db";
import {
rssFeeds,
veilleItems,
aapItems,
type RssFeed,
} from "../drizzle/schema";
import { eq } from "drizzle-orm";
// ─── Types internes ───────────────────────────────────────────────────────────
/**
 * Normalised article extracted from either an RSS 2.0 `<item>` or an Atom `<entry>`.
 * All values are raw strings as read from the feed; cleaning (HTML stripping,
 * date parsing) happens later in `processFeed`.
 */
interface RssItem {
// Article headline; may be an empty string when the feed omits it.
title: string;
// RSS `<description>` or Atom `<summary>`/`<content>`; may still contain HTML.
description?: string;
// Article URL: RSS `<link>` (falling back to the guid) or the Atom "alternate" link href.
link?: string;
// Unparsed publication date (RSS `pubDate`/`dc:date`, Atom `published`/`updated`).
pubDate?: string;
// Feed-provided identifier (RSS `<guid>` or Atom `<id>`), falling back to the link.
guid?: string;
}
/**
 * Automatic classification rule attached to a feed.
 * A rule matches when `keyword` occurs (case-insensitively) in the article's
 * title + description; the first matching rule wins.
 */
interface AutoRule {
// Substring searched in the lower-cased "title description" text.
keyword: string;
// Watch type assigned on match; only read for feeds whose feedType is "veille".
typeVeille?: "reglementaire" | "concurrentielle" | "technologique" | "generale";
// Call-for-projects category assigned on match; only read for feeds whose feedType is "aap".
categorieAap?: "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre";
}
/**
 * Outcome of processing a single feed, as returned by `processFeed`.
 */
interface FetchResult {
// Primary key of the processed feed (rssFeeds.id).
feedId: number;
// Display name of the processed feed.
feedName: string;
// "ok" when the feed was fetched and stored without error, "error" otherwise.
status: "ok" | "error";
// Number of articles successfully inserted.
newItems: number;
// Number of articles not inserted: untitled items and duplicates.
skippedItems: number;
// Error message; only set when status is "error".
error?: string;
}
// ─── Utilitaires ─────────────────────────────────────────────────────────────
/**
 * Computes the deduplication key for a piece of text.
 *
 * @param text - Text to fingerprint (UTF-8 encoded by the hash).
 * @returns The lowercase hexadecimal SHA-256 digest, capped at 64 characters
 *          (which is the full length of a SHA-256 hex digest).
 */
function dedupHash(text: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(text);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 64);
}
/**
 * Converts a feed date string into a `Date`.
 *
 * @param dateStr - Raw date text from the feed; may be missing or empty.
 * @returns The parsed date, or `null` when the input is absent, empty,
 *          or not understood by the `Date` constructor.
 */
function parseDate(dateStr?: string): Date | null {
  const candidate = dateStr ? new Date(dateStr) : null;
  if (candidate === null || Number.isNaN(candidate.getTime())) {
    return null;
  }
  return candidate;
}
/**
 * Removes HTML tags from a fragment and decodes character entities.
 *
 * Entities are decoded in a single pass so that already-escaped text is only
 * unescaped once: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 * Supported: `&amp; &lt; &gt; &quot; &apos; &nbsp;` and numeric entities
 * (`&#233;`, `&#x27;`). Unknown or invalid entities are left untouched.
 *
 * @param html - Raw title/description text, possibly containing markup.
 * @returns The plain text, trimmed of leading/trailing whitespace.
 */
function stripHtml(html: string): string {
  // A Map (not an object literal) so names like "constructor" cannot hit the prototype.
  const namedEntities = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00a0"],
  ]);

  const decodeEntity = (match: string, body: string): string => {
    if (body.charAt(0) !== "#") {
      return namedEntities.get(body) ?? match;
    }
    const isHex = body.charAt(1) === "x" || body.charAt(1) === "X";
    const codePoint = isHex
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    // Reject NUL and anything outside the Unicode range (fromCodePoint would throw).
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
      return match;
    }
    return String.fromCodePoint(codePoint);
  };

  return html
    .replace(/<[^>]*>/g, "")
    .replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, decodeEntity)
    .trim();
}
/**
 * Applies the automatic classification rules to an article.
 *
 * The title and description are joined and lower-cased, then each rule's
 * keyword is searched as a case-insensitive substring, in rule order.
 * Rules whose keyword is blank or not a string are skipped: an empty keyword
 * would otherwise match every article and shadow all following rules, and a
 * malformed rule (rules come from stored JSON) would throw.
 *
 * @param title - Plain-text article title.
 * @param description - Plain-text article description (may be empty).
 * @param rules - Rules to evaluate, in priority order.
 * @returns The first matching rule, or `null` when no rule matches.
 */
function applyAutoRules(
  title: string,
  description: string,
  rules: AutoRule[]
): AutoRule | null {
  const haystack = `${title} ${description}`.toLowerCase();
  for (const rule of rules) {
    const keyword: unknown = rule?.keyword;
    if (typeof keyword !== "string" || keyword.trim() === "") {
      continue;
    }
    if (haystack.includes(keyword.toLowerCase())) {
      return rule;
    }
  }
  return null;
}
// ─── Parsing RSS/Atom ─────────────────────────────────────────────────────────
/** Plain-object view of a node produced by the XML parser. */
type XmlNode = Record<string, unknown>;

/** Normalises the parser's "absent | single | repeated" output to an array. */
function toNodeList(value: unknown): unknown[] {
  if (!value) return [];
  return Array.isArray(value) ? value : [value];
}

/** Returns `value` as an object node, or an empty node when it is not a plain object. */
function toXmlNode(value: unknown): XmlNode {
  return typeof value === "object" && value !== null && !Array.isArray(value)
    ? (value as XmlNode)
    : {};
}

/**
 * Extracts the text of a parsed element: the value itself for a bare element,
 * or its `#text` child when the element carries attributes. Never returns
 * "undefined" or "[object Object]"; a missing text yields "".
 */
function nodeText(value: unknown): string {
  const first: unknown = Array.isArray(value) ? value[0] : value;
  if (first === null || first === undefined) return "";
  if (typeof first === "object") {
    const inner = (first as XmlNode)["#text"];
    return inner === null || inner === undefined || typeof inner === "object"
      ? ""
      : String(inner);
  }
  return String(first);
}

/**
 * Downloads a feed and converts its entries to `RssItem`s.
 *
 * Supports RSS 2.0 (`<rss><channel><item>`) and Atom (`<feed><entry>`).
 * Tag values are kept as raw strings (`parseTagValue: false`) so that
 * numeric-looking titles or guids such as "007" are not coerced to numbers.
 *
 * @param url - Feed URL to fetch (15 s timeout).
 * @returns The feed's items; an empty array when the feed has none.
 * @throws Error on a non-2xx HTTP status, on timeout/network failure, or when
 *         the document is neither RSS 2.0 nor Atom.
 */
async function fetchAndParseRss(url: string): Promise<RssItem[]> {
  const response = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (compatible; VeilleBot/1.0; +https://itinova.fr)",
      "Accept": "application/rss+xml, application/xml, text/xml, */*",
    },
    signal: AbortSignal.timeout(15000),
  });
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} ${response.statusText}`);
  }
  const xml = await response.text();

  const parser = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    textNodeName: "#text",
    parseAttributeValue: true,
    parseTagValue: false,
    trimValues: true,
  });
  const root = toXmlNode(parser.parse(xml));

  // RSS 2.0
  const channel = toXmlNode(root.rss).channel;
  if (channel) {
    return toNodeList(toXmlNode(channel).item).map((raw): RssItem => {
      const item = toXmlNode(raw);
      const link = nodeText(item.link);
      const guid = nodeText(item.guid);
      return {
        title: nodeText(item.title),
        description: nodeText(item.description),
        link: link || guid,
        pubDate: nodeText(item.pubDate) || nodeText(item["dc:date"]),
        guid: guid || link,
      };
    });
  }

  // Atom
  const feed = root.feed;
  if (feed) {
    return toNodeList(toXmlNode(feed).entry).map((raw): RssItem => {
      const entry = toXmlNode(raw);
      const links = toNodeList(entry.link).map((l) => toXmlNode(l));
      // Prefer the rel="alternate" link; otherwise take the first one.
      const altLink = links.find((l) => l["@_rel"] === "alternate") ?? links[0];
      const href = nodeText(altLink?.["@_href"]);
      return {
        title: nodeText(entry.title),
        // Fall back to <content> (with or without attributes) when there is no <summary>.
        description: nodeText(entry.summary) || nodeText(entry.content),
        link: href,
        pubDate: nodeText(entry.published) || nodeText(entry.updated),
        guid: nodeText(entry.id) || href,
      };
    });
  }

  throw new Error("Format RSS/Atom non reconnu");
}
// ─── Traitement d'un flux ─────────────────────────────────────────────────────
/**
 * Tells whether `err` is a duplicate-key error raised by the database.
 *
 * The `cause` chain is inspected as well, because query builders may wrap the
 * driver error (carrying `code: "ER_DUP_ENTRY"`) inside their own error type.
 */
function isDuplicateEntryError(err: unknown): boolean {
  let current: unknown = err;
  // Bounded walk so a cyclic `cause` chain cannot loop forever.
  for (let depth = 0; depth < 5 && typeof current === "object" && current !== null; depth++) {
    const { code, message, cause } = current as {
      code?: unknown;
      message?: unknown;
      cause?: unknown;
    };
    if (code === "ER_DUP_ENTRY") return true;
    if (typeof message === "string" && message.includes("Duplicate entry")) return true;
    current = cause;
  }
  return false;
}

/**
 * Fetches one feed and stores its new articles.
 *
 * Each article is cleaned, classified with the feed's auto rules (falling back
 * to the feed defaults) and inserted into `veilleItems` or `aapItems` depending
 * on `feed.feedType`. Duplicates are detected through the UNIQUE constraint on
 * `dedupKey` and counted as skipped. The feed's `lastFetchedAt`,
 * `lastFetchStatus` and `lastFetchError` are updated in both outcomes.
 *
 * @param feed - Feed row to process.
 * @returns Counters and status for this feed; fetch/parse/insert failures are
 *          reported through `status: "error"` rather than thrown.
 * @throws Error when the database is not available.
 */
async function processFeed(feed: RssFeed): Promise<FetchResult> {
  const db = await getDb();
  if (!db) throw new Error("Database not available");

  const result: FetchResult = {
    feedId: feed.id,
    feedName: feed.name,
    status: "ok",
    newItems: 0,
    skippedItems: 0,
  };

  try {
    const items = await fetchAndParseRss(feed.url);
    const rules: AutoRule[] = Array.isArray(feed.autoRules) ? (feed.autoRules as AutoRule[]) : [];

    for (const item of items) {
      const title = stripHtml(item.title || "");
      const description = stripHtml(item.description || "");
      const link = item.link || item.guid || "";
      const pubDate = parseDate(item.pubDate);

      if (!title) {
        result.skippedItems++;
        continue;
      }

      // Deduplication key derived from title + link.
      const dedupKey = dedupHash(title + "|" + link);
      const matchedRule = applyAutoRules(title, description, rules);

      let insertArticle: (() => Promise<void>) | null = null;
      if (feed.feedType === "veille") {
        const typeVeille = (matchedRule?.typeVeille ?? feed.defaultTypeVeille ?? "generale") as
          "reglementaire" | "concurrentielle" | "technologique" | "generale";
        insertArticle = async () => {
          await db.insert(veilleItems).values({
            dedupKey,
            titre: title,
            resume: description || null,
            source: feed.name,
            lien: link || null,
            typeVeille,
            datePublication: pubDate,
          });
        };
      } else if (feed.feedType === "aap") {
        const categorie = (matchedRule?.categorieAap ?? feed.defaultCategorieAap ?? "Autre") as
          "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre";
        insertArticle = async () => {
          await db.insert(aapItems).values({
            dedupKey,
            titre: title,
            categorie,
            lien: link || null,
            datePublication: pubDate,
          });
        };
      }
      if (!insertArticle) continue;

      try {
        await insertArticle();
        result.newItems++;
      } catch (e: unknown) {
        // Duplicate (UNIQUE constraint on dedupKey): already stored, skip it.
        if (!isDuplicateEntryError(e)) throw e;
        result.skippedItems++;
      }
    }

    // Record the successful fetch on the feed row.
    await db.update(rssFeeds)
      .set({ lastFetchedAt: new Date(), lastFetchStatus: "ok", lastFetchError: null })
      .where(eq(rssFeeds.id, feed.id));
  } catch (e: unknown) {
    result.status = "error";
    result.error = e instanceof Error ? e.message : String(e);
    // Best effort: record the failure on the feed row without masking the original error.
    try {
      await db.update(rssFeeds)
        .set({ lastFetchedAt: new Date(), lastFetchStatus: "error", lastFetchError: result.error })
        .where(eq(rssFeeds.id, feed.id));
    } catch (updateError: unknown) {
      console.warn(
        `[RSS] Impossible d'enregistrer l'erreur du flux ${feed.name}:`,
        updateError instanceof Error ? updateError.message : String(updateError)
      );
    }
  }

  return result;
}
// ─── Point d'entrée principal ─────────────────────────────────────────────────
/**
 * Aggregated report of one `runRssFetch` execution over all active feeds.
 */
export interface RssFetchSummary {
// Number of active feeds that were processed.
totalFeeds: number;
// Number of feeds whose result status is "ok".
successFeeds: number;
// Number of feeds whose result status is "error".
errorFeeds: number;
// Sum of newly inserted articles across all feeds.
totalNewItems: number;
// Sum of skipped articles across all feeds.
totalSkippedItems: number;
// Per-feed results, in processing order.
results: FetchResult[];
// ISO 8601 timestamp taken when the summary is built, after all feeds are processed.
executedAt: string;
}
/**
 * Main entry point: reads every active feed, one after another, and returns
 * an aggregated report.
 *
 * @returns Totals and per-feed results for this run.
 * @throws Error when the database is not available.
 */
export async function runRssFetch(): Promise<RssFetchSummary> {
  const db = await getDb();
  if (!db) {
    throw new Error("Database not available");
  }

  // Only feeds flagged as active are read.
  const activeFeeds = await db.select().from(rssFeeds).where(eq(rssFeeds.isActive, true));

  const results: FetchResult[] = [];
  let successFeeds = 0;
  let errorFeeds = 0;
  let totalNewItems = 0;
  let totalSkippedItems = 0;

  for (const feed of activeFeeds) {
    console.log(`[RSS] Lecture du flux: ${feed.name} (${feed.url})`);
    const outcome = await processFeed(feed);
    results.push(outcome);

    if (outcome.status === "ok") {
      successFeeds += 1;
    } else if (outcome.status === "error") {
      errorFeeds += 1;
    }
    totalNewItems += outcome.newItems;
    totalSkippedItems += outcome.skippedItems;

    console.log(`[RSS] ${feed.name}: ${outcome.newItems} nouveaux, ${outcome.skippedItems} doublons, statut: ${outcome.status}`);
  }

  const summary: RssFetchSummary = {
    totalFeeds: activeFeeds.length,
    successFeeds,
    errorFeeds,
    totalNewItems,
    totalSkippedItems,
    results,
    executedAt: new Date().toISOString(),
  };
  console.log(`[RSS] Terminé: ${summary.totalNewItems} nouveaux articles, ${summary.errorFeeds} erreurs`);
  return summary;
}

62
server/scheduledRoutes.ts Normal file
View File

@@ -0,0 +1,62 @@
/**
* Routes pour les tâches planifiées.
* POST /api/scheduled/rss-fetch — déclenche la lecture de tous les flux RSS actifs.
* Protégé par cookie de session (rôle "user" minimum, conforme aux tâches planifiées Manus).
*/
import express, { Router, Request, Response } from "express";
import { parse as parseCookieHeader } from "cookie";
import { verifyLocalToken, LOCAL_AUTH_COOKIE } from "./localAuth";
import { sdk } from "./_core/sdk";
import { runRssFetch } from "./rssEngine";
const router: Router = express.Router();
/**
 * Lightweight authentication middleware.
 *
 * Lets the request through when either:
 *  1. the local auth cookie (`LOCAL_AUTH_COOKIE`) carries a token that
 *     `verifyLocalToken` resolves to a user, or
 *  2. `sdk.authenticateRequest` accepts the request (Manus OAuth session).
 *
 * Otherwise replies 401. A failure of the OAuth attempt is swallowed and
 * treated as "not authenticated"; any other exception also yields a 401 with
 * a distinct message.
 */
async function requireAuth(req: Request, res: Response, next: () => void) {
  try {
    const jar = parseCookieHeader(req.headers.cookie ?? "");

    // 1. Local session cookie.
    const localToken = jar[LOCAL_AUTH_COOKIE];
    const localUser = localToken ? await verifyLocalToken(localToken) : null;
    if (localUser) return next();

    // 2. Manus OAuth session cookie.
    try {
      await sdk.authenticateRequest(req);
      return next();
    } catch (_oauthError) {
      // No valid OAuth session: fall through to the 401 below.
    }

    res.status(401).json({ error: "Non authentifié" });
  } catch (_authError) {
    res.status(401).json({ error: "Erreur d'authentification" });
  }
}
/**
 * Handler for POST /api/scheduled/rss-fetch.
 * Runs the RSS engine over all active feeds and replies with the run summary,
 * or with HTTP 500 and the error message when the run fails.
 */
async function handleRssFetch(_req: Request, res: Response): Promise<void> {
  console.log("[Scheduled] Déclenchement de la lecture RSS...");
  try {
    const summary = await runRssFetch();
    res.json({ success: true, summary });
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error("[Scheduled/rss-fetch] Erreur:", reason);
    res.status(500).json({ success: false, error: reason });
  }
}

// POST /api/scheduled/rss-fetch — authenticated trigger for the RSS fetch.
router.post("/api/scheduled/rss-fetch", requireAuth, handleRssFetch);
export default router;

View File

@@ -74,6 +74,15 @@
- [x] Navigation : ajouter l'entrée RSS dans le menu latéral (DashboardLayout) - [x] Navigation : ajouter l'entrée RSS dans le menu latéral (DashboardLayout)
- [ ] Déploiement VPS via Gitea CI/CD - [ ] Déploiement VPS via Gitea CI/CD
## Moteur RSS automatique
- [ ] Installer le parseur RSS (fast-xml-parser) côté serveur
- [ ] Ajouter table rss_seen_articles (guid unique pour éviter les doublons)
- [ ] Ajouter champ last_fetched_at sur rss_feeds
- [ ] Créer server/rssEngine.ts : parseur RSS + application des règles d'automatisme
- [ ] Créer endpoint POST /api/scheduled/rss-fetch (auth cookie session)
- [ ] Configurer la tâche planifiée Manus à 06h00 quotidien
- [ ] Déployer sur le VPS
## Purge des données ## Purge des données
- [ ] Procédures tRPC : veille.purge et aap.purge (adminProcedure) - [ ] Procédures tRPC : veille.purge et aap.purge (adminProcedure)
- [ ] Bouton "Purger les données" en haut à droite de VeilleDashboard.tsx (admin uniquement) - [ ] Bouton "Purger les données" en haut à droite de VeilleDashboard.tsx (admin uniquement)