diff --git a/client/public/__manus__/version.json b/client/public/__manus__/version.json index f216d14..c9be79d 100644 --- a/client/public/__manus__/version.json +++ b/client/public/__manus__/version.json @@ -1,4 +1,4 @@ { - "version": "a13a3f60", - "timestamp": 1777365200860 + "version": "f278ae22", + "timestamp": 1777394158072 } \ No newline at end of file diff --git a/package.json b/package.json index 5a0dabd..177d54d 100644 --- a/package.json +++ b/package.json @@ -61,6 +61,7 @@ "drizzle-orm": "^0.44.5", "embla-carousel-react": "^8.6.0", "express": "^4.21.2", + "fast-xml-parser": "^5.7.2", "framer-motion": "^12.23.22", "input-otp": "^1.4.2", "jose": "6.1.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a2eb3f7..6c4fd0f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -160,6 +160,9 @@ importers: express: specifier: ^4.21.2 version: 4.21.2 + fast-xml-parser: + specifier: ^5.7.2 + version: 5.7.2 framer-motion: specifier: ^12.23.22 version: 12.23.22(react-dom@19.2.1(react@19.2.1))(react@19.2.1) @@ -1079,6 +1082,9 @@ packages: '@mermaid-js/parser@0.6.3': resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==} + '@nodable/entities@2.1.0': + resolution: {integrity: sha512-nyT7T3nbMyBI/lvr6L5TyWbFJAI9FTgVRakNoBqCD+PmID8DzFrrNdLLtHMwMszOtqZa8PAOV24ZqDnQrhQINA==} + '@radix-ui/number@1.1.1': resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==} @@ -3081,10 +3087,17 @@ packages: resolution: {integrity: sha512-6rxyATwPCkaFIL3JLqw8qXqMpIZ942pTX/tbQFkRsDGblS8tNGtlUauA/+mt6RUfqn/4MoEr+WDkYoIQbibWuQ==} engines: {node: '>=6.0.0'} + fast-xml-builder@1.1.5: + resolution: {integrity: sha512-4TJn/8FKLeslLAH3dnohXqE3QSoxkhvaMzepOIZytwJXZO69Bfz0HBdDHzOTOon6G59Zrk6VQ2bEiv1t61rfkA==} + fast-xml-parser@5.2.5: resolution: {integrity: sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ==} hasBin: true + 
fast-xml-parser@5.7.2: + resolution: {integrity: sha512-P7oW7tLbYnhOLQk/Gv7cZgzgMPP/XN03K02/Jy6Y/NHzyIAIpxuZIM/YqAkfiXFPxA2CTm7NtCijK9EDu09u2w==} + hasBin: true + fdir@6.5.0: resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} engines: {node: '>=12.0.0'} @@ -3734,6 +3747,10 @@ packages: path-data-parser@0.1.0: resolution: {integrity: sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==} + path-expression-matcher@1.5.0: + resolution: {integrity: sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ==} + engines: {node: '>=14.0.0'} + path-to-regexp@0.1.12: resolution: {integrity: sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==} @@ -4076,6 +4093,9 @@ packages: strnum@2.1.1: resolution: {integrity: sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw==} + strnum@2.2.3: + resolution: {integrity: sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==} + style-to-js@1.1.18: resolution: {integrity: sha512-JFPn62D4kJaPTnhFUI244MThx+FEGbi+9dw1b9yBBQ+1CZpV7QAT8kUtJ7b7EUNdHajjF/0x8fT+16oLJoojLg==} @@ -5366,6 +5386,8 @@ snapshots: dependencies: langium: 3.3.1 + '@nodable/entities@2.1.0': {} + '@radix-ui/number@1.1.1': {} '@radix-ui/primitive@1.1.3': {} @@ -7518,10 +7540,21 @@ snapshots: fast-equals@5.3.2: {} + fast-xml-builder@1.1.5: + dependencies: + path-expression-matcher: 1.5.0 + fast-xml-parser@5.2.5: dependencies: strnum: 2.1.1 + fast-xml-parser@5.7.2: + dependencies: + '@nodable/entities': 2.1.0 + fast-xml-builder: 1.1.5 + path-expression-matcher: 1.5.0 + strnum: 2.2.3 + fdir@6.5.0(picomatch@4.0.3): optionalDependencies: picomatch: 4.0.3 @@ -8423,6 +8456,8 @@ snapshots: path-data-parser@0.1.0: {} + path-expression-matcher@1.5.0: {} + path-to-regexp@0.1.12: {} pathe@1.1.2: {} @@ -8872,6 +8907,8 
@@ snapshots: strnum@2.1.1: {} + strnum@2.2.3: {} + style-to-js@1.1.18: dependencies: style-to-object: 1.0.11 diff --git a/server/_core/index.ts b/server/_core/index.ts index 97a11c5..e784363 100644 --- a/server/_core/index.ts +++ b/server/_core/index.ts @@ -10,6 +10,7 @@ import { createContext } from "./context"; import { serveStatic, setupVite } from "./vite"; import { runFullImport } from "../importer"; import uploadRoutes from "../uploadRoutes"; +import scheduledRoutes from "../scheduledRoutes"; import { ensureAdminExists } from "../localAuth"; import { getSetting } from "../db"; @@ -65,6 +66,7 @@ async function startServer() { registerOAuthRoutes(app); app.use(uploadRoutes); + app.use(scheduledRoutes); app.use( "/api/trpc", diff --git a/server/rssEngine.ts b/server/rssEngine.ts new file mode 100644 index 0000000..c54f974 --- /dev/null +++ b/server/rssEngine.ts @@ -0,0 +1,276 @@ +/** + * Moteur de lecture RSS + * Récupère les flux actifs, parse les articles, applique les règles d'automatisme, + * et insère les nouveaux articles dans veille_items ou aap_items. 
+ */
+import { XMLParser } from "fast-xml-parser";
+import * as crypto from "crypto";
+import { getDb } from "./db";
+import {
+  rssFeeds,
+  veilleItems,
+  aapItems,
+  type RssFeed,
+} from "../drizzle/schema";
+import { eq } from "drizzle-orm";
+
+// ─── Internal types ───────────────────────────────────────────────────────────
+
+interface RssItem {
+  title: string;
+  description?: string;
+  link?: string;
+  pubDate?: string;
+  guid?: string;
+}
+
+interface AutoRule {
+  keyword: string;
+  typeVeille?: "reglementaire" | "concurrentielle" | "technologique" | "generale";
+  categorieAap?: "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre";
+}
+
+interface FetchResult {
+  feedId: number;
+  feedName: string;
+  status: "ok" | "error";
+  newItems: number;
+  skippedItems: number;
+  error?: string;
+}
+
+// ─── Utilities ────────────────────────────────────────────────────────────────
+// SHA-256 hex digest of `text`; the digest is already 64 characters, so the substring is a no-op guard.
+function dedupHash(text: string): string {
+  return crypto.createHash("sha256").update(text).digest("hex").substring(0, 64);
+}
+// Parses a feed date string; returns null when it is missing or not a valid date.
+function parseDate(dateStr?: string): Date | null {
+  if (!dateStr) return null;
+  const d = new Date(dateStr);
+  return isNaN(d.getTime()) ? null : d;
+}
+// Removes tags first, then decodes the basic entities in ONE pass so an escaped ampersand is never decoded twice.
+function stripHtml(html: string): string {
+  return html.replace(/<[^>]*>/g, "").replace(/&(amp|lt|gt|quot|apos|#39);/g, (m, name: string) => ({ amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", "#39": "'" } as Record<string, string>)[name] ?? m).trim();
+}
+
+/**
+ * Applies the automation rules to an article's title + description.
+ * Returns the first matching rule, or null when no rule matches.
+ */
+function applyAutoRules(
+  title: string,
+  description: string,
+  rules: AutoRule[]
+): AutoRule | null {
+  const text = (title + " " + description).toLowerCase();
+  for (const rule of rules) {
+    if (text.includes(rule.keyword.toLowerCase())) {
+      return rule;
+    }
+  }
+  return null;
+}
+
+// ─── RSS/Atom parsing ─────────────────────────────────────────────────────────
+// Downloads `url` (15 s timeout) and maps RSS 2.0 items or Atom entries to RssItem; throws on a non-OK response or an unrecognized document.
+async function fetchAndParseRss(url: string): Promise<RssItem[]> {
+  const response = await fetch(url, {
+    headers: {
+      "User-Agent": "Mozilla/5.0 (compatible; VeilleBot/1.0; +https://itinova.fr)",
+      "Accept": "application/rss+xml, application/xml, text/xml, */*",
+    },
+    signal: AbortSignal.timeout(15000),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP ${response.status} ${response.statusText}`);
+  }
+
+  const xml = await response.text();
+  const parser = new XMLParser({
+    ignoreAttributes: false,
+    attributeNamePrefix: "@_",
+    textNodeName: "#text",
+    parseAttributeValue: true,
+    trimValues: true,
+  });
+
+  const parsed = parser.parse(xml);
+
+  // RSS 2.0: a lone <item> is parsed as an object, so normalize to an array
+  const channel = parsed?.rss?.channel;
+  if (channel) {
+    const items = Array.isArray(channel.item) ? channel.item : channel.item ? [channel.item] : [];
+    return items.map((item: any) => ({
+      title: String(item.title?.["#text"] ?? item.title ?? ""),
+      description: String(item.description?.["#text"] ?? item.description ?? ""),
+      link: String(item.link?.["#text"] ?? item.link ?? item.guid?.["#text"] ?? item.guid ?? ""),
+      pubDate: String(item.pubDate ?? item["dc:date"] ?? ""),
+      guid: String(item.guid?.["#text"] ?? item.guid ?? item.link ?? ""),
+    }));
+  }
+
+  // Atom: prefer the rel="alternate" link, otherwise the first link
+  const feed = parsed?.feed;
+  if (feed) {
+    const entries = Array.isArray(feed.entry) ? feed.entry : feed.entry ? [feed.entry] : [];
+    return entries.map((entry: any) => {
+      const links = Array.isArray(entry.link) ? entry.link : entry.link ? [entry.link] : [];
+      const altLink = links.find((l: any) => l["@_rel"] === "alternate") ?? links[0];
+      return {
+        title: String(entry.title?.["#text"] ?? entry.title ?? ""),
+        description: String(entry.summary?.["#text"] ?? entry.summary ?? entry.content?.["#text"] ?? ""),
+        link: String(altLink?.["@_href"] ?? ""),
+        pubDate: String(entry.published ?? entry.updated ?? ""),
+        guid: String(entry.id ?? altLink?.["@_href"] ?? ""),
+      };
+    });
+  }
+
+  throw new Error("Format RSS/Atom non reconnu");
+}
+
+// ─── Processing a single feed ─────────────────────────────────────────────────
+// Fetches one feed and inserts its items into veilleItems or aapItems per feedType; fetch/insert failures are recorded on the feed row and returned, not thrown.
+async function processFeed(feed: RssFeed): Promise<FetchResult> {
+  const db = await getDb();
+  if (!db) throw new Error("Database not available");
+
+  const result: FetchResult = {
+    feedId: feed.id,
+    feedName: feed.name,
+    status: "ok",
+    newItems: 0,
+    skippedItems: 0,
+  };
+
+  try {
+    const items = await fetchAndParseRss(feed.url);
+    const rules: AutoRule[] = Array.isArray(feed.autoRules) ? feed.autoRules as AutoRule[] : [];
+
+    for (const item of items) {
+      const title = stripHtml(item.title || "");
+      const description = stripHtml(item.description || "");
+      const link = item.link || item.guid || "";
+      const pubDate = parseDate(item.pubDate);
+
+      if (!title) {
+        result.skippedItems++;
+        continue;
+      }
+
+      // Deduplication key derived from title + link
+      const dedupKey = dedupHash(title + "|" + link);
+
+      if (feed.feedType === "veille") {
+        // Resolve the watch type: matching rule, then feed default, then "generale"
+        const matchedRule = applyAutoRules(title, description, rules);
+        const typeVeille = (matchedRule?.typeVeille ?? feed.defaultTypeVeille ?? "generale") as
+          "reglementaire" | "concurrentielle" | "technologique" | "generale";
+
+        try {
+          await db.insert(veilleItems).values({
+            dedupKey,
+            titre: title,
+            resume: description || null,
+            source: feed.name,
+            lien: link || null,
+            typeVeille,
+            datePublication: pubDate,
+          });
+          result.newItems++;
+        } catch (e: any) {
+          // Duplicate-entry error on insert (presumably a UNIQUE constraint on dedupKey) → count as skipped
+          if (e?.code === "ER_DUP_ENTRY" || e?.message?.includes("Duplicate entry")) {
+            result.skippedItems++;
+          } else {
+            throw e;
+          }
+        }
+      } else if (feed.feedType === "aap") {
+        // Resolve the AAP category: matching rule, then feed default, then "Autre"
+        const matchedRule = applyAutoRules(title, description, rules);
+        const categorie = (matchedRule?.categorieAap ?? feed.defaultCategorieAap ?? "Autre") as
+          "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre";
+
+        try {
+          await db.insert(aapItems).values({
+            dedupKey,
+            titre: title,
+            categorie,
+            lien: link || null,
+            datePublication: pubDate,
+          });
+          result.newItems++;
+        } catch (e: any) {
+          if (e?.code === "ER_DUP_ENTRY" || e?.message?.includes("Duplicate entry")) {
+            result.skippedItems++;
+          } else {
+            throw e;
+          }
+        }
+      }
+    }
+
+    // Record the successful fetch (lastFetchedAt, lastFetchStatus) and clear any previous error
+    await db.update(rssFeeds)
+      .set({ lastFetchedAt: new Date(), lastFetchStatus: "ok", lastFetchError: null })
+      .where(eq(rssFeeds.id, feed.id));
+
+  } catch (e: any) {
+    result.status = "error";
+    result.error = e?.message ?? String(e);
+    // Best effort: store the error on the feed row; a failure here must not mask the original error
+    try {
+      await db.update(rssFeeds)
+        .set({ lastFetchedAt: new Date(), lastFetchStatus: "error", lastFetchError: result.error })
+        .where(eq(rssFeeds.id, feed.id));
+    } catch (_) { /* ignore */ }
+  }
+
+  return result;
+}
+
+// ─── Main entry point ─────────────────────────────────────────────────────────
+
+export interface RssFetchSummary {
+  totalFeeds: number;
+  successFeeds: number;
+  errorFeeds: number;
+  totalNewItems: number;
+  totalSkippedItems: number;
+  results: FetchResult[];
+  executedAt: string;
+}
+// Processes every active feed one after another and returns aggregate counters plus the per-feed results.
+export async function runRssFetch(): Promise<RssFetchSummary> {
+  const db = await getDb();
+  if (!db) throw new Error("Database not available");
+
+  // Load all active feeds
+  const feeds = await db.select().from(rssFeeds).where(eq(rssFeeds.isActive, true));
+
+  const results: FetchResult[] = [];
+
+  for (const feed of feeds) {
+    console.log(`[RSS] Lecture du flux: ${feed.name} (${feed.url})`);
+    const result = await processFeed(feed);
+    results.push(result);
+    console.log(`[RSS] ${feed.name}: ${result.newItems} nouveaux, ${result.skippedItems} doublons, statut: ${result.status}`);
+  }
+
+  const summary: RssFetchSummary = {
+    totalFeeds: feeds.length,
+    successFeeds: results.filter(r => r.status === "ok").length,
+    errorFeeds: results.filter(r => r.status === "error").length,
+    totalNewItems: results.reduce((acc, r) => acc + r.newItems, 0),
+    totalSkippedItems: results.reduce((acc, r) => acc + r.skippedItems, 0),
+    results,
+    executedAt: new Date().toISOString(),
+  };
+
+  console.log(`[RSS] Terminé: ${summary.totalNewItems} nouveaux articles, ${summary.errorFeeds} erreurs`);
+  return summary;
+}
diff --git a/server/scheduledRoutes.ts b/server/scheduledRoutes.ts
new file mode 100644
index 0000000..3631855
--- /dev/null
+++ b/server/scheduledRoutes.ts
@@ -0,0 +1,63 @@
+/**
+ * Routes for scheduled tasks.
+ * POST /api/scheduled/rss-fetch — triggers a read of all active RSS feeds.
+ * Protected by a session cookie ("user" role minimum, in line with Manus scheduled tasks).
+ */
+import express, { Router, Request, Response, NextFunction } from "express";
+import { parse as parseCookieHeader } from "cookie";
+import { verifyLocalToken, LOCAL_AUTH_COOKIE } from "./localAuth";
+import { sdk } from "./_core/sdk";
+import { runRssFetch } from "./rssEngine";
+
+const router: Router = express.Router();
+
+/**
+ * Lightweight authentication middleware:
+ * accepts either a local-auth cookie (LOCAL_AUTH_COOKIE, local users)
+ * or a Manus OAuth session validated by sdk.authenticateRequest.
+ */
+async function requireAuth(req: Request, res: Response, next: NextFunction): Promise<void> {
+  try {
+    const cookieHeader = req.headers.cookie ?? "";
+    const cookies = parseCookieHeader(cookieHeader);
+
+    // 1. Local cookie
+    const localToken = cookies[LOCAL_AUTH_COOKIE];
+    if (localToken) {
+      const user = await verifyLocalToken(localToken);
+      if (user) return next();
+    }
+
+    // 2. Manus OAuth cookie
+    try {
+      await sdk.authenticateRequest(req);
+      return next();
+    } catch (_) { /* no valid OAuth session */ }
+
+    res.status(401).json({ error: "Non authentifié" });
+  } catch (e) {
+    // Unexpected failure while checking credentials: log it instead of discarding it silently
+    console.error("[Scheduled] Erreur d'authentification:", e);
+    res.status(401).json({ error: "Erreur d'authentification" });
+  }
+}
+
+/**
+ * POST /api/scheduled/rss-fetch
+ * Triggers a read of all active RSS feeds and inserts the new articles.
+ */
+router.post("/api/scheduled/rss-fetch", requireAuth, async (req: Request, res: Response) => {
+  console.log("[Scheduled] Déclenchement de la lecture RSS...");
+  try {
+    const summary = await runRssFetch();
+    res.json({
+      success: true,
+      summary,
+    });
+  } catch (e: unknown) {
+    const msg = e instanceof Error ? e.message : String(e);
+    console.error("[Scheduled/rss-fetch] Erreur:", msg);
+    res.status(500).json({ success: false, error: msg });
+  }
+});
+
+export default router;
diff --git a/todo.md b/todo.md
index f21943e..e82b1b3 100644
--- a/todo.md
+++ b/todo.md
@@ -74,6 +74,15 @@
 - [x] Navigation : ajouter l'entrée RSS dans le menu latéral (DashboardLayout)
 - [ ] Déploiement VPS via Gitea CI/CD
 
+## Moteur RSS automatique
+- [ ] Installer le parseur RSS (fast-xml-parser) côté serveur
+- [ ] Ajouter table rss_seen_articles (guid unique pour éviter les doublons)
+- [ ] Ajouter champ last_fetched_at sur rss_feeds
+- [ ] Créer server/rssEngine.ts : parseur RSS + application des règles d'automatisme
+- [ ] Créer endpoint POST /api/scheduled/rss-fetch (auth cookie session)
+- [ ] Configurer la tâche planifiée Manus à 06h00 quotidien
+- [ ] Déployer sur le VPS
+
 ## Purge des données
 - [ ] Procédures tRPC : veille.purge et aap.purge (adminProcedure)
 - [ ] Bouton "Purger les données" en haut à droite de VeilleDashboard.tsx (admin uniquement)