diff --git a/drizzle/0005_last_nightshade.sql b/drizzle/0005_last_nightshade.sql new file mode 100644 index 0000000..dcd65db --- /dev/null +++ b/drizzle/0005_last_nightshade.sql @@ -0,0 +1,2 @@ +ALTER TABLE `aap_items` ADD `departements` json;--> statement-breakpoint +ALTER TABLE `veille_items` ADD `territoires` json; \ No newline at end of file diff --git a/drizzle/meta/0005_snapshot.json b/drizzle/meta/0005_snapshot.json new file mode 100644 index 0000000..63c6afb --- /dev/null +++ b/drizzle/meta/0005_snapshot.json @@ -0,0 +1,862 @@ +{ + "version": "5", + "dialect": "mysql", + "id": "ed89995e-aed6-4b70-9528-d1d0b48df858", + "prevId": "91cbc9bd-a436-4462-8a36-915ac2e72e28", + "tables": { + "aap_items": { + "name": "aap_items", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "dedupKey": { + "name": "dedupKey", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "titre": { + "name": "titre", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "categorie": { + "name": "categorie", + "type": "enum('Handicap','PA','Enfance','Précarité','Sanitaire','Autre')", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "region": { + "name": "region", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "departement": { + "name": "departement", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "departements": { + "name": "departements", + "type": "json", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "dateCloture": { + "name": "dateCloture", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "datePublication": { + "name": "datePublication", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "lien": { + "name": "lien", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "importedAt": { + "name": "importedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "aap_items_id": { + "name": "aap_items_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": { + "aap_items_dedupKey_unique": { + "name": "aap_items_dedupKey_unique", + "columns": [ + "dedupKey" + ] + } + }, + "checkConstraint": {} + }, + "app_settings": { + "name": "app_settings", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "key": { + "name": "key", + "type": "varchar(128)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "app_settings_id": { + "name": "app_settings_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": { + "app_settings_key_unique": { + "name": "app_settings_key_unique", + "columns": [ + "key" + ] + } + }, + "checkConstraint": {} + }, + "ideas": { + "name": "ideas", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "userId": { + "name": "userId", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "userName": { + "name": "userName", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "titre": { + "name": "titre", + "type": "varchar(512)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "message": { + "name": "message", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "statut": { + "name": "statut", + "type": "enum('ouvert','en_cours','resolu','ferme')", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'ouvert'" + }, + "reponseAdmin": { + "name": "reponseAdmin", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "reponduPar": { + "name": "reponduPar", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "reponduAt": { + "name": "reponduAt", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "ideas_id": { + "name": "ideas_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": {}, + "checkConstraint": {} + }, + "import_logs": { + "name": "import_logs", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "fileType": { + "name": "fileType", + "type": "enum('veille','aap')", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "source": { + "name": "source", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "enum('success','partial','error')", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "totalRows": { + "name": "totalRows", + "type": "int", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "newRows": { + "name": "newRows", + "type": "int", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "skippedRows": { + "name": "skippedRows", + "type": "int", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "errorMessage": { + "name": "errorMessage", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "details": { + "name": "details", + "type": "json", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "startedAt": { + "name": "startedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + }, + "completedAt": { + "name": "completedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "import_logs_id": { + "name": "import_logs_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": {}, + "checkConstraint": {} + }, + "local_users": { + "name": "local_users", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "username": { + "name": "username", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "email": { + "name": "email", + "type": "varchar(320)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "passwordHash": { + "name": "passwordHash", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "enum('admin','user','readonly')", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'user'" + }, + "isActive": { + "name": "isActive", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": true + }, + "createdAt": { + "name": "createdAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + }, + "lastSignedIn": { + "name": "lastSignedIn", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "local_users_id": { + "name": "local_users_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": { + "local_users_username_unique": { + "name": "local_users_username_unique", + "columns": [ + "username" + ] + } + }, + "checkConstraint": {} + }, + "rss_feeds": { + "name": "rss_feeds", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "feedType": { + "name": "feedType", + "type": "enum('veille','aap')", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "defaultTypeVeille": { + "name": "defaultTypeVeille", + "type": "enum('reglementaire','concurrentielle','technologique','generale')", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "defaultCategorieAap": { + "name": "defaultCategorieAap", + "type": "enum('Handicap','PA','Enfance','Précarité','Sanitaire','Autre')", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "autoRules": { + "name": "autoRules", + "type": "json", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "isActive": { + "name": "isActive", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": true + }, + "lastFetchedAt": { + "name": "lastFetchedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "lastFetchStatus": { + "name": "lastFetchStatus", + "type": "enum('ok','error','pending')", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'pending'" + }, + "lastFetchError": { + "name": "lastFetchError", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "rss_feeds_id": { + "name": "rss_feeds_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": {}, + "checkConstraint": {} + }, + "rss_settings": { + "name": "rss_settings", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "fetchIntervalMinutes": { + "name": "fetchIntervalMinutes", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 360 + }, + "scheduledTime": { + "name": "scheduledTime", + "type": "varchar(5)", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'06:00'" + }, + "fetchMode": { + "name": "fetchMode", + "type": "enum('interval','scheduled')", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'scheduled'" + }, + "autoFetchEnabled": { + "name": "autoFetchEnabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": true + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "rss_settings_id": { + "name": "rss_settings_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": {}, + "checkConstraint": {} + }, + "users": { + "name": "users", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "openId": { + "name": "openId", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "email": { + "name": "email", + "type": "varchar(320)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "loginMethod": { + "name": "loginMethod", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "enum('user','admin')", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'user'" + }, + "createdAt": { + "name": "createdAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + }, + "updatedAt": { + "name": "updatedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "onUpdate": true, + "default": "(now())" + }, + "lastSignedIn": { + "name": "lastSignedIn", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "users_id": { + "name": "users_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": { + "users_openId_unique": { + "name": "users_openId_unique", + "columns": [ + "openId" + ] + } + }, + "checkConstraint": {} + }, + "veille_items": { + "name": "veille_items", + "columns": { + "id": { + "name": "id", + "type": "int", + "primaryKey": false, + "notNull": true, + "autoincrement": true + }, + "dedupKey": { + "name": "dedupKey", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "titre": { + "name": "titre", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "categorie": { + "name": "categorie", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "niveau": { + "name": "niveau", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "territoire": { + "name": "territoire", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "territoires": { + "name": "territoires", + "type": "json", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "resume": { + "name": "resume", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "source": { + "name": "source", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "passage": { + "name": "passage", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "lien": { + "name": "lien", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "typeVeille": { + "name": "typeVeille", + "type": "enum('reglementaire','concurrentielle','technologique','generale')", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "datePublication": { + "name": "datePublication", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "importedAt": { + "name": "importedAt", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(now())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "veille_items_id": { + "name": "veille_items_id", + "columns": [ + "id" + ] + } + }, + "uniqueConstraints": { + "veille_items_dedupKey_unique": { + "name": "veille_items_dedupKey_unique", + "columns": [ + "dedupKey" + ] + } + }, + "checkConstraint": {} + } + }, + "views": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "tables": {}, + "indexes": {} + } +} \ No newline at end of file diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 98d4b2b..9377d13 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -36,6 +36,13 @@ "when": 1777149207871, "tag": "0004_clear_edwin_jarvis", "breakpoints": true + }, + { + "idx": 5, + "version": "5", + "when": 1777417532823, + "tag": "0005_last_nightshade", + "breakpoints": true } ] } \ No newline at end of file diff --git a/drizzle/schema.ts b/drizzle/schema.ts index d0b2be7..6dfa3d4 100644 --- a/drizzle/schema.ts +++ b/drizzle/schema.ts @@ -60,12 +60,14 @@ export type InsertAppSetting = typeof appSettings.$inferInsert; export const veilleItems = mysqlTable("veille_items", { id: int("id").autoincrement().primaryKey(), - // Clé de déduplication : hash du titre + lien + // Clé de déduplication : hash du titre normalisé (sans nom de département) dedupKey: varchar("dedupKey", { length: 64 }).notNull().unique(), titre: text("titre").notNull(), categorie: varchar("categorie", { length: 128 }), niveau: varchar("niveau", { length: 128 }), territoire: varchar("territoire", { length: 255 }), + // Liste JSON des territoires (multi-département) ex: ["Isère","Savoie"] + territoires: json("territoires").$type(), resume: text("resume"), source: varchar("source", { length: 512 }), passage: text("passage"), @@ -89,6 +91,8 @@ export const aapItems = mysqlTable("aap_items", { categorie: mysqlEnum("categorie", ["Handicap", "PA", "Enfance", "Précarité", "Sanitaire", "Autre"]).notNull(), region: varchar("region", { length: 255 }), departement: varchar("departement", { length: 255 }), + // Liste JSON des départements (multi-département) ex: ["Isère (38)","Savoie (73)"] + departements: json("departements").$type(), dateCloture: timestamp("dateCloture"), datePublication: timestamp("datePublication"), lien: text("lien"), diff --git a/server/rssEngine.ts b/server/rssEngine.ts index c54f974..a0e2a37 100644 --- a/server/rssEngine.ts +++ b/server/rssEngine.ts @@ -2,6 +2,14 @@ * Moteur de lecture RSS * Récupère les flux actifs, parse les articles, applique les règles d'automatisme, * et insère les nouveaux articles dans veille_items ou aap_items. + * + * Enrichissement automatique : + * - AAP : région (toujours Auvergne-Rhône-Alpes) + département extrait du titre/description + * - Veille : territoire, catégorie (Handicap/PA/Enfance/Précarité/Sanitaire/Autre), niveau + * + * Fusion multi-département : + * - Les articles avec le même titre normalisé (sans nom de département) sont fusionnés + * en un seul enregistrement avec une liste JSON de territoires/départements. */ import { XMLParser } from "fast-xml-parser"; import * as crypto from "crypto"; @@ -12,7 +20,7 @@ import { aapItems, type RssFeed, } from "../drizzle/schema"; -import { eq } from "drizzle-orm"; +import { eq, sql } from "drizzle-orm"; // ─── Types internes ─────────────────────────────────────────────────────────── @@ -36,9 +44,170 @@ interface FetchResult { status: "ok" | "error"; newItems: number; skippedItems: number; + mergedItems: number; error?: string; } +// ─── Dictionnaire des départements d'Auvergne-Rhône-Alpes ──────────────────── +// IMPORTANT : les départements composés (Haute-Loire, Haute-Savoie, Puy-de-Dôme) +// doivent être AVANT leurs variantes simples (Loire, Savoie) pour éviter les faux positifs. + +const AURA_DEPARTMENTS: Array<{ pattern: RegExp; name: string; num: string }> = [ + // Composés en premier + { pattern: /haute-savoie|haute savoie|(?]*>/g, "").replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/"/g, '"').replace(/'/g, "'").trim(); + return html + .replace(/<[^>]*>/g, "") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/"/g, '"') + .replace(/'/g, "'") + .trim(); } -/** - * Applique les règles d'automatisme sur le titre + description d'un article. - * Retourne le premier match trouvé, ou null si aucune règle ne correspond. - */ -function applyAutoRules( - title: string, - description: string, - rules: AutoRule[] -): AutoRule | null { +function applyAutoRules(title: string, description: string, rules: AutoRule[]): AutoRule | null { const text = (title + " " + description).toLowerCase(); for (const rule of rules) { if (text.includes(rule.keyword.toLowerCase())) { @@ -73,6 +241,21 @@ function applyAutoRules( return null; } +/** + * Ajoute un territoire à la liste JSON existante (sans doublon). + * Retourne la nouvelle liste sérialisée. + */ +function addToTerritoiresList(existing: string | null, newTerritoire: string): string { + let list: string[] = []; + if (existing) { + try { list = JSON.parse(existing); } catch { list = [existing]; } + } + if (!list.includes(newTerritoire)) { + list.push(newTerritoire); + } + return JSON.stringify(list); +} + // ─── Parsing RSS/Atom ───────────────────────────────────────────────────────── async function fetchAndParseRss(url: string): Promise { @@ -99,7 +282,6 @@ async function fetchAndParseRss(url: string): Promise { const parsed = parser.parse(xml); - // Support RSS 2.0 const channel = parsed?.rss?.channel; if (channel) { const items = Array.isArray(channel.item) ? channel.item : channel.item ? [channel.item] : []; @@ -112,7 +294,6 @@ async function fetchAndParseRss(url: string): Promise { })); } - // Support Atom const feed = parsed?.feed; if (feed) { const entries = Array.isArray(feed.entry) ? feed.entry : feed.entry ? [feed.entry] : []; @@ -144,6 +325,7 @@ async function processFeed(feed: RssFeed): Promise { status: "ok", newItems: 0, skippedItems: 0, + mergedItems: 0, }; try { @@ -155,25 +337,36 @@ async function processFeed(feed: RssFeed): Promise { const description = stripHtml(item.description || ""); const link = item.link || item.guid || ""; const pubDate = parseDate(item.pubDate); + const fullText = title + " " + description; if (!title) { result.skippedItems++; continue; } - // Clé de déduplication basée sur le titre + lien - const dedupKey = dedupHash(title + "|" + link); + // ─── Clé de déduplication : basée sur le titre NORMALISÉ (sans département) ─── + const normalizedTitle = buildMergeKey(title); + const dedupKey = dedupHash(normalizedTitle + "|" + (feed.feedType ?? "")); if (feed.feedType === "veille") { - // Déterminer le type de veille const matchedRule = applyAutoRules(title, description, rules); const typeVeille = (matchedRule?.typeVeille ?? feed.defaultTypeVeille ?? "generale") as "reglementaire" | "concurrentielle" | "technologique" | "generale"; + const categorie = detectVeilleCategorie(fullText); + const { niveau, territoire } = detectVeilleNiveauTerritoire(fullText); + try { + // Essayer d'insérer await db.insert(veilleItems).values({ dedupKey, titre: title, + categorie, + niveau, + territoire, + territoires: territoire !== "France" && territoire !== "Auvergne-Rhône-Alpes" + ? [territoire] + : [], resume: description || null, source: feed.name, lien: link || null, @@ -182,31 +375,64 @@ async function processFeed(feed: RssFeed): Promise { }); result.newItems++; } catch (e: any) { - // Doublon (contrainte UNIQUE sur dedupKey) → on ignore if (e?.code === "ER_DUP_ENTRY" || e?.message?.includes("Duplicate entry")) { - result.skippedItems++; + // Article existant → ajouter le territoire à la liste si c'est un nouveau département + if (territoire !== "France" && territoire !== "Auvergne-Rhône-Alpes") { + await db.execute( + sql`UPDATE veille_items + SET territoires = JSON_ARRAY_APPEND( + COALESCE(territoires, JSON_ARRAY()), + '$', + ${territoire} + ) + WHERE dedupKey = ${dedupKey} + AND NOT JSON_CONTAINS(COALESCE(territoires, JSON_ARRAY()), ${JSON.stringify(territoire)})` + ); + result.mergedItems++; + } else { + result.skippedItems++; + } } else { throw e; } } } else if (feed.feedType === "aap") { - // Déterminer la catégorie AAP const matchedRule = applyAutoRules(title, description, rules); const categorie = (matchedRule?.categorieAap ?? feed.defaultCategorieAap ?? "Autre") as "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre"; + const { region, departement } = detectAapGeo(fullText); + try { await db.insert(aapItems).values({ dedupKey, titre: title, categorie, + region, + departement, + departements: departement ? [departement] : [], lien: link || null, datePublication: pubDate, }); result.newItems++; } catch (e: any) { if (e?.code === "ER_DUP_ENTRY" || e?.message?.includes("Duplicate entry")) { - result.skippedItems++; + // Article existant → ajouter le département à la liste + if (departement) { + await db.execute( + sql`UPDATE aap_items + SET departements = JSON_ARRAY_APPEND( + COALESCE(departements, JSON_ARRAY()), + '$', + ${departement} + ) + WHERE dedupKey = ${dedupKey} + AND NOT JSON_CONTAINS(COALESCE(departements, JSON_ARRAY()), ${JSON.stringify(departement)})` + ); + result.mergedItems++; + } else { + result.skippedItems++; + } } else { throw e; } @@ -214,7 +440,6 @@ async function processFeed(feed: RssFeed): Promise { } } - // Mettre à jour lastFetchedAt et lastFetchStatus await db.update(rssFeeds) .set({ lastFetchedAt: new Date(), lastFetchStatus: "ok", lastFetchError: null }) .where(eq(rssFeeds.id, feed.id)); @@ -222,7 +447,6 @@ async function processFeed(feed: RssFeed): Promise { } catch (e: any) { result.status = "error"; result.error = e?.message ?? String(e); - // Enregistrer l'erreur dans le flux try { await db.update(rssFeeds) .set({ lastFetchedAt: new Date(), lastFetchStatus: "error", lastFetchError: result.error }) @@ -233,6 +457,182 @@ async function processFeed(feed: RssFeed): Promise { return result; } +// ─── Migration des articles existants ───────────────────────────────────────── + +export interface MigrationSummary { + veilleUpdated: number; + veilleMerged: number; + aapUpdated: number; + aapMerged: number; + executedAt: string; +} + +/** + * Met à jour et fusionne les articles déjà importés. + * - Recalcule catégorie, niveau, territoire pour veille_items + * - Recalcule région, département pour aap_items + * - Fusionne les articles avec le même titre normalisé + */ +export async function migrateExistingItems(): Promise { + const db = await getDb(); + if (!db) throw new Error("Database not available"); + + let veilleUpdated = 0; + let veilleMerged = 0; + let aapUpdated = 0; + let aapMerged = 0; + + // ─── 1. Recalculer les champs enrichis pour veille_items ────────────────── + const veilleRows = await db.select().from(veilleItems); + + // Grouper par titre normalisé + const veilleGroups = new Map(); + for (const row of veilleRows) { + const normalized = buildMergeKey(row.titre || ""); + const key = dedupHash(normalized + "|veille"); + if (!veilleGroups.has(key)) veilleGroups.set(key, []); + veilleGroups.get(key)!.push(row); + } + + for (const [, group] of Array.from(veilleGroups)) { + if (group.length === 1) { + // Article unique : mettre à jour les champs enrichis + const row = group[0]; + const fullText = (row.titre || "") + " " + (row.resume || ""); + const newCategorie = detectVeilleCategorie(fullText); + const { niveau: newNiveau, territoire: newTerritoire } = detectVeilleNiveauTerritoire(fullText); + const normalizedTitle = buildMergeKey(row.titre || ""); + const newDedupKey = dedupHash(normalizedTitle + "|veille"); + + await db.update(veilleItems) + .set({ + categorie: newCategorie, + niveau: newNiveau, + territoire: newTerritoire, + territoires: newTerritoire !== "France" && newTerritoire !== "Auvergne-Rhône-Alpes" + ? [newTerritoire] : [], + titre: row.titre, + dedupKey: newDedupKey, + }) + .where(eq(veilleItems.id, row.id)); + veilleUpdated++; + } else { + // Groupe : fusionner en gardant le premier, supprimer les autres + const sorted = group.sort((a: (typeof veilleRows)[number], b: (typeof veilleRows)[number]) => a.id - b.id); + const primary = sorted[0]; + const duplicates = sorted.slice(1); + + // Collecter tous les territoires + const allTerritoires: string[] = []; + for (const row of sorted) { + const fullText = (row.titre || "") + " " + (row.resume || ""); + const { territoire } = detectVeilleNiveauTerritoire(fullText); + if (territoire !== "France" && territoire !== "Auvergne-Rhône-Alpes" && !allTerritoires.includes(territoire)) { + allTerritoires.push(territoire); + } + } + + const fullText = (primary.titre || "") + " " + (primary.resume || ""); + const newCategorie = detectVeilleCategorie(fullText); + const normalizedTitle = buildMergeKey(primary.titre || ""); + const newDedupKey = dedupHash(normalizedTitle + "|veille"); + + // Mettre à jour le principal + await db.update(veilleItems) + .set({ + categorie: newCategorie, + niveau: allTerritoires.length > 1 ? "departemental" : "regional", + territoire: allTerritoires.length > 0 ? allTerritoires[0] : "Auvergne-Rhône-Alpes", + territoires: allTerritoires, + titre: primary.titre, + dedupKey: newDedupKey, + }) + .where(eq(veilleItems.id, primary.id)); + + // Supprimer les doublons + for (const dup of duplicates) { + await db.delete(veilleItems).where(eq(veilleItems.id, dup.id)); + veilleMerged++; + } + veilleUpdated++; + } + } + + // ─── 2. Recalculer les champs enrichis pour aap_items ──────────────────── + const aapRows = await db.select().from(aapItems); + + // Grouper par titre normalisé + const aapGroups = new Map(); + for (const row of aapRows) { + const normalized = buildMergeKey(row.titre || ""); + const key = dedupHash(normalized + "|aap"); + if (!aapGroups.has(key)) aapGroups.set(key, []); + aapGroups.get(key)!.push(row); + } + + for (const [, group] of Array.from(aapGroups)) { + if (group.length === 1) { + const row = group[0]; + const { region: newRegion, departement: newDept } = detectAapGeo(row.titre || ""); + const normalizedTitle = buildMergeKey(row.titre || ""); + const newDedupKey = dedupHash(normalizedTitle + "|aap"); + + await db.update(aapItems) + .set({ + region: newRegion, + departement: newDept, + departements: newDept ? [newDept] : [], + titre: row.titre, + dedupKey: newDedupKey, + }) + .where(eq(aapItems.id, row.id)); + aapUpdated++; + } else { + // Fusionner + const sorted = group.sort((a: (typeof aapRows)[number], b: (typeof aapRows)[number]) => a.id - b.id); + const primary = sorted[0]; + const duplicates = sorted.slice(1); + + const allDepts: string[] = []; + for (const row of sorted) { + const { departement } = detectAapGeo(row.titre || ""); + if (departement && !allDepts.includes(departement)) { + allDepts.push(departement); + } + } + + const normalizedTitle = buildMergeKey(primary.titre || ""); + const newDedupKey = dedupHash(normalizedTitle + "|aap"); + + await db.update(aapItems) + .set({ + region: "Auvergne-Rhône-Alpes", + departement: allDepts.length > 0 ? allDepts[0] : null, + departements: allDepts, + titre: primary.titre, + dedupKey: newDedupKey, + }) + .where(eq(aapItems.id, primary.id)); + + for (const dup of duplicates) { + await db.delete(aapItems).where(eq(aapItems.id, dup.id)); + aapMerged++; + } + aapUpdated++; + } + } + + console.log(`[Migration] Veille: ${veilleUpdated} mis à jour, ${veilleMerged} fusionnés. AAP: ${aapUpdated} mis à jour, ${aapMerged} fusionnés.`); + + return { + veilleUpdated, + veilleMerged, + aapUpdated, + aapMerged, + executedAt: new Date().toISOString(), + }; +} + // ─── Point d'entrée principal ───────────────────────────────────────────────── export interface RssFetchSummary { @@ -241,6 +641,7 @@ export interface RssFetchSummary { errorFeeds: number; totalNewItems: number; totalSkippedItems: number; + totalMergedItems: number; results: FetchResult[]; executedAt: string; } @@ -249,16 +650,14 @@ export async function runRssFetch(): Promise { const db = await getDb(); if (!db) throw new Error("Database not available"); - // Récupérer tous les flux actifs const feeds = await db.select().from(rssFeeds).where(eq(rssFeeds.isActive, true)); - const results: FetchResult[] = []; for (const feed of feeds) { console.log(`[RSS] Lecture du flux: ${feed.name} (${feed.url})`); const result = await processFeed(feed); results.push(result); - console.log(`[RSS] ${feed.name}: ${result.newItems} nouveaux, ${result.skippedItems} doublons, statut: ${result.status}`); + console.log(`[RSS] ${feed.name}: ${result.newItems} nouveaux, ${result.mergedItems} fusionnés, ${result.skippedItems} doublons, statut: ${result.status}`); } const summary: RssFetchSummary = { @@ -267,10 +666,11 @@ export async function runRssFetch(): Promise { errorFeeds: results.filter(r => r.status === "error").length, totalNewItems: results.reduce((acc, r) => acc + r.newItems, 0), totalSkippedItems: results.reduce((acc, r) => acc + r.skippedItems, 0), + totalMergedItems: results.reduce((acc, r) => acc + r.mergedItems, 0), results, executedAt: new Date().toISOString(), }; - console.log(`[RSS] Terminé: ${summary.totalNewItems} nouveaux articles, ${summary.errorFeeds} erreurs`); + console.log(`[RSS] Terminé: ${summary.totalNewItems} nouveaux, ${summary.totalMergedItems} fusionnés, ${summary.errorFeeds} erreurs`); return summary; } diff --git a/server/scheduledRoutes.ts b/server/scheduledRoutes.ts index 3631855..b7a17bb 100644 --- a/server/scheduledRoutes.ts +++ b/server/scheduledRoutes.ts @@ -1,13 +1,14 @@ /** * Routes pour les tâches planifiées. - * POST /api/scheduled/rss-fetch — déclenche la lecture de tous les flux RSS actifs. + * POST /api/scheduled/rss-fetch — déclenche la lecture de tous les flux RSS actifs. + * POST /api/scheduled/rss-migrate — met à jour les articles existants avec les champs enrichis. * Protégé par cookie de session (rôle "user" minimum, conforme aux tâches planifiées Manus). */ import express, { Router, Request, Response } from "express"; import { parse as parseCookieHeader } from "cookie"; import { verifyLocalToken, LOCAL_AUTH_COOKIE } from "./localAuth"; import { sdk } from "./_core/sdk"; -import { runRssFetch } from "./rssEngine"; +import { runRssFetch, migrateExistingItems } from "./rssEngine"; const router: Router = express.Router(); @@ -59,4 +60,25 @@ router.post("/api/scheduled/rss-fetch", requireAuth, async (req: Request, res: R } }); +/** + * POST /api/scheduled/rss-migrate + * Met à jour les articles existants (veille_items et aap_items) avec les champs enrichis : + * - veille_items : catégorie, niveau, territoire + * - aap_items : région, département + */ +router.post("/api/scheduled/rss-migrate", requireAuth, async (req: Request, res: Response) => { + console.log("[Scheduled] Migration des articles existants..."); + try { + const summary = await migrateExistingItems(); + res.json({ + success: true, + summary, + }); + } catch (e: unknown) { + const msg = e instanceof Error ? e.message : String(e); + console.error("[Scheduled/rss-migrate] Erreur:", msg); + res.status(500).json({ success: false, error: msg }); + } +}); + export default router;