Files
veille-reglementaire/server/importer.ts

391 lines
13 KiB
TypeScript

import * as XLSX from "xlsx";
import * as crypto from "crypto";
import * as fs from "fs";
import * as path from "path";
import * as ftp from "basic-ftp";
import * as https from "https";
import * as http from "http";
import { getDb } from "./db";
import { veilleItems, aapItems, importLogs, appSettings } from "../drizzle/schema";
import { eq, inArray } from "drizzle-orm";
// ─── Types ───────────────────────────────────────────────────────────────────
export type SourceType = "local" | "onedrive" | "ftp" | "sharepoint";
export interface ImportConfig {
sourceType: SourceType;
veilleFilePath?: string;
aapFilePath?: string;
ftpHost?: string;
ftpPort?: number;
ftpUser?: string;
ftpPassword?: string;
ftpSecure?: boolean;
onedriveToken?: string;
sharepointSiteUrl?: string;
sharepointToken?: string;
}
export interface ImportResult {
fileType: "veille" | "aap";
totalRows: number;
newRows: number;
skippedRows: number;
errors: string[];
status: "success" | "partial" | "error";
}
// ─── Utilitaires ─────────────────────────────────────────────────────────────
function makeDedupKey(titre: string, lien?: string | null): string {
const raw = `${(titre || "").trim().toLowerCase()}|${(lien || "").trim().toLowerCase()}`;
return crypto.createHash("md5").update(raw).digest("hex");
}
function parseDate(value: unknown): Date | null {
if (!value) return null;
if (value instanceof Date) return isNaN(value.getTime()) ? null : value;
if (typeof value === "string") {
const cleaned = value.replace("Z", "").trim();
const d = new Date(cleaned);
return isNaN(d.getTime()) ? null : d;
}
if (typeof value === "number") {
// Excel serial date
const d = XLSX.SSF.parse_date_code(value);
if (d) return new Date(d.y, d.m - 1, d.d);
}
return null;
}
function normalizeStr(v: unknown): string | null {
if (v === null || v === undefined) return null;
const s = String(v).trim();
return s === "" || s === "Non renseigné" ? null : s;
}
// ─── Téléchargement des fichiers selon la source ─────────────────────────────
async function downloadFile(
filePath: string,
config: ImportConfig
): Promise<Buffer> {
switch (config.sourceType) {
case "local": {
if (!fs.existsSync(filePath)) {
throw new Error(`Fichier introuvable : ${filePath}`);
}
return fs.readFileSync(filePath);
}
case "ftp": {
const client = new ftp.Client();
client.ftp.verbose = false;
try {
await client.access({
host: config.ftpHost!,
port: config.ftpPort || 21,
user: config.ftpUser!,
password: config.ftpPassword!,
secure: config.ftpSecure || false,
});
const tmpPath = `/tmp/veille_import_${Date.now()}.xlsx`;
await client.downloadTo(tmpPath, filePath);
const buf = fs.readFileSync(tmpPath);
fs.unlinkSync(tmpPath);
return buf;
} finally {
client.close();
}
}
case "onedrive":
case "sharepoint": {
const token =
config.sourceType === "onedrive"
? config.onedriveToken
: config.sharepointToken;
if (!token) throw new Error("Token d'authentification manquant");
return new Promise((resolve, reject) => {
const url = new URL(filePath);
const options = {
hostname: url.hostname,
path: url.pathname + url.search,
headers: { Authorization: `Bearer ${token}` },
};
const protocol = url.protocol === "https:" ? https : http;
protocol
.get(options, (res) => {
const chunks: Buffer[] = [];
res.on("data", (c) => chunks.push(c));
res.on("end", () => resolve(Buffer.concat(chunks)));
res.on("error", reject);
})
.on("error", reject);
});
}
default:
throw new Error(`Source non supportée : ${config.sourceType}`);
}
}
// ─── Lecture des paramètres depuis la BDD ────────────────────────────────────
export async function getImportConfig(): Promise<ImportConfig> {
const db = await getDb();
if (!db) return { sourceType: "local" };
const rows = await db.select().from(appSettings);
const map: Record<string, string> = {};
for (const r of rows) {
if (r.key && r.value) map[r.key] = r.value;
}
return {
sourceType: (map["source_type"] as SourceType) || "local",
veilleFilePath: map["veille_file_path"] || "",
aapFilePath: map["aap_file_path"] || "",
ftpHost: map["ftp_host"],
ftpPort: map["ftp_port"] ? parseInt(map["ftp_port"]) : 21,
ftpUser: map["ftp_user"],
ftpPassword: map["ftp_password"],
ftpSecure: map["ftp_secure"] === "true",
onedriveToken: map["onedrive_token"],
sharepointSiteUrl: map["sharepoint_site_url"],
sharepointToken: map["sharepoint_token"],
};
}
// ─── Import Veille Stratégique ───────────────────────────────────────────────
const VEILLE_SHEETS: Record<string, "reglementaire" | "concurrentielle" | "technologique" | "generale"> = {
réglementaire: "reglementaire",
reglementaire: "reglementaire",
concurrentielle: "concurrentielle",
technologique: "technologique",
générale: "generale",
generale: "generale",
};
export async function importVeille(config: ImportConfig): Promise<ImportResult> {
const startedAt = new Date();
const errors: string[] = [];
let totalRows = 0;
let newRows = 0;
let skippedRows = 0;
const db = await getDb();
if (!db) throw new Error("Base de données indisponible");
const filePath = config.veilleFilePath;
if (!filePath) throw new Error("Chemin du fichier Veille non configuré");
let buffer: Buffer;
try {
buffer = await downloadFile(filePath, config);
} catch (e: unknown) {
const msg = e instanceof Error ? e.message : String(e);
await logImport(db, "veille", filePath, "error", 0, 0, 0, msg, null, startedAt);
return { fileType: "veille", totalRows: 0, newRows: 0, skippedRows: 0, errors: [msg], status: "error" };
}
const workbook = XLSX.read(buffer, { type: "buffer", cellDates: true });
for (const sheetName of workbook.SheetNames) {
const normalized = sheetName.toLowerCase().trim();
if (normalized === "poubelle") continue;
const typeVeille = VEILLE_SHEETS[normalized];
if (!typeVeille) continue;
const sheet = workbook.Sheets[sheetName];
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: null });
for (const row of rows) {
totalRows++;
const titre = normalizeStr(row["Titre"]);
if (!titre) { skippedRows++; continue; }
const lien = normalizeStr(row["Lien"]);
const dedupKey = makeDedupKey(titre, lien);
// Vérifier si déjà présent
const existing = await db
.select({ id: veilleItems.id })
.from(veilleItems)
.where(eq(veilleItems.dedupKey, dedupKey))
.limit(1);
if (existing.length > 0) { skippedRows++; continue; }
// Extraire la date depuis la colonne Source (qui contient une date ISO)
const sourceRaw = row["Source"];
const datePublication = parseDate(sourceRaw);
// La vraie source (URL) semble être dans Lien pour certaines feuilles
const sourceStr = normalizeStr(sourceRaw instanceof Date ? null : sourceRaw);
try {
await db.insert(veilleItems).values({
dedupKey,
titre,
categorie: normalizeStr(row["Catégorie"]),
niveau: normalizeStr(row["Niveau"]),
territoire: normalizeStr(row["Territoire"]),
resume: normalizeStr(row[" Résumé"] ?? row["Résumé"] ?? row["Resume"]),
source: sourceStr,
passage: normalizeStr(row["passage"] ?? row["Passage"]),
lien,
typeVeille,
datePublication,
});
newRows++;
} catch (e: unknown) {
const msg = e instanceof Error ? e.message : String(e);
errors.push(`[${sheetName}] ${titre?.substring(0, 50)}: ${msg}`);
skippedRows++;
}
}
}
const status = errors.length === 0 ? "success" : newRows > 0 ? "partial" : "error";
await logImport(db, "veille", filePath, status, totalRows, newRows, skippedRows, errors.join("\n") || null, { errors }, startedAt);
return { fileType: "veille", totalRows, newRows, skippedRows, errors, status };
}
// ─── Import Appels à Projets ─────────────────────────────────────────────────
const AAP_SHEETS: Record<string, "Handicap" | "PA" | "Enfance" | "Précarité" | "Sanitaire" | "Autre"> = {
handicap: "Handicap",
pa: "PA",
enfance: "Enfance",
"précarité": "Précarité",
precarite: "Précarité",
sanitaire: "Sanitaire",
autre: "Autre",
};
export async function importAAP(config: ImportConfig): Promise<ImportResult> {
const startedAt = new Date();
const errors: string[] = [];
let totalRows = 0;
let newRows = 0;
let skippedRows = 0;
const db = await getDb();
if (!db) throw new Error("Base de données indisponible");
const filePath = config.aapFilePath;
if (!filePath) throw new Error("Chemin du fichier AAP non configuré");
let buffer: Buffer;
try {
buffer = await downloadFile(filePath, config);
} catch (e: unknown) {
const msg = e instanceof Error ? e.message : String(e);
await logImport(db, "aap", filePath, "error", 0, 0, 0, msg, null, startedAt);
return { fileType: "aap", totalRows: 0, newRows: 0, skippedRows: 0, errors: [msg], status: "error" };
}
const workbook = XLSX.read(buffer, { type: "buffer", cellDates: true });
for (const sheetName of workbook.SheetNames) {
const normalized = sheetName.toLowerCase().trim().replace(/é/g, "e").replace(/è/g, "e");
const categorie = AAP_SHEETS[sheetName.toLowerCase().trim()] || AAP_SHEETS[normalized];
if (!categorie) continue;
const sheet = workbook.Sheets[sheetName];
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: null });
for (const row of rows) {
totalRows++;
const titre = normalizeStr(row["Titre"]);
if (!titre) { skippedRows++; continue; }
const lien = normalizeStr(row["Lien"]);
const dedupKey = makeDedupKey(titre, lien);
const existing = await db
.select({ id: aapItems.id })
.from(aapItems)
.where(eq(aapItems.dedupKey, dedupKey))
.limit(1);
if (existing.length > 0) { skippedRows++; continue; }
const datePublication = parseDate(row["Date publication"]);
const dateCloture = parseDate(row["Date clôture"]);
try {
await db.insert(aapItems).values({
dedupKey,
titre,
categorie,
region: normalizeStr(row["Région"]),
departement: normalizeStr(row["Département"]),
dateCloture,
datePublication,
lien,
});
newRows++;
} catch (e: unknown) {
const msg = e instanceof Error ? e.message : String(e);
errors.push(`[${sheetName}] ${titre?.substring(0, 50)}: ${msg}`);
skippedRows++;
}
}
}
const status = errors.length === 0 ? "success" : newRows > 0 ? "partial" : "error";
await logImport(db, "aap", filePath, status, totalRows, newRows, skippedRows, errors.join("\n") || null, { errors }, startedAt);
return { fileType: "aap", totalRows, newRows, skippedRows, errors, status };
}
// ─── Import complet (veille + AAP) ───────────────────────────────────────────
export async function runFullImport(): Promise<{ veille: ImportResult; aap: ImportResult }> {
const config = await getImportConfig();
const [veille, aap] = await Promise.all([
importVeille(config),
importAAP(config),
]);
return { veille, aap };
}
// ─── Enregistrement des logs ─────────────────────────────────────────────────
async function logImport(
db: Awaited<ReturnType<typeof getDb>>,
fileType: "veille" | "aap",
source: string,
status: "success" | "partial" | "error",
totalRows: number,
newRows: number,
skippedRows: number,
errorMessage: string | null,
details: unknown,
startedAt: Date
) {
if (!db) return;
try {
await db.insert(importLogs).values({
fileType,
source,
status,
totalRows,
newRows,
skippedRows,
errorMessage,
details: details as Record<string, unknown> | null,
startedAt,
completedAt: new Date(),
});
} catch (e) {
console.error("[Import] Erreur lors de l'enregistrement du log:", e);
}
}