// Source: NODEDC_1C/llm_normalizer/backend/dist/routes/eval.js
// (file-viewer metadata: 385 lines, 14 KiB, JavaScript)

"use strict";
// Module-interop helper: returns `mod` unchanged when it is flagged `__esModule`,
// otherwise wraps it as `{ default: mod }` so callers can always read `.default`.
// Reuses an already-defined `this.__importDefault` when one exists.
// NOTE(review): this shape looks compiler-generated — presumably should not be edited by hand.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildEvalRouter = buildEvalRouter;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const nanoid_1 = require("nanoid");
const express_1 = require("express");
const config_1 = require("../config");
const http_1 = require("../utils/http");
// In-memory registry of async eval jobs, keyed by job_id (module-level Map, so it
// lives only as long as this module instance).
const ASYNC_JOBS = new Map();
// Maximum number of jobs kept in ASYNC_JOBS; trimAsyncJobsStore() evicts the
// entries with the oldest `updated_at` once the map grows beyond this size.
const MAX_ASYNC_JOBS = 80;
/**
 * Narrows an arbitrary value to a plain-object record.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object|null} The same reference when `value` is a truthy,
 *   non-array object; `null` for everything else (primitives, arrays,
 *   functions, null/undefined).
 */
function toRecord(value) {
  const isObjectLike = Boolean(value) && typeof value === "object";
  return isObjectLike && !Array.isArray(value) ? value : null;
}
/**
 * Returns a trimmed, non-empty string or `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null} The trimmed text when `value` is a string with at
 *   least one non-whitespace character; `null` otherwise.
 */
function toStringSafe(value) {
  const text = typeof value === "string" ? value.trim() : "";
  return text === "" ? null : text;
}
/**
 * Coerces a value to an array.
 *
 * @param {unknown} value - Candidate value.
 * @returns {Array} `value` itself when it is an array, otherwise a new empty array.
 */
function toArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
/**
 * Collapses all whitespace in a question fragment.
 *
 * @param {unknown} value - Fragment to normalize; null/undefined become "".
 * @returns {string} The stringified value with every whitespace run replaced
 *   by a single space and leading/trailing whitespace removed.
 */
function normalizeQuestionChunk(value) {
  const text = value == null ? "" : String(value);
  // `\s` already matches carriage returns and tabs, so one pass is enough.
  return text.replace(/\s+/g, " ").trim();
}
/**
 * Splits one raw text blob into individual question strings.
 *
 * Steps:
 *  1. Carriage returns are treated as newlines and the text is trimmed.
 *  2. When the text has more than one non-empty line, each line is handled
 *     separately and a leading list marker (bullet or short number) is removed.
 *     Single-line input is used as-is, without marker stripping.
 *  3. A line containing several "?"-terminated fragments is split into one
 *     question per fragment; a trailing fragment without "?" gets one appended.
 *
 * @param {unknown} raw - Raw text; null/undefined are treated as empty.
 * @returns {string[]} Whitespace-normalized, non-empty question strings.
 */
function splitQuestionCandidate(raw) {
  const normalized = String(raw ?? "").replace(/\r/g, "\n").trim();
  if (!normalized) {
    return [];
  }
  // Leading list marker: a bullet, or 1-3 digits that are either followed by
  // a delimiter ( ")", ".", ":" ) which is NOT itself followed by a digit, or
  // followed directly by whitespace / end of line. The previous pattern made
  // the delimiter optional with no boundary check, so it chopped real content:
  // "2024 revenue?" -> "4 revenue?", "1C balance?" -> "C balance?",
  // "12:30 cutoff?" -> "30 cutoff?", "1.5 million?" -> "5 million?".
  // A bare number followed by a space ("1 Foo") is still treated as numbering.
  const listMarker = /^\s*(?:[-*•]|\d{1,3}(?:[).:](?!\d)|(?=\s|$)))\s*/;
  const byLines = normalized
    .split(/\n+/)
    .map((line) => line.replace(listMarker, "").trim())
    .filter((line) => line.length > 0);
  const source = byLines.length > 1 ? byLines : [normalized];
  const chunks = [];
  for (const line of source) {
    // Each match is a run of non-"?" characters ending at a "?" or end of line.
    const questionLike = Array.from(line.matchAll(/[^?]+(?:\?|$)/g))
      .map((match) => normalizeQuestionChunk(match[0]))
      .filter((item) => item.length > 0);
    if (questionLike.length > 1) {
      for (const item of questionLike) {
        chunks.push(item.endsWith("?") ? item : `${item}?`);
      }
      continue;
    }
    chunks.push(normalizeQuestionChunk(line));
  }
  return chunks.filter((item) => item.length > 0);
}
/**
 * Turns a client-supplied `questions` value into a clean, de-duplicated list.
 *
 * Non-array input and non-string / blank entries are ignored. Each remaining
 * entry is expanded via `splitQuestionCandidate`, whitespace-normalized, and
 * kept only on its first occurrence (exact, case-sensitive match).
 *
 * @param {unknown} value - Expected to be an array of strings.
 * @returns {string[]} Unique questions in first-seen order; may be empty.
 */
function normalizeRuntimeQuestions(value) {
  const candidates = [];
  for (const entry of toArray(value)) {
    if (typeof entry !== "string") {
      continue;
    }
    const trimmed = entry.trim();
    if (trimmed.length > 0) {
      candidates.push(trimmed);
    }
  }
  // A Set keeps insertion order, which gives first-seen de-duplication for free.
  const unique = new Set();
  for (const candidate of candidates) {
    for (const piece of splitQuestionCandidate(candidate)) {
      const cleaned = normalizeQuestionChunk(piece);
      if (cleaned) {
        unique.add(cleaned);
      }
    }
  }
  return [...unique];
}
/**
 * Cleans a list of case ids.
 *
 * @param {unknown} value - Expected to be an array of strings.
 * @returns {string[]|undefined} Trimmed, non-empty string entries in their
 *   original order, or `undefined` when `value` is not an array or nothing
 *   usable remains.
 */
function normalizeCaseIds(value) {
  if (!Array.isArray(value)) {
    return undefined;
  }
  const ids = [];
  for (const entry of value) {
    const id = typeof entry === "string" ? entry.trim() : "";
    if (id !== "") {
      ids.push(id);
    }
  }
  return ids.length === 0 ? undefined : ids;
}
/**
 * Maps a request body onto the payload object passed to `evalService.run`.
 *
 * Defaults applied here: `normalizeConfig` -> `{}`, `mode` -> "standard",
 * `evalTarget` (from `eval_target`) -> "normalizer". String-typed optional
 * fields become `undefined` when the body value is not a string. The baseline
 * report file is read from `compare_with_report_file` first and from
 * `comparisonBaselineReportFile` as a fallback.
 *
 * @param {object} body - Request body (must not be null/undefined).
 * @returns {object} Payload with keys normalizeConfig, caseIds, useMock, mode,
 *   caseSetFile, rawQuestions, evalTarget, compareWithReportFile.
 */
function buildEvalPayloadFromBody(body) {
  const asOptionalString = (candidate) => (typeof candidate === "string" ? candidate : undefined);
  const baselineReportFile =
    asOptionalString(body.compare_with_report_file) ??
    asOptionalString(body.comparisonBaselineReportFile);
  const payload = {
    normalizeConfig: body.normalizeConfig ?? {},
    caseIds: normalizeCaseIds(body.caseIds),
    useMock: Boolean(body.useMock),
    mode: body.mode ?? "standard",
    caseSetFile: asOptionalString(body.caseSetFile),
    rawQuestions: asOptionalString(body.rawQuestions),
    evalTarget: body.eval_target ?? "normalizer",
    compareWithReportFile: baselineReportFile
  };
  return payload;
}
/**
 * Resolves a case-set path to a location on disk.
 *
 * Absolute paths are returned untouched. A relative path is tried, in order,
 * against `EVAL_CASES_DIR`, `EVAL_DATASETS_DIR`, and the current working
 * directory; the first candidate that exists wins. When none exists, the
 * `EVAL_CASES_DIR` candidate is returned so the caller's read fails on it.
 *
 * NOTE(review): no containment check is applied here, so absolute paths and
 * `..` segments are accepted as given — confirm callers only pass trusted input.
 *
 * @param {string} inputPath - Absolute or relative file path.
 * @returns {string} The chosen path.
 */
function resolveReadablePath(inputPath) {
  if (path_1.default.isAbsolute(inputPath)) {
    return inputPath;
  }
  const searchRoots = [config_1.EVAL_CASES_DIR, config_1.EVAL_DATASETS_DIR];
  const candidates = searchRoots.map((root) => path_1.default.resolve(root, inputPath));
  candidates.push(path_1.default.resolve(inputPath));
  const existing = candidates.find((candidate) => fs_1.default.existsSync(candidate));
  return existing ?? candidates[0];
}
/**
 * Reads an assistant suite JSON file and extracts a minimal seed per case.
 *
 * The file is located with `resolveReadablePath`, read as UTF-8, stripped of
 * a leading BOM, and parsed. Entries of the top-level `cases` array are kept
 * only when they are objects with a non-blank string `case_id` and a
 * non-empty `turns` array.
 *
 * @param {string} inputPath - Path of the suite file (absolute or relative).
 * @returns {{case_id: string, turns_total: number}[]} Seeds in file order.
 * @throws When the file cannot be read or does not contain valid JSON
 *   (errors from `readFileSync` / `JSON.parse` propagate).
 */
function readAssistantSuiteCaseSeeds(inputPath) {
  const text = fs_1.default.readFileSync(resolveReadablePath(inputPath), "utf-8");
  const suite = toRecord(JSON.parse(text.replace(/^\uFEFF/, "")));
  const seeds = [];
  for (const entry of toArray(suite?.cases)) {
    const caseRecord = toRecord(entry);
    if (caseRecord === null) {
      continue;
    }
    const caseId = toStringSafe(caseRecord.case_id);
    const turnCount = toArray(caseRecord.turns).length;
    if (caseId && turnCount > 0) {
      seeds.push({ case_id: caseId, turns_total: turnCount });
    }
  }
  return seeds;
}
/**
 * Writes an auto-generated single-turn assistant suite for the given questions.
 *
 * Creates `EVAL_CASES_DIR` (recursively) when it does not exist, builds one
 * case per question with ids `AUTO-001`, `AUTO-002`, …, and saves the suite as
 * pretty-printed JSON named `assistant_autogen_runtime_<jobId>.json` inside
 * that directory.
 *
 * @param {string} jobId - Job identifier embedded in the suite id and file name.
 * @param {string[]} questions - One user message per generated case.
 * @returns {string} The file name (not the full path) of the written suite.
 */
function writeRuntimeAssistantSuiteFromQuestions(jobId, questions) {
  const casesDir = config_1.EVAL_CASES_DIR;
  if (!fs_1.default.existsSync(casesDir)) {
    fs_1.default.mkdirSync(casesDir, { recursive: true });
  }
  const cases = [];
  for (const [position, question] of questions.entries()) {
    const ordinal = String(position + 1).padStart(3, "0");
    cases.push({
      case_id: `AUTO-${ordinal}`,
      scenario_tag: "autogen_runtime",
      question_type: "direct",
      broadness_level: "medium",
      turns: [{ user_message: question }]
    });
  }
  const suiteName = `assistant_autogen_runtime_${jobId}`;
  const suite = {
    suite_id: suiteName,
    suite_version: "0.1.0",
    schema_version: "assistant_autogen_runtime_v0_1",
    scenario_count: cases.length,
    case_ids: cases.map((entry) => entry.case_id),
    cases
  };
  const fileName = `${suiteName}.json`;
  const targetPath = path_1.default.resolve(casesDir, fileName);
  fs_1.default.writeFileSync(targetPath, JSON.stringify(suite, null, 2), "utf-8");
  return fileName;
}
/**
 * Loads the conversation recorded for one case of a run.
 *
 * The session file is `<ASSISTANT_SESSIONS_DIR>/<runId>-<caseId>.json`. Its
 * `conversation` array is projected to a fixed message shape; non-object
 * entries are skipped and indices are assigned after skipping.
 *
 * Best-effort by design: a missing file, a read error, or invalid JSON all
 * yield an empty list rather than an exception.
 *
 * @param {string} runId - Run identifier.
 * @param {string} caseId - Case identifier.
 * @returns {object[]} Messages with message_id, role ("unknown" when absent),
 *   text ("" when absent), created_at, trace_id, reply_type, message_index,
 *   case_id and case_message_index.
 */
function readSessionConversation(runId, caseId) {
  const sessionFile = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${runId}-${caseId}.json`);
  if (!fs_1.default.existsSync(sessionFile)) {
    return [];
  }
  try {
    const session = toRecord(JSON.parse(fs_1.default.readFileSync(sessionFile, "utf-8")));
    const messages = [];
    for (const entry of toArray(session?.conversation)) {
      const message = toRecord(entry);
      if (message === null) {
        continue;
      }
      // Position among the kept (object) entries, not in the raw array.
      const position = messages.length;
      messages.push({
        message_id: toStringSafe(message.message_id),
        role: toStringSafe(message.role) ?? "unknown",
        text: toStringSafe(message.text) ?? "",
        created_at: toStringSafe(message.created_at),
        trace_id: toStringSafe(message.trace_id),
        reply_type: toStringSafe(message.reply_type),
        message_index: position,
        case_id: caseId,
        case_message_index: position
      });
    }
    return messages;
  }
  catch {
    return [];
  }
}
/**
 * Refreshes a job's per-case progress from the session files on disk.
 *
 * Does nothing unless the job has a `run_id` and an `eval_target` starting
 * with "assistant_". For every case the recorded messages are reloaded and
 * the case status is derived:
 *  - "completed" when `turns_total > 0` and at least that many assistant
 *    messages exist;
 *  - "running" when any message exists;
 *  - "queued" otherwise.
 * `job.completed_cases` is updated, and a job currently "running" is promoted
 * to "completed" once every case is completed and `total_cases > 0`.
 *
 * @param {object} job - Mutable job record; `cases[*].messages`,
 *   `cases[*].status`, `completed_cases` and possibly `status` are modified.
 * @returns {void}
 */
function syncJobWithSessions(job) {
  const tracksSessions = Boolean(job.run_id) && job.eval_target.startsWith("assistant_");
  if (!tracksSessions) {
    return;
  }
  let completedCount = 0;
  let anyRunning = false;
  for (const caseState of job.cases) {
    const messages = readSessionConversation(job.run_id, caseState.case_id);
    caseState.messages = messages;
    const assistantReplies = messages.reduce(
      (count, entry) => (entry.role === "assistant" ? count + 1 : count),
      0
    );
    if (caseState.turns_total > 0 && assistantReplies >= caseState.turns_total) {
      caseState.status = "completed";
      completedCount += 1;
    }
    else if (messages.length > 0) {
      // Any recorded message (user or otherwise) means the case has started.
      caseState.status = "running";
      anyRunning = true;
    }
    else {
      caseState.status = "queued";
    }
  }
  job.completed_cases = completedCount;
  const allCasesDone = !anyRunning && job.total_cases > 0 && completedCount === job.total_cases;
  if (job.status === "running" && allCasesDone) {
    job.status = "completed";
  }
}
/**
 * Keeps the in-memory job store within `MAX_ASYNC_JOBS` entries.
 *
 * When the store is over the limit, jobs are removed in ascending
 * `updated_at` order (oldest first) until the size is back at the limit.
 *
 * @returns {void}
 */
function trimAsyncJobsStore() {
  if (ASYNC_JOBS.size <= MAX_ASYNC_JOBS) {
    return;
  }
  const byLastUpdate = (left, right) => Date.parse(left.updated_at) - Date.parse(right.updated_at);
  const evictionOrder = [...ASYNC_JOBS.values()].sort(byLastUpdate);
  let cursor = 0;
  while (ASYNC_JOBS.size > MAX_ASYNC_JOBS && cursor < evictionOrder.length) {
    ASYNC_JOBS.delete(evictionOrder[cursor].job_id);
    cursor += 1;
  }
}
/**
 * Builds the client-facing view of a job.
 *
 * Copies the job's scalar fields and its `cases` array (by reference) and
 * replaces the full report with a compact `report_summary`:
 *  - `run_id`: trimmed string or null;
 *  - `run_timestamp`: `report.run_timestamp`, else `report.timestamp`, else null;
 *  - `score_index`: numeric `report.score_index`, else numeric
 *    `report.metrics.score_index`, else null;
 *  - `cases_total`: numeric `report.cases_total` or null.
 * `report_summary` is null while the job has no report.
 *
 * @param {object} job - Internal job record.
 * @returns {object} Snapshot suitable for a JSON response.
 */
function snapshotJob(job) {
  const numberOrNull = (candidate) => (typeof candidate === "number" ? candidate : null);
  const report = job.report;
  let reportSummary = null;
  if (report) {
    const metrics = toRecord(report.metrics);
    reportSummary = {
      run_id: toStringSafe(report.run_id),
      run_timestamp: toStringSafe(report.run_timestamp) ?? toStringSafe(report.timestamp),
      score_index: numberOrNull(report.score_index) ?? numberOrNull(metrics?.score_index),
      cases_total: numberOrNull(report.cases_total)
    };
  }
  const {
    job_id,
    status,
    created_at,
    updated_at,
    eval_target,
    run_id,
    case_set_file,
    total_cases,
    completed_cases,
    error,
    cases
  } = job;
  return {
    job_id,
    status,
    created_at,
    updated_at,
    eval_target,
    run_id,
    case_set_file,
    total_cases,
    completed_cases,
    error,
    cases,
    report_summary: reportSummary
  };
}
/**
 * Creates the Express router exposing the eval endpoints.
 *
 * Routes:
 *  - POST /api/eval/run             runs an evaluation and responds with the report.
 *  - POST /api/eval/run-async/start registers an `assistant_stage1` job, responds
 *                                   immediately with its snapshot and runs the
 *                                   evaluation in the background.
 *  - GET  /api/eval/run-async/:job_id returns a refreshed snapshot of a job.
 *
 * All handler errors are forwarded to `next`.
 *
 * @param {object} services - Must provide `evalService.run(payload)` returning a promise.
 * @returns {object} The configured router.
 */
function buildEvalRouter(services) {
  const router = (0, express_1.Router)();

  // Executes a registered job; every outcome is recorded on the job itself,
  // so the returned promise never rejects because of the evaluation.
  const runJobInBackground = async (jobId, payload, caseSetFile, runId) => {
    const target = ASYNC_JOBS.get(jobId);
    if (!target) {
      // The job was evicted from the store before it could start.
      return;
    }
    target.status = "running";
    target.updated_at = new Date().toISOString();
    try {
      const report = await services.evalService.run({
        ...payload,
        caseSetFile,
        runId
      });
      target.report = report;
      syncJobWithSessions(target);
      // A finished run counts every case as completed, whatever the session files say.
      target.completed_cases = target.total_cases;
      target.status = "completed";
      target.updated_at = new Date().toISOString();
    }
    catch (error) {
      syncJobWithSessions(target);
      target.status = "failed";
      target.error = error instanceof Error ? error.message : String(error);
      target.updated_at = new Date().toISOString();
    }
  };

  router.post("/api/eval/run", async (req, res, next) => {
    try {
      const payload = buildEvalPayloadFromBody(req.body ?? {});
      const report = await services.evalService.run(payload);
      (0, http_1.ok)(res, { ok: true, report });
    }
    catch (error) {
      next(error);
    }
  });

  router.post("/api/eval/run-async/start", async (req, res, next) => {
    try {
      const body = req.body ?? {};
      const payload = buildEvalPayloadFromBody(body);
      if (payload.evalTarget !== "assistant_stage1") {
        throw new http_1.ApiError("UNSUPPORTED_ASYNC_EVAL_TARGET", "Async eval currently supports assistant_stage1 only.", 400);
      }
      const questions = normalizeRuntimeQuestions(body.questions);
      const jobId = `job-${(0, nanoid_1.nanoid)(10)}`;
      const runId = `assistant-stage1-${(0, nanoid_1.nanoid)(10)}`;
      // Explicit questions take precedence over a named case-set file.
      let runtimeCaseSetFile;
      if (questions.length > 0) {
        runtimeCaseSetFile = writeRuntimeAssistantSuiteFromQuestions(jobId, questions);
      }
      else if (payload.caseSetFile) {
        runtimeCaseSetFile = payload.caseSetFile;
      }
      if (!runtimeCaseSetFile) {
        throw new http_1.ApiError("ASYNC_CASESET_REQUIRED", "Async assistant_stage1 run requires caseSetFile or explicit questions[] payload.", 400);
      }
      const caseSeeds = readAssistantSuiteCaseSeeds(runtimeCaseSetFile);
      if (caseSeeds.length === 0) {
        throw new http_1.ApiError("ASYNC_CASESET_EMPTY", "No runnable cases found in selected case-set.", 400);
      }
      const createdAt = new Date().toISOString();
      const caseStates = caseSeeds.map(({ case_id, turns_total }) => ({
        case_id,
        turns_total,
        status: "queued",
        messages: []
      }));
      const job = {
        job_id: jobId,
        status: "queued",
        created_at: createdAt,
        updated_at: createdAt,
        eval_target: payload.evalTarget,
        run_id: runId,
        case_set_file: runtimeCaseSetFile,
        total_cases: caseSeeds.length,
        completed_cases: 0,
        cases: caseStates,
        error: null,
        report: null
      };
      ASYNC_JOBS.set(job.job_id, job);
      trimAsyncJobsStore();
      // Deferred so the response below is produced before the run starts.
      setImmediate(() => {
        void runJobInBackground(job.job_id, payload, runtimeCaseSetFile, runId);
      });
      (0, http_1.ok)(res, { ok: true, job: snapshotJob(job) });
    }
    catch (error) {
      next(error);
    }
  });

  router.get("/api/eval/run-async/:job_id", (req, res, next) => {
    try {
      const jobId = String(req.params.job_id ?? "").trim();
      if (!jobId) {
        throw new http_1.ApiError("INVALID_ASYNC_JOB_ID", "job_id is required.", 400);
      }
      const job = ASYNC_JOBS.get(jobId);
      if (!job) {
        throw new http_1.ApiError("ASYNC_JOB_NOT_FOUND", `Async eval job not found: ${jobId}`, 404);
      }
      syncJobWithSessions(job);
      // Polling counts as activity: it refreshes the timestamp used for eviction order.
      job.updated_at = new Date().toISOString();
      (0, http_1.ok)(res, { ok: true, job: snapshotJob(job) });
    }
    catch (error) {
      next(error);
    }
  });

  return router;
}