NODEDC_1C/llm_normalizer/backend/dist/routes/autoRuns.js

691 lines
27 KiB
JavaScript

"use strict";
// Module interop helper (looks like TypeScript compiler output — confirm before hand-editing).
// Reuses `this.__importDefault` when one is already present; otherwise defines a function that
// returns `mod` unchanged if it carries a truthy `__esModule` flag, or wraps it as
// `{ default: mod }` so that `.default` access works for plain CommonJS modules.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildAutoRunsRouter = buildAutoRunsRouter;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const express_1 = require("express");
const config_1 = require("../config");
const http_1 = require("../utils/http");
/**
 * Narrows an arbitrary value to a non-array object.
 *
 * @param {unknown} value - Anything (typically parsed JSON).
 * @returns {object|null} The same reference when `value` is a truthy, non-array
 *   object; `null` for everything else (primitives, null, undefined, arrays).
 */
function toRecord(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    return isObjectLike && !Array.isArray(value) ? value : null;
}
/**
 * Returns `value` itself when it is an array, otherwise a fresh empty array.
 *
 * @param {unknown} value - Candidate array.
 * @returns {Array} The original array reference or `[]`.
 */
function toArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
/**
 * Extracts a trimmed, non-empty string.
 *
 * @param {unknown} value - Candidate string.
 * @returns {string|null} The trimmed text, or `null` when `value` is not a
 *   string or is empty/whitespace-only.
 */
function toStringSafe(value) {
    const text = typeof value === "string" ? value.trim() : "";
    return text === "" ? null : text;
}
/**
 * Coerces a number or numeric string to a finite number.
 *
 * @param {unknown} value - A number, or a string parsed with `Number(...)`.
 * @returns {number|null} The finite number, or `null` for non-finite numbers,
 *   blank/non-numeric strings and every other type.
 */
function toNumberSafe(value) {
    let candidate;
    if (typeof value === "number") {
        candidate = value;
    }
    else if (typeof value === "string" && value.trim().length > 0) {
        candidate = Number(value);
    }
    else {
        return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Interprets booleans and common textual flags.
 *
 * Strings are trimmed and lower-cased first; "1"/"true"/"yes"/"on" map to
 * `true`, "0"/"false"/"no"/"off" map to `false`.
 *
 * @param {unknown} value - Boolean or flag-like string.
 * @returns {boolean|null} The boolean, or `null` when the value is not recognised.
 */
function toBooleanSafe(value) {
    if (typeof value === "boolean") {
        return value;
    }
    if (typeof value !== "string") {
        return null;
    }
    switch (value.trim().toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        case "0":
        case "false":
        case "no":
        case "off":
            return false;
        default:
            return null;
    }
}
/**
 * Parses a date string into epoch milliseconds via `Date.parse`.
 *
 * @param {unknown} value - Candidate date string.
 * @returns {number|null} Milliseconds since the epoch, or `null` when the value
 *   is not a non-empty string or `Date.parse` does not yield a finite number.
 */
function parseDateMs(value) {
    const text = toStringSafe(value);
    if (text === null) {
        return null;
    }
    const epochMs = Date.parse(text);
    if (!Number.isFinite(epochMs)) {
        return null;
    }
    return epochMs;
}
/**
 * Truncates a number toward zero and clamps it into `[min, max]`.
 *
 * @param {number|null} value - Input number; `null` and non-finite values use the fallback.
 * @param {number} min - Lower bound (checked first).
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned as-is when `value` is unusable.
 * @returns {number} The clamped integer or `fallback`.
 */
function clampInt(value, min, max, fallback) {
    const usable = value !== null && Number.isFinite(value);
    if (!usable) {
        return fallback;
    }
    const whole = Math.trunc(value);
    // Explicit comparisons (not Math.min/Math.max) keep the lower bound taking precedence.
    return whole < min ? min : whole > max ? max : whole;
}
/**
 * Determines the evaluation target of a report.
 *
 * Resolution order: a recognised `report.eval_target` value, then the run-id
 * prefix, then a `.report.json` file suffix, and finally "unknown".
 *
 * @param {{report: object, runId: string, reportPath: string}} input
 * @returns {string} One of "assistant_stage1", "assistant_stage2",
 *   "assistant_p0", "normalizer" or "unknown".
 */
function resolveRunTarget(input) {
    const declared = toStringSafe(input.report.eval_target);
    const knownTargets = ["assistant_stage1", "assistant_stage2", "assistant_p0", "normalizer"];
    if (declared !== null && knownTargets.includes(declared)) {
        return declared;
    }
    // Ordered [run-id prefix, target] pairs; the first matching prefix wins.
    const prefixRules = [
        ["assistant-stage1-", "assistant_stage1"],
        ["assistant-stage2-", "assistant_stage2"],
        ["assistant-p0-", "assistant_p0"],
        ["eval-", "normalizer"]
    ];
    for (const [prefix, target] of prefixRules) {
        if (input.runId.startsWith(prefix)) {
            return target;
        }
    }
    return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
/**
 * Picks the timestamp for a report.
 *
 * Uses the first parseable value among `report.run_timestamp` and
 * `report.timestamp`; when neither parses, falls back to the file mtime.
 *
 * @param {object} report - Parsed report record.
 * @param {number} fileMtimeMs - File modification time in epoch milliseconds.
 * @returns {{iso: string, ms: number}} ISO-8601 text and epoch milliseconds.
 */
function normalizeTimestamp(report, fileMtimeMs) {
    for (const field of ["run_timestamp", "timestamp"]) {
        const parsedMs = parseDateMs(report[field]);
        if (parsedMs !== null) {
            return { iso: new Date(parsedMs).toISOString(), ms: parsedMs };
        }
    }
    return { iso: new Date(fileMtimeMs).toISOString(), ms: fileMtimeMs };
}
/**
 * Normalises a rate to a percentage clamped to [0, 100].
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100; larger
 * values are taken as already being percentages.
 *
 * @param {number|null} value - Rate, or `null` when unavailable.
 * @returns {number|null} Clamped percentage, or `null` for a `null` input.
 */
function rateToPercent(value) {
    if (value === null) {
        return null;
    }
    const scaled = value <= 1.2 ? value * 100 : value;
    return Math.max(0, Math.min(100, scaled));
}
/**
 * Normalises a score to a percentage clamped to [0, 100].
 *
 * Values up to 5.2 are treated as being on a 0–5 scale and rescaled; larger
 * values are taken as already being percentages.
 *
 * @param {number|null} value - Score, or `null` when unavailable.
 * @returns {number|null} Clamped percentage, or `null` for a `null` input.
 */
function scoreToPercent(value) {
    if (value === null) {
        return null;
    }
    const scaled = value <= 5.2 ? (value / 5) * 100 : value;
    return Math.max(0, Math.min(100, scaled));
}
/**
 * Arithmetic mean of the finite numbers in `values`, rounded to two decimals.
 *
 * Entries that are not finite numbers (null, undefined, NaN, strings, …) are ignored.
 *
 * @param {Array<unknown>} values - Mixed list of candidates.
 * @returns {number|null} The rounded mean, or `null` when no finite number is present.
 */
function average(values) {
    let total = 0;
    let count = 0;
    for (const entry of values) {
        if (typeof entry === "number" && Number.isFinite(entry)) {
            total += entry;
            count += 1;
        }
    }
    if (count === 0) {
        return null;
    }
    return Number((total / count).toFixed(2));
}
/**
 * Locates the metrics record of a report.
 *
 * @param {object} report - Parsed report record.
 * @returns {object|null} `report.metrics.raw` when it is an object, otherwise
 *   `report.metrics`; `null` when `report.metrics` is not an object.
 */
function getMetricRecord(report) {
    const metrics = toRecord(report.metrics);
    if (metrics === null) {
        return null;
    }
    const nested = toRecord(metrics.raw);
    return nested === null ? metrics : nested;
}
/**
 * Computes a composite 0–100 score for a run as the average of a
 * target-specific set of metrics, each normalised to a percentage.
 *
 * Metrics that are absent contribute `null` and are skipped by `average`,
 * except the inverted ones: the `1 - x` metrics default `x` to 1 (so they
 * contribute 0) and the normalizer's `100 - x` metric defaults `x` to 0
 * (so it contributes 100).
 *
 * @param {object} report - Parsed report record.
 * @param {string} target - Evaluation target; anything other than the three
 *   assistant targets uses the normalizer metric set.
 * @returns {number|null} Rounded average, or `null` when no metrics record or
 *   no usable metric exists.
 */
function computeScoreIndex(report, target) {
    const metrics = getMetricRecord(report);
    if (metrics === null) {
        return null;
    }
    const num = (key) => toNumberSafe(metrics[key]);
    const rate = (key) => rateToPercent(num(key));
    const score = (key) => scoreToPercent(num(key));
    // "Lower is better" rates are flipped; a missing value counts as the worst case (1).
    const inverseRate = (key) => rateToPercent(1 - (num(key) ?? 1));
    switch (target) {
        case "assistant_p0":
            return average([
                rate("problem_first_answer_rate"),
                score("mechanism_coherence_score"),
                inverseRate("entity_leakage_rate"),
                score("accountant_actionability_score"),
                rate("route_correctness_rate"),
                rate("domain_purity_rate"),
                rate("limitation_honesty_rate"),
                rate("top_problem_unit_match_rate")
            ]);
        case "assistant_stage1":
            return average([
                rate("retrieval_differentiation_rate"),
                inverseRate("generic_explanation_rate"),
                score("accountant_actionability_score"),
                inverseRate("false_confidence_rate"),
                inverseRate("broad_answer_rate"),
                score("mechanism_specificity_score"),
                score("followup_context_retention_score")
            ]);
        case "assistant_stage2":
            return average([
                rate("problem_unit_precision"),
                rate("problem_unit_recall_proxy"),
                rate("duplicate_collapse_rate"),
                score("mechanism_coherence_score"),
                score("problem_clarity_score"),
                rate("problem_first_answer_rate"),
                inverseRate("entity_leakage_rate")
            ]);
        default:
            // Normalizer (and unknown) runs: the error rate is subtracted from 100 here,
            // unlike the fraction-based inversion used for the assistant targets.
            return average([
                rate("schema_validation_pass_rate"),
                rateToPercent(num("route_resolution_accuracy") ?? num("route_hint_accuracy")),
                rateToPercent(num("execution_state_consistency_rate") ?? num("intent_class_accuracy")),
                rateToPercent(100 - (num("high_confidence_error_rate") ?? 0))
            ]);
    }
}
/**
 * Counts gate failures recorded in a report.
 *
 * @param {object} report - Parsed report record.
 * @returns {{blocking: number, quality: number}} `blocking` sums
 *   `acceptance_gate.blocking_failures` and `baseline_stability_gate.blocking_regressions`;
 *   `quality` sums `acceptance_gate.quality_failures`,
 *   `baseline_stability_gate.legacy_quality_failures` and
 *   `baseline_stability_gate.quality_gap_failures`. Non-array fields count as 0.
 */
function countFailures(report) {
    const acceptance = toRecord(report.acceptance_gate);
    const baseline = toRecord(report.baseline_stability_gate);
    const sizeOf = (list) => toArray(list).length;
    return {
        blocking: sizeOf(acceptance?.blocking_failures) + sizeOf(baseline?.blocking_regressions),
        quality: sizeOf(acceptance?.quality_failures) +
            sizeOf(baseline?.legacy_quality_failures) +
            sizeOf(baseline?.quality_gap_failures)
    };
}
/**
 * Derives a 0–100 score for a single case from its metric subscores.
 *
 * `case_product_score`, when present, is used directly (rounded to two
 * decimals); otherwise the available subscores are averaged.
 *
 * @param {object|null} metricSubscores - The case's `metric_subscores` record.
 * @returns {number|null} The score, or `null` when no record is given or no
 *   usable subscore exists.
 */
function caseScoreFromMetricSubscores(metricSubscores) {
    if (!metricSubscores) {
        return null;
    }
    const productScore = scoreToPercent(toNumberSafe(metricSubscores.case_product_score));
    if (productScore !== null) {
        return Number(productScore.toFixed(2));
    }
    const read = (key) => toNumberSafe(metricSubscores[key]);
    return average([
        scoreToPercent(read("problem_clarity_score")),
        scoreToPercent(read("mechanism_coherence_score")),
        rateToPercent(read("problem_first_answer_rate")),
        // Leakage is inverted; a missing value is treated as full leakage (contributes 0).
        rateToPercent(1 - (read("entity_leakage_rate") ?? 1)),
        scoreToPercent(read("accountant_usefulness_score"))
    ]);
}
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the `route_correct`, `domain_pure` or
 * `problem_first_answer` checks is a recognisable boolean, the case is closed
 * unless any of them is `false`. Otherwise a numeric `scoreIndex` of 65 or
 * more means closed.
 *
 * @param {{checks: object|null, scoreIndex: number|null}} input
 * @returns {boolean|null} `true`/`false`, or `null` when neither checks nor a
 *   score are available.
 */
function isCaseClosed(input) {
    const { checks, scoreIndex } = input;
    if (checks) {
        const verdicts = ["route_correct", "domain_pure", "problem_first_answer"]
            .map((key) => toBooleanSafe(checks[key]));
        if (verdicts.some((verdict) => verdict !== null)) {
            return !verdicts.includes(false);
        }
    }
    return typeof scoreIndex === "number" ? scoreIndex >= 65 : null;
}
/**
 * Returns the object entries of `report.results`.
 *
 * @param {object} report - Parsed report record.
 * @returns {object[]} Entries of `report.results` that are non-array objects;
 *   empty when `results` is not an array.
 */
function getResultCases(report) {
    const records = [];
    for (const entry of toArray(report.results)) {
        const record = toRecord(entry);
        if (record !== null) {
            records.push(record);
        }
    }
    return records;
}
/**
 * Builds one summary object per result case in a report.
 *
 * @param {object} report - Parsed report record.
 * @param {string} runId - Run identifier; combined with the case id to form the session id.
 * @param {boolean} checkDialogAvailability - When truthy, probes the filesystem for
 *   `<ASSISTANT_SESSIONS_DIR>/<runId>-<caseId>.json`; otherwise `dialog_available` is `false`.
 * @returns {object[]} Case summaries in report order. Cases without a `case_id`
 *   are named `case-<1-based position>`.
 */
function buildCaseSummaries(report, runId, checkDialogAvailability) {
    return getResultCases(report).map((entry, position) => {
        const caseId = toStringSafe(entry.case_id) ?? `case-${position + 1}`;
        const checks = toRecord(entry.checks);
        const metricSubscores = toRecord(entry.metric_subscores);
        // Prefer the subscore-derived score; fall back to the case-level usefulness score.
        let scoreIndex = caseScoreFromMetricSubscores(metricSubscores);
        if (scoreIndex === null) {
            scoreIndex = scoreToPercent(toNumberSafe(entry.accountant_usefulness_score));
        }
        const closed = isCaseClosed({ checks, scoreIndex });
        let status = "unknown";
        if (closed === true) {
            status = "closed";
        }
        else if (closed === false) {
            status = "open";
        }
        const sessionId = `${runId}-${caseId}`;
        let dialogAvailable = false;
        if (checkDialogAvailability) {
            const sessionFile = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`);
            dialogAvailable = fs_1.default.existsSync(sessionFile);
        }
        return {
            case_id: caseId,
            domain: toStringSafe(entry.domain),
            query_class: toStringSafe(entry.query_class),
            status,
            score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
            trace_id: toStringSafe(entry.trace_id),
            reply_type: toStringSafe(entry.reply_type),
            session_id: sessionId,
            dialog_available: dialogAvailable,
            checks,
            metric_subscores: metricSubscores
        };
    });
}
/**
 * Aggregates case statuses overall and per domain.
 *
 * @param {Array<{status: string, domain: string|null}>} cases - Case summaries.
 * @returns {{closed_cases: number, open_cases: number, domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}}
 *   Totals plus per-domain counts sorted by `total_cases` descending. Cases
 *   without a domain are grouped under "unknown".
 */
function buildCoverageFromCases(cases) {
    const perDomain = new Map();
    const totals = { closed: 0, open: 0 };
    for (const { status, domain } of cases) {
        const isClosed = status === "closed";
        if (isClosed) {
            totals.closed += 1;
        }
        else if (status === "open") {
            totals.open += 1;
        }
        const domainKey = domain ?? "unknown";
        let bucket = perDomain.get(domainKey);
        if (bucket === undefined) {
            bucket = { total: 0, closed: 0 };
            perDomain.set(domainKey, bucket);
        }
        bucket.total += 1;
        if (isClosed) {
            bucket.closed += 1;
        }
    }
    const domainCoverage = [];
    for (const [domain, bucket] of perDomain) {
        domainCoverage.push({
            domain,
            total_cases: bucket.total,
            closed_cases: bucket.closed
        });
    }
    domainCoverage.sort((left, right) => right.total_cases - left.total_cases);
    return {
        closed_cases: totals.closed,
        open_cases: totals.open,
        domain_coverage: domainCoverage
    };
}
/**
 * Lists candidate report files, newest first.
 *
 * Scans `REPORTS_DIR` for `*.json` and `EVAL_CASES_DIR` for `*.report.json`
 * (regular files only, non-recursive). Directories that do not exist and files
 * whose `stat` fails are skipped.
 *
 * @param {number} scanLimit - Maximum number of candidates to return.
 * @returns {Array<{path: string, mtimeMs: number}>} Up to `scanLimit` entries
 *   sorted by modification time descending.
 */
function collectJsonCandidates(scanLimit) {
    const scanTargets = [
        { dir: config_1.REPORTS_DIR, suffix: ".json" },
        { dir: config_1.EVAL_CASES_DIR, suffix: ".report.json" }
    ];
    const found = [];
    scanTargets.forEach(({ dir, suffix }) => {
        if (!fs_1.default.existsSync(dir)) {
            return;
        }
        const matching = fs_1.default
            .readdirSync(dir, { withFileTypes: true })
            .filter((entry) => entry.isFile() && entry.name.endsWith(suffix));
        for (const entry of matching) {
            const fullPath = path_1.default.resolve(dir, entry.name);
            try {
                const { mtimeMs } = fs_1.default.statSync(fullPath);
                found.push({ path: fullPath, mtimeMs });
            }
            catch {
                // Best effort: a file whose stat fails is simply left out of the listing.
            }
        }
    });
    found.sort((left, right) => right.mtimeMs - left.mtimeMs);
    return found.slice(0, scanLimit);
}
/**
 * Reads candidate report files and indexes them by run id.
 *
 * Files that cannot be read or parsed, whose content is not an object, or that
 * lack a non-empty string `run_id` are skipped. When several files share a run
 * id, the one with the greatest timestamp is kept.
 *
 * @param {number} scanLimit - Maximum number of files to inspect.
 * @returns {object[]} Indexed runs (`run_id`, `eval_target`, `report_path`,
 *   `report`, `timestamp_iso`, `timestamp_ms`) sorted by timestamp descending.
 */
function indexRuns(scanLimit) {
    const newestByRunId = new Map();
    collectJsonCandidates(scanLimit).forEach(({ path: reportPath, mtimeMs }) => {
        let parsed;
        try {
            parsed = JSON.parse(fs_1.default.readFileSync(reportPath, "utf-8"));
        }
        catch {
            // Unreadable or malformed JSON: skip this file.
            return;
        }
        const report = toRecord(parsed);
        if (report === null) {
            return;
        }
        const runId = toStringSafe(report.run_id);
        if (runId === null) {
            return;
        }
        const stamp = normalizeTimestamp(report, mtimeMs);
        const candidate = {
            run_id: runId,
            eval_target: resolveRunTarget({ report, runId, reportPath }),
            report_path: reportPath,
            report,
            timestamp_iso: stamp.iso,
            timestamp_ms: stamp.ms
        };
        const existing = newestByRunId.get(runId);
        if (!existing || candidate.timestamp_ms > existing.timestamp_ms) {
            newestByRunId.set(runId, candidate);
        }
    });
    const runs = [...newestByRunId.values()];
    runs.sort((left, right) => right.timestamp_ms - left.timestamp_ms);
    return runs;
}
/**
 * Normalises the history query-string into a filter object.
 *
 * - `target`: one of the four known targets, otherwise "all".
 * - `use_mock`: `null` (no filter) when absent, "any" or unrecognised; else a boolean.
 * - `mode`: lower-cased, default "all"; `prompt_contains`: lower-cased, default "".
 * - `limit`: clamped to 1..500 (default 120); `scan_limit`: clamped to 50..5000 (default 900).
 *
 * @param {object} query - Request query parameters.
 * @returns {{from_ms: number|null, to_ms: number|null, target: string, use_mock: boolean|null, prompt_contains: string, mode: string, limit: number, scan_limit: number}}
 */
function parseFilters(query) {
    const knownTargets = ["normalizer", "assistant_stage1", "assistant_stage2", "assistant_p0"];
    const lowerOr = (value, fallback) => {
        const text = toStringSafe(value);
        return text === null ? fallback : text.toLowerCase();
    };
    const requestedTarget = lowerOr(query.target, "all");
    const mockText = toStringSafe(query.use_mock);
    let mockFilter = null;
    if (mockText !== null && mockText.toLowerCase() !== "any") {
        mockFilter = toBooleanSafe(mockText);
    }
    return {
        from_ms: parseDateMs(query.from),
        to_ms: parseDateMs(query.to),
        target: knownTargets.includes(requestedTarget) ? requestedTarget : "all",
        use_mock: mockFilter,
        prompt_contains: lowerOr(query.prompt_contains, ""),
        mode: lowerOr(query.mode, "all"),
        limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
        scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
    };
}
/**
 * Tests an indexed run against the parsed history filters.
 *
 * @param {object} run - Indexed run (`timestamp_ms`, `eval_target`, `report`).
 * @param {object} filters - Output of `parseFilters`.
 * @returns {boolean} `true` only when every active filter accepts the run.
 */
function matchesFilters(run, filters) {
    const { report, timestamp_ms: stamp } = run;
    // Each predicate is evaluated lazily, in order; the first failure short-circuits.
    const predicates = [
        () => filters.from_ms === null || !(stamp < filters.from_ms),
        () => filters.to_ms === null || !(stamp > filters.to_ms),
        () => filters.target === "all" || run.eval_target === filters.target,
        () => {
            const modeValue = (toStringSafe(report.mode) ?? "").toLowerCase();
            return filters.mode === "all" || modeValue === filters.mode;
        },
        () => filters.use_mock === null || toBooleanSafe(report.use_mock) === filters.use_mock,
        () => {
            if (filters.prompt_contains.length === 0) {
                return true;
            }
            const promptVersion = (toStringSafe(report.prompt_version) ?? "").toLowerCase();
            return promptVersion.includes(filters.prompt_contains);
        }
    ];
    return predicates.every((passes) => passes());
}
/**
 * Builds the summary row for one indexed run.
 *
 * Provider and model are taken from the first non-empty string among the
 * report root, `report.connection` and `report.normalize_config`
 * (or `normalizeConfig`), accepting both snake_case and camelCase provider keys.
 *
 * @param {object} run - Indexed run as produced by `indexRuns`.
 * @returns {object} Summary with identification fields, score index, failure
 *   counts and case coverage.
 */
function buildRunSummary(run) {
    const { report } = run;
    const connection = toRecord(report.connection);
    const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);
    // Returns the first candidate that is a non-empty string, else null.
    const firstString = (...candidates) => {
        for (const candidate of candidates) {
            const text = toStringSafe(candidate);
            if (text !== null) {
                return text;
            }
        }
        return null;
    };
    const cases = buildCaseSummaries(report, run.run_id, false);
    const coverage = buildCoverageFromCases(cases);
    const { blocking, quality } = countFailures(report);
    return {
        run_id: run.run_id,
        eval_target: run.eval_target,
        run_timestamp: run.timestamp_iso,
        mode: toStringSafe(report.mode),
        llm_provider: firstString(report.llm_provider, report.llmProvider, connection?.llm_provider, connection?.llmProvider, normalizeConfig?.llm_provider, normalizeConfig?.llmProvider),
        model: firstString(report.model, connection?.model, normalizeConfig?.model),
        use_mock: toBooleanSafe(report.use_mock),
        prompt_version: toStringSafe(report.prompt_version),
        schema_version: toStringSafe(report.schema_version),
        suite_id: toStringSafe(report.suite_id),
        cases_total: toNumberSafe(report.cases_total) ?? cases.length,
        requests_total: toNumberSafe(toRecord(report.budget)?.requests_total),
        report_path: run.report_path,
        score_index: computeScoreIndex(report, run.eval_target),
        blocking_failures: blocking,
        quality_failures: quality,
        closed_cases: coverage.closed_cases,
        open_cases: coverage.open_cases,
        domain_coverage: coverage.domain_coverage
    };
}
/**
 * Sums per-domain coverage across several run summaries.
 *
 * @param {Array<{domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}>} summaries
 * @returns {Array<{domain: string, total_cases: number, closed_cases: number}>}
 *   One entry per domain, sorted by `total_cases` descending.
 */
function mergeDomainCoverage(summaries) {
    const totalsByDomain = new Map();
    summaries.forEach(({ domain_coverage: coverage }) => {
        coverage.forEach(({ domain, total_cases: total, closed_cases: closed }) => {
            let bucket = totalsByDomain.get(domain);
            if (bucket === undefined) {
                bucket = { total: 0, closed: 0 };
                totalsByDomain.set(domain, bucket);
            }
            bucket.total += total;
            bucket.closed += closed;
        });
    });
    const merged = [];
    for (const [domain, bucket] of totalsByDomain) {
        merged.push({
            domain,
            total_cases: bucket.total,
            closed_cases: bucket.closed
        });
    }
    merged.sort((left, right) => right.total_cases - left.total_cases);
    return merged;
}
/**
 * Computes aggregate statistics over a list of run summaries.
 *
 * `trend` compares the first two summaries' score indexes: "up"/"down" when
 * they differ by more than 0.5, otherwise (or when either is missing) "flat".
 *
 * @param {object[]} summaries - Run summaries; index 0 is treated as the latest.
 * @returns {object} Totals per target, counts of runs with blocking/quality
 *   failures, average/latest/previous score index, trend and merged domain coverage.
 */
function buildHistoryStats(summaries) {
    const byTarget = {};
    const scores = [];
    let blockingRuns = 0;
    let qualityRuns = 0;
    summaries.forEach((summary) => {
        byTarget[summary.eval_target] = (byTarget[summary.eval_target] ?? 0) + 1;
        if (summary.blocking_failures > 0) {
            blockingRuns += 1;
        }
        if (summary.quality_failures > 0) {
            qualityRuns += 1;
        }
        if (typeof summary.score_index === "number") {
            scores.push(summary.score_index);
        }
    });
    const scoreAt = (position) => {
        const candidate = summaries[position]?.score_index;
        return typeof candidate === "number" ? candidate : null;
    };
    const latestScore = scoreAt(0);
    const previousScore = scoreAt(1);
    let trend = "flat";
    if (latestScore !== null && previousScore !== null) {
        if (latestScore > previousScore + 0.5) {
            trend = "up";
        }
        else if (latestScore < previousScore - 0.5) {
            trend = "down";
        }
    }
    let averageScore = null;
    if (scores.length > 0) {
        let total = 0;
        for (const value of scores) {
            total += value;
        }
        averageScore = Number((total / scores.length).toFixed(2));
    }
    return {
        runs_total: summaries.length,
        by_target: byTarget,
        blocking_runs: blockingRuns,
        quality_gap_runs: qualityRuns,
        avg_score_index: averageScore,
        latest_score_index: latestScore,
        previous_score_index: previousScore,
        trend,
        domain_coverage: mergeDomainCoverage(summaries)
    };
}
/**
 * Looks up a single run by id by re-indexing the report files.
 *
 * @param {string} runId - Exact run id to match.
 * @param {number} [scanLimit=3000] - Maximum number of files to inspect.
 * @returns {object|null} The indexed run, or `null` when none matches.
 */
function findRunById(runId, scanLimit = 3000) {
    for (const candidate of indexRuns(scanLimit)) {
        if (candidate.run_id === runId) {
            return candidate;
        }
    }
    return null;
}
/**
 * Summarises the last assistant message of a stored dialog.
 *
 * @param {object|null} dialogRecord - Session record with a `conversation` array.
 * @returns {object|null} `null` when no record is given; otherwise the
 *   `reply_type`/`trace_id` of the last message whose role is "assistant" plus
 *   selected string fields from its `debug` object (each `null` when missing).
 */
function buildAssistantModeSummary(dialogRecord) {
    if (!dialogRecord) {
        return null;
    }
    const entries = toArray(dialogRecord.conversation);
    // Walk backwards to find the most recent object entry authored by the assistant.
    let lastAssistant;
    for (let position = entries.length - 1; position >= 0; position -= 1) {
        const message = toRecord(entries[position]);
        if (message !== null && toStringSafe(message.role) === "assistant") {
            lastAssistant = message;
            break;
        }
    }
    const debug = toRecord(lastAssistant?.debug);
    const debugField = (key) => toStringSafe(debug?.[key]);
    return {
        reply_type: toStringSafe(lastAssistant?.reply_type),
        trace_id: toStringSafe(lastAssistant?.trace_id),
        detected_mode: debugField("detected_mode"),
        execution_lane: debugField("execution_lane"),
        tool_gate_decision: debugField("tool_gate_decision"),
        living_router_mode: debugField("living_router_mode"),
        fallback_type: debugField("fallback_type")
    };
}
/**
 * Loads the stored assistant session for a run/case pair.
 *
 * The session file is `<ASSISTANT_SESSIONS_DIR>/<runId>-<caseId>.json`. Both
 * ids originate from URL parameters, so the resolved path is verified to stay
 * inside the sessions directory; anything that escapes it is treated as
 * "no session" instead of being read.
 *
 * @param {string} runId - Run identifier.
 * @param {string} caseId - Case identifier.
 * @returns {object|null} `{ source, session_id, messages, decomposition, assistant_mode }`,
 *   or `null` when the file is missing, outside the sessions directory,
 *   unreadable, not valid JSON, or not an object.
 */
function loadSessionDialog(runId, caseId) {
    const sessionId = `${runId}-${caseId}`;
    const sessionsRoot = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR);
    const filePath = path_1.default.resolve(sessionsRoot, `${sessionId}.json`);
    // Path-traversal guard: ids containing "../" segments must not reach files
    // outside the sessions directory. An absolute relative path covers the
    // Windows cross-drive case.
    const relativePath = path_1.default.relative(sessionsRoot, filePath);
    const escapesRoot = relativePath === ".." ||
        relativePath.startsWith(`..${path_1.default.sep}`) ||
        path_1.default.isAbsolute(relativePath);
    if (escapesRoot) {
        return null;
    }
    if (!fs_1.default.existsSync(filePath)) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
    }
    catch {
        // Unreadable or malformed session file: let the caller fall back.
        return null;
    }
    const record = toRecord(parsed);
    if (!record) {
        return null;
    }
    const conversation = toArray(record.conversation)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    const messages = conversation.map((item) => ({
        role: toStringSafe(item.role) ?? "unknown",
        text: toStringSafe(item.text) ?? "",
        created_at: toStringSafe(item.created_at),
        trace_id: toStringSafe(item.trace_id),
        reply_type: toStringSafe(item.reply_type)
    }));
    const turns = toArray(record.turns)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    // Decomposition is read from the last turn's `human_readable` block only.
    const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
    const humanReadable = toRecord(lastTurn?.human_readable);
    const decomposition = toArray(humanReadable?.decomposition)
        .map((item) => toStringSafe(item))
        .filter((item) => item !== null);
    return {
        source: "assistant_session",
        session_id: sessionId,
        messages,
        decomposition,
        assistant_mode: buildAssistantModeSummary(record)
    };
}
/**
 * Synthesises a two-message dialog from report data when no stored session exists.
 *
 * The user message is the case's `raw_question`, else `user_query_raw`, else
 * `Case <caseId>`. The assistant message lists, one per line, the recognisable
 * validation/route/intent flags, the confidence string and every numeric
 * metric subscore as `key=value`.
 *
 * @param {object} run - Indexed run (`run_id`, `report`).
 * @param {string} caseId - Case identifier to look up in `report.results`.
 * @returns {object} Dialog with `source` "report_fallback", or `source` "none"
 *   with no messages when the case is not in the report.
 */
function buildFallbackDialog(run, caseId) {
    const sessionId = `${run.run_id}-${caseId}`;
    const targetCase = getResultCases(run.report)
        .find((candidate) => (toStringSafe(candidate.case_id) ?? "") === caseId);
    if (targetCase === undefined) {
        return {
            source: "none",
            session_id: sessionId,
            messages: [],
            decomposition: [],
            assistant_mode: null
        };
    }
    const summaryLines = [];
    for (const flagName of ["validation_passed", "route_match", "intent_match"]) {
        const flag = toBooleanSafe(targetCase[flagName]);
        if (flag !== null) {
            summaryLines.push(`${flagName}=${flag}`);
        }
    }
    const confidence = toStringSafe(targetCase.confidence_overall);
    if (confidence !== null) {
        summaryLines.push(`confidence=${confidence}`);
    }
    const metricSubscores = toRecord(targetCase.metric_subscores);
    if (metricSubscores !== null) {
        Object.entries(metricSubscores)
            .filter(([, value]) => toNumberSafe(value) !== null)
            .forEach(([key, value]) => {
                // The raw value is printed as stored, not its parsed number.
                summaryLines.push(`${key}=${value}`);
            });
    }
    if (summaryLines.length === 0) {
        summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
    }
    const userMessage = {
        role: "user",
        text: toStringSafe(targetCase.raw_question) ??
            toStringSafe(targetCase.user_query_raw) ??
            `Case ${caseId}`,
        created_at: null,
        trace_id: null,
        reply_type: null
    };
    const assistantMessage = {
        role: "assistant",
        text: summaryLines.join("\n"),
        created_at: null,
        trace_id: toStringSafe(targetCase.trace_id),
        reply_type: toStringSafe(targetCase.reply_type)
    };
    return {
        source: "report_fallback",
        session_id: sessionId,
        messages: [userMessage, assistantMessage],
        decomposition: [],
        assistant_mode: null
    };
}
/**
 * Creates the Express router exposing the auto-run history API.
 *
 * Routes:
 * - `GET /api/autoruns/history` — filtered list of run summaries plus
 *   aggregate stats and the targets/modes/prompt versions seen in the scan.
 * - `GET /api/autoruns/history/:run_id` — one run with its cases, coverage and
 *   the raw report (404 `AUTORUN_NOT_FOUND` when unknown).
 * - `GET /api/autoruns/history/:run_id/case/:case_id/dialog` — stored session
 *   dialog for a case, or a dialog synthesised from the report.
 *
 * Every handler forwards failures to `next(error)` for the error middleware.
 *
 * @returns {import("express").Router} The configured router.
 */
function buildAutoRunsRouter() {
    const router = (0, express_1.Router)();
    router.get("/api/autoruns/history", (req, res, next) => {
        // Wrapped like the sibling handlers so filesystem errors raised while
        // indexing reports reach the error middleware through the same path.
        try {
            const filters = parseFilters(req.query);
            const indexed = indexRuns(filters.scan_limit);
            const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit);
            const summaries = filtered.map((run) => buildRunSummary(run));
            // "available" lists are built from the whole scan, not just the filtered page.
            const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort();
            const availableModes = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item) => item !== null))).sort();
            const availablePromptVersions = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item) => item !== null))).sort();
            (0, http_1.ok)(res, {
                ok: true,
                generated_at: new Date().toISOString(),
                filters_applied: {
                    from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(),
                    to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(),
                    target: filters.target,
                    use_mock: filters.use_mock,
                    prompt_contains: filters.prompt_contains,
                    mode: filters.mode,
                    limit: filters.limit,
                    scan_limit: filters.scan_limit
                },
                available: {
                    targets: availableTargets,
                    modes: availableModes,
                    prompt_versions: availablePromptVersions
                },
                items: summaries,
                stats: buildHistoryStats(summaries)
            });
        }
        catch (error) {
            next(error);
        }
    });
    router.get("/api/autoruns/history/:run_id", (req, res, next) => {
        try {
            const runId = String(req.params.run_id ?? "").trim();
            if (!runId) {
                throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400);
            }
            const run = findRunById(runId);
            if (!run) {
                throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
            }
            // Detail view probes the filesystem for per-case session files.
            const cases = buildCaseSummaries(run.report, run.run_id, true);
            const coverage = buildCoverageFromCases(cases);
            (0, http_1.ok)(res, {
                ok: true,
                run: buildRunSummary(run),
                coverage,
                cases,
                report: run.report
            });
        }
        catch (error) {
            next(error);
        }
    });
    router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
        try {
            const runId = String(req.params.run_id ?? "").trim();
            const caseId = String(req.params.case_id ?? "").trim();
            if (!runId || !caseId) {
                throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
            }
            const run = findRunById(runId);
            if (!run) {
                throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
            }
            // Prefer the stored session; otherwise synthesise a dialog from the report.
            const sessionDialog = loadSessionDialog(runId, caseId);
            const dialog = sessionDialog ?? buildFallbackDialog(run, caseId);
            (0, http_1.ok)(res, {
                ok: true,
                run_id: runId,
                case_id: caseId,
                ...dialog
            });
        }
        catch (error) {
            next(error);
        }
    });
    return router;
}