"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.buildAutoRunsRouter = buildAutoRunsRouter; const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const express_1 = require("express"); const config_1 = require("../config"); const http_1 = require("../utils/http"); function toRecord(value) { if (!value || typeof value !== "object" || Array.isArray(value)) { return null; } return value; } function toArray(value) { return Array.isArray(value) ? value : []; } function toStringSafe(value) { if (typeof value !== "string") { return null; } const trimmed = value.trim(); return trimmed.length > 0 ? trimmed : null; } function toNumberSafe(value) { if (typeof value === "number" && Number.isFinite(value)) { return value; } if (typeof value === "string" && value.trim().length > 0) { const parsed = Number(value); return Number.isFinite(parsed) ? parsed : null; } return null; } function toBooleanSafe(value) { if (typeof value === "boolean") { return value; } if (typeof value === "string") { const lowered = value.trim().toLowerCase(); if (["1", "true", "yes", "on"].includes(lowered)) return true; if (["0", "false", "no", "off"].includes(lowered)) return false; } return null; } function parseDateMs(value) { const asString = toStringSafe(value); if (!asString) { return null; } const ms = Date.parse(asString); return Number.isFinite(ms) ? ms : null; } function clampInt(value, min, max, fallback) { if (value === null || !Number.isFinite(value)) { return fallback; } const rounded = Math.trunc(value); if (rounded < min) return min; if (rounded > max) return max; return rounded; } function resolveRunTarget(input) { const explicit = toStringSafe(input.report.eval_target); if (explicit === "assistant_stage1" || explicit === "assistant_stage2" || explicit === "assistant_p0" || explicit === "normalizer") { return explicit; } if (input.runId.startsWith("assistant-stage1-")) return "assistant_stage1"; if (input.runId.startsWith("assistant-stage2-")) return "assistant_stage2"; if (input.runId.startsWith("assistant-p0-")) return "assistant_p0"; if (input.runId.startsWith("eval-")) return "normalizer"; if (input.reportPath.endsWith(".report.json")) return "normalizer"; return "unknown"; } function normalizeTimestamp(report, fileMtimeMs) { const first = parseDateMs(report.run_timestamp); if (first !== null) { return { iso: new Date(first).toISOString(), ms: first }; } const second = parseDateMs(report.timestamp); if (second !== null) { return { iso: new Date(second).toISOString(), ms: second }; } return { iso: new Date(fileMtimeMs).toISOString(), ms: fileMtimeMs }; } function rateToPercent(value) { if (value === null) return null; if (value <= 1.2) return Math.max(0, Math.min(100, value * 100)); return Math.max(0, Math.min(100, value)); } function scoreToPercent(value) { if (value === null) return null; if (value <= 5.2) return Math.max(0, Math.min(100, (value / 5) * 100)); return Math.max(0, Math.min(100, value)); } function average(values) { const filtered = values.filter((item) => typeof item === "number" && Number.isFinite(item)); if (filtered.length === 0) { return null; } const sum = filtered.reduce((acc, item) => acc + item, 0); return Number((sum / filtered.length).toFixed(2)); } function getMetricRecord(report) { const metrics = toRecord(report.metrics); if (!metrics) return null; const raw = toRecord(metrics.raw); return raw ?? metrics; } function computeScoreIndex(report, target) { const metrics = getMetricRecord(report); if (!metrics) { return null; } if (target === "assistant_p0") { return average([ rateToPercent(toNumberSafe(metrics.problem_first_answer_rate)), scoreToPercent(toNumberSafe(metrics.mechanism_coherence_score)), rateToPercent(1 - (toNumberSafe(metrics.entity_leakage_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.accountant_actionability_score)), rateToPercent(toNumberSafe(metrics.route_correctness_rate)), rateToPercent(toNumberSafe(metrics.domain_purity_rate)), rateToPercent(toNumberSafe(metrics.limitation_honesty_rate)), rateToPercent(toNumberSafe(metrics.top_problem_unit_match_rate)) ]); } if (target === "assistant_stage1") { return average([ rateToPercent(toNumberSafe(metrics.retrieval_differentiation_rate)), rateToPercent(1 - (toNumberSafe(metrics.generic_explanation_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.accountant_actionability_score)), rateToPercent(1 - (toNumberSafe(metrics.false_confidence_rate) ?? 1)), rateToPercent(1 - (toNumberSafe(metrics.broad_answer_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.mechanism_specificity_score)), scoreToPercent(toNumberSafe(metrics.followup_context_retention_score)) ]); } if (target === "assistant_stage2") { return average([ rateToPercent(toNumberSafe(metrics.problem_unit_precision)), rateToPercent(toNumberSafe(metrics.problem_unit_recall_proxy)), rateToPercent(toNumberSafe(metrics.duplicate_collapse_rate)), scoreToPercent(toNumberSafe(metrics.mechanism_coherence_score)), scoreToPercent(toNumberSafe(metrics.problem_clarity_score)), rateToPercent(toNumberSafe(metrics.problem_first_answer_rate)), rateToPercent(1 - (toNumberSafe(metrics.entity_leakage_rate) ?? 1)) ]); } return average([ rateToPercent(toNumberSafe(metrics.schema_validation_pass_rate)), rateToPercent(toNumberSafe(metrics.route_resolution_accuracy) ?? toNumberSafe(metrics.route_hint_accuracy)), rateToPercent(toNumberSafe(metrics.execution_state_consistency_rate) ?? toNumberSafe(metrics.intent_class_accuracy)), rateToPercent(100 - (toNumberSafe(metrics.high_confidence_error_rate) ?? 0)) ]); } function countFailures(report) { const acceptanceGate = toRecord(report.acceptance_gate); const baselineGate = toRecord(report.baseline_stability_gate); const blocking = toArray(acceptanceGate?.blocking_failures).length + toArray(baselineGate?.blocking_regressions).length; const quality = toArray(acceptanceGate?.quality_failures).length + toArray(baselineGate?.legacy_quality_failures).length + toArray(baselineGate?.quality_gap_failures).length; return { blocking, quality }; } function caseScoreFromMetricSubscores(metricSubscores) { if (!metricSubscores) return null; const directProduct = scoreToPercent(toNumberSafe(metricSubscores.case_product_score)); if (directProduct !== null) { return Number(directProduct.toFixed(2)); } const candidates = [ scoreToPercent(toNumberSafe(metricSubscores.problem_clarity_score)), scoreToPercent(toNumberSafe(metricSubscores.mechanism_coherence_score)), rateToPercent(toNumberSafe(metricSubscores.problem_first_answer_rate)), rateToPercent(1 - (toNumberSafe(metricSubscores.entity_leakage_rate) ?? 1)), scoreToPercent(toNumberSafe(metricSubscores.accountant_usefulness_score)) ]; return average(candidates); } function isCaseClosed(input) { const checks = input.checks; if (checks) { const routeCorrect = toBooleanSafe(checks.route_correct); const domainPure = toBooleanSafe(checks.domain_pure); const problemFirst = toBooleanSafe(checks.problem_first_answer); if (routeCorrect !== null || domainPure !== null || problemFirst !== null) { if (routeCorrect === false) return false; if (domainPure === false) return false; if (problemFirst === false) return false; return true; } } if (typeof input.scoreIndex === "number") { return input.scoreIndex >= 65; } return null; } function getResultCases(report) { return toArray(report.results) .map((item) => toRecord(item)) .filter((item) => item !== null); } function buildCaseSummaries(report, runId, checkDialogAvailability) { const results = getResultCases(report); return results.map((item, index) => { const caseId = toStringSafe(item.case_id) ?? `case-${index + 1}`; const checks = toRecord(item.checks); const metricSubscores = toRecord(item.metric_subscores); const scoreIndex = caseScoreFromMetricSubscores(metricSubscores) ?? scoreToPercent(toNumberSafe(item.accountant_usefulness_score)) ?? null; const closedState = isCaseClosed({ checks, scoreIndex }); const sessionId = `${runId}-${caseId}`; const dialogAvailable = checkDialogAvailability ? fs_1.default.existsSync(path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`)) : false; return { case_id: caseId, domain: toStringSafe(item.domain), query_class: toStringSafe(item.query_class), status: closedState === null ? "unknown" : closedState ? "closed" : "open", score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)), trace_id: toStringSafe(item.trace_id), reply_type: toStringSafe(item.reply_type), session_id: sessionId, dialog_available: dialogAvailable, checks, metric_subscores: metricSubscores }; }); } function buildCoverageFromCases(cases) { const coverageByDomain = new Map(); let closedCases = 0; let openCases = 0; for (const item of cases) { if (item.status === "closed") closedCases += 1; if (item.status === "open") openCases += 1; const domainKey = item.domain ?? "unknown"; const current = coverageByDomain.get(domainKey) ?? { total: 0, closed: 0 }; current.total += 1; if (item.status === "closed") current.closed += 1; coverageByDomain.set(domainKey, current); } const domainCoverage = Array.from(coverageByDomain.entries()) .map(([domain, value]) => ({ domain, total_cases: value.total, closed_cases: value.closed })) .sort((a, b) => b.total_cases - a.total_cases); return { closed_cases: closedCases, open_cases: openCases, domain_coverage: domainCoverage }; } function collectJsonCandidates(scanLimit) { const candidates = []; const sources = [ { dir: config_1.REPORTS_DIR, suffix: ".json" }, { dir: config_1.EVAL_CASES_DIR, suffix: ".report.json" } ]; for (const source of sources) { if (!fs_1.default.existsSync(source.dir)) continue; const entries = fs_1.default.readdirSync(source.dir, { withFileTypes: true }); for (const entry of entries) { if (!entry.isFile()) continue; if (!entry.name.endsWith(source.suffix)) continue; const fullPath = path_1.default.resolve(source.dir, entry.name); try { const stat = fs_1.default.statSync(fullPath); candidates.push({ path: fullPath, mtimeMs: stat.mtimeMs }); } catch { // skip broken file stat } } } return candidates.sort((a, b) => b.mtimeMs - a.mtimeMs).slice(0, scanLimit); } function indexRuns(scanLimit) { const files = collectJsonCandidates(scanLimit); const dedup = new Map(); for (const item of files) { let parsed; try { const raw = fs_1.default.readFileSync(item.path, "utf-8"); parsed = JSON.parse(raw); } catch { continue; } const report = toRecord(parsed); if (!report) continue; const runId = toStringSafe(report.run_id); if (!runId) continue; const evalTarget = resolveRunTarget({ report, runId, reportPath: item.path }); const normalizedTime = normalizeTimestamp(report, item.mtimeMs); const indexed = { run_id: runId, eval_target: evalTarget, report_path: item.path, report, timestamp_iso: normalizedTime.iso, timestamp_ms: normalizedTime.ms }; const current = dedup.get(runId); if (!current || indexed.timestamp_ms > current.timestamp_ms) { dedup.set(runId, indexed); } } return Array.from(dedup.values()).sort((a, b) => b.timestamp_ms - a.timestamp_ms); } function parseFilters(query) { const fromMs = parseDateMs(query.from); const toMs = parseDateMs(query.to); const targetRaw = toStringSafe(query.target)?.toLowerCase() ?? "all"; const target = targetRaw === "normalizer" || targetRaw === "assistant_stage1" || targetRaw === "assistant_stage2" || targetRaw === "assistant_p0" ? targetRaw : "all"; const useMock = toStringSafe(query.use_mock); const useMockFilter = useMock === null || useMock.toLowerCase() === "any" ? null : toBooleanSafe(useMock); const mode = toStringSafe(query.mode)?.toLowerCase() ?? "all"; const promptContains = (toStringSafe(query.prompt_contains) ?? "").toLowerCase(); const limit = clampInt(toNumberSafe(query.limit), 1, 500, 120); const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900); return { from_ms: fromMs, to_ms: toMs, target, use_mock: useMockFilter, prompt_contains: promptContains, mode, limit, scan_limit: scanLimit }; } function matchesFilters(run, filters) { if (filters.from_ms !== null && run.timestamp_ms < filters.from_ms) return false; if (filters.to_ms !== null && run.timestamp_ms > filters.to_ms) return false; if (filters.target !== "all" && run.eval_target !== filters.target) return false; const modeValue = (toStringSafe(run.report.mode) ?? "").toLowerCase(); if (filters.mode !== "all" && modeValue !== filters.mode) return false; if (filters.use_mock !== null) { const useMockValue = toBooleanSafe(run.report.use_mock); if (useMockValue !== filters.use_mock) return false; } if (filters.prompt_contains.length > 0) { const promptVersion = (toStringSafe(run.report.prompt_version) ?? "").toLowerCase(); if (!promptVersion.includes(filters.prompt_contains)) return false; } return true; } function buildRunSummary(run) { const connection = toRecord(run.report.connection); const normalizeConfig = toRecord(run.report.normalize_config) ?? toRecord(run.report.normalizeConfig); const llmProvider = toStringSafe(run.report.llm_provider) ?? toStringSafe(run.report.llmProvider) ?? toStringSafe(connection?.llm_provider) ?? toStringSafe(connection?.llmProvider) ?? toStringSafe(normalizeConfig?.llm_provider) ?? toStringSafe(normalizeConfig?.llmProvider); const model = toStringSafe(run.report.model) ?? toStringSafe(connection?.model) ?? toStringSafe(normalizeConfig?.model); const cases = buildCaseSummaries(run.report, run.run_id, false); const coverage = buildCoverageFromCases(cases); const failures = countFailures(run.report); return { run_id: run.run_id, eval_target: run.eval_target, run_timestamp: run.timestamp_iso, mode: toStringSafe(run.report.mode), llm_provider: llmProvider, model, use_mock: toBooleanSafe(run.report.use_mock), prompt_version: toStringSafe(run.report.prompt_version), schema_version: toStringSafe(run.report.schema_version), suite_id: toStringSafe(run.report.suite_id), cases_total: toNumberSafe(run.report.cases_total) ?? cases.length, requests_total: toNumberSafe(toRecord(run.report.budget)?.requests_total), report_path: run.report_path, score_index: computeScoreIndex(run.report, run.eval_target), blocking_failures: failures.blocking, quality_failures: failures.quality, closed_cases: coverage.closed_cases, open_cases: coverage.open_cases, domain_coverage: coverage.domain_coverage }; } function mergeDomainCoverage(summaries) { const merged = new Map(); for (const summary of summaries) { for (const item of summary.domain_coverage) { const current = merged.get(item.domain) ?? { total: 0, closed: 0 }; current.total += item.total_cases; current.closed += item.closed_cases; merged.set(item.domain, current); } } return Array.from(merged.entries()) .map(([domain, value]) => ({ domain, total_cases: value.total, closed_cases: value.closed })) .sort((a, b) => b.total_cases - a.total_cases); } function buildHistoryStats(summaries) { const byTarget = {}; let blockingRuns = 0; let qualityRuns = 0; const scoreValues = []; for (const item of summaries) { byTarget[item.eval_target] = (byTarget[item.eval_target] ?? 0) + 1; if (item.blocking_failures > 0) blockingRuns += 1; if (item.quality_failures > 0) qualityRuns += 1; if (typeof item.score_index === "number") scoreValues.push(item.score_index); } const latestScore = typeof summaries[0]?.score_index === "number" ? summaries[0].score_index : null; const previousScore = typeof summaries[1]?.score_index === "number" ? summaries[1].score_index : null; const trend = latestScore === null || previousScore === null ? "flat" : latestScore > previousScore + 0.5 ? "up" : latestScore < previousScore - 0.5 ? "down" : "flat"; return { runs_total: summaries.length, by_target: byTarget, blocking_runs: blockingRuns, quality_gap_runs: qualityRuns, avg_score_index: scoreValues.length > 0 ? Number((scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length).toFixed(2)) : null, latest_score_index: latestScore, previous_score_index: previousScore, trend, domain_coverage: mergeDomainCoverage(summaries) }; } function findRunById(runId, scanLimit = 3000) { const indexed = indexRuns(scanLimit); return indexed.find((item) => item.run_id === runId) ?? null; } function buildAssistantModeSummary(dialogRecord) { if (!dialogRecord) return null; const conversation = toArray(dialogRecord.conversation) .map((item) => toRecord(item)) .filter((item) => item !== null); const lastAssistant = [...conversation] .reverse() .find((item) => toStringSafe(item.role) === "assistant"); const debug = toRecord(lastAssistant?.debug); return { reply_type: toStringSafe(lastAssistant?.reply_type), trace_id: toStringSafe(lastAssistant?.trace_id), detected_mode: toStringSafe(debug?.detected_mode), execution_lane: toStringSafe(debug?.execution_lane), tool_gate_decision: toStringSafe(debug?.tool_gate_decision), living_router_mode: toStringSafe(debug?.living_router_mode), fallback_type: toStringSafe(debug?.fallback_type) }; } function loadSessionDialog(runId, caseId) { const sessionId = `${runId}-${caseId}`; const filePath = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`); if (!fs_1.default.existsSync(filePath)) { return null; } let parsed; try { parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")); } catch { return null; } const record = toRecord(parsed); if (!record) return null; const conversation = toArray(record.conversation) .map((item) => toRecord(item)) .filter((item) => item !== null); const messages = conversation.map((item) => ({ role: toStringSafe(item.role) ?? "unknown", text: toStringSafe(item.text) ?? "", created_at: toStringSafe(item.created_at), trace_id: toStringSafe(item.trace_id), reply_type: toStringSafe(item.reply_type) })); const turns = toArray(record.turns) .map((item) => toRecord(item)) .filter((item) => item !== null); const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null; const humanReadable = toRecord(lastTurn?.human_readable); const decomposition = toArray(humanReadable?.decomposition) .map((item) => toStringSafe(item)) .filter((item) => item !== null); return { source: "assistant_session", session_id: sessionId, messages, decomposition, assistant_mode: buildAssistantModeSummary(record) }; } function buildFallbackDialog(run, caseId) { const sessionId = `${run.run_id}-${caseId}`; const results = getResultCases(run.report); const targetCase = results.find((item) => (toStringSafe(item.case_id) ?? "") === caseId) ?? null; if (!targetCase) { return { source: "none", session_id: sessionId, messages: [], decomposition: [], assistant_mode: null }; } const userText = toStringSafe(targetCase.raw_question) ?? toStringSafe(targetCase.user_query_raw) ?? `Case ${caseId}`; const assistantSummaryParts = []; const validationPassed = toBooleanSafe(targetCase.validation_passed); if (validationPassed !== null) assistantSummaryParts.push(`validation_passed=${validationPassed}`); const routeMatch = toBooleanSafe(targetCase.route_match); if (routeMatch !== null) assistantSummaryParts.push(`route_match=${routeMatch}`); const intentMatch = toBooleanSafe(targetCase.intent_match); if (intentMatch !== null) assistantSummaryParts.push(`intent_match=${intentMatch}`); const confidence = toStringSafe(targetCase.confidence_overall); if (confidence) assistantSummaryParts.push(`confidence=${confidence}`); const metricSubscores = toRecord(targetCase.metric_subscores); if (metricSubscores) { for (const [key, value] of Object.entries(metricSubscores)) { if (toNumberSafe(value) !== null) { assistantSummaryParts.push(`${key}=${value}`); } } } if (assistantSummaryParts.length === 0) { assistantSummaryParts.push("No structured assistant dialog is available for this case in report artifacts."); } return { source: "report_fallback", session_id: sessionId, messages: [ { role: "user", text: userText, created_at: null, trace_id: null, reply_type: null }, { role: "assistant", text: assistantSummaryParts.join("\n"), created_at: null, trace_id: toStringSafe(targetCase.trace_id), reply_type: toStringSafe(targetCase.reply_type) } ], decomposition: [], assistant_mode: null }; } function buildAutoRunsRouter() { const router = (0, express_1.Router)(); router.get("/api/autoruns/history", (req, res) => { const filters = parseFilters(req.query); const indexed = indexRuns(filters.scan_limit); const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit); const summaries = filtered.map((run) => buildRunSummary(run)); const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort(); const availableModes = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item) => item !== null))).sort(); const availablePromptVersions = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item) => item !== null))).sort(); (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), filters_applied: { from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(), to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(), target: filters.target, use_mock: filters.use_mock, prompt_contains: filters.prompt_contains, mode: filters.mode, limit: filters.limit, scan_limit: filters.scan_limit }, available: { targets: availableTargets, modes: availableModes, prompt_versions: availablePromptVersions }, items: summaries, stats: buildHistoryStats(summaries) }); }); router.get("/api/autoruns/history/:run_id", (req, res, next) => { try { const runId = String(req.params.run_id ?? "").trim(); if (!runId) { throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400); } const run = findRunById(runId); if (!run) { throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404); } const cases = buildCaseSummaries(run.report, run.run_id, true); const coverage = buildCoverageFromCases(cases); (0, http_1.ok)(res, { ok: true, run: buildRunSummary(run), coverage, cases, report: run.report }); } catch (error) { next(error); } }); router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => { try { const runId = String(req.params.run_id ?? "").trim(); const caseId = String(req.params.case_id ?? "").trim(); if (!runId || !caseId) { throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400); } const run = findRunById(runId); if (!run) { throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404); } const sessionDialog = loadSessionDialog(runId, caseId); const dialog = sessionDialog ?? buildFallbackDialog(run, caseId); (0, http_1.ok)(res, { ok: true, run_id: runId, case_id: caseId, ...dialog }); } catch (error) { next(error); } }); return router; }