691 lines
27 KiB
JavaScript
691 lines
27 KiB
JavaScript
"use strict";
|
|
// Default-import interop helper: reuses an existing `this.__importDefault` when
// one is already defined, otherwise wraps any module that is not flagged
// `__esModule` in a `{ default: mod }` object so `.default` is always readable.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.buildAutoRunsRouter = buildAutoRunsRouter;
|
|
const fs_1 = __importDefault(require("fs"));
|
|
const path_1 = __importDefault(require("path"));
|
|
const express_1 = require("express");
|
|
const config_1 = require("../config");
|
|
const http_1 = require("../utils/http");
|
|
/**
 * Narrows an arbitrary value to an object-like record.
 *
 * @param {unknown} value - Any value, typically parsed JSON.
 * @returns {object|null} The same reference when `value` is a truthy,
 *   non-array object; `null` for everything else.
 */
function toRecord(value) {
    const isObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isObjectLike ? value : null;
}
|
|
/**
 * Returns the value itself when it is an array, otherwise a new empty array.
 *
 * @param {unknown} value - Any value.
 * @returns {Array} `value` (same reference) or `[]`.
 */
function toArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
|
|
/**
 * Extracts a trimmed, non-empty string.
 *
 * @param {unknown} value - Any value.
 * @returns {string|null} The trimmed text, or `null` when `value` is not a
 *   string or is blank after trimming.
 */
function toStringSafe(value) {
    if (typeof value === "string") {
        const text = value.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
|
|
/**
 * Coerces a number or a non-blank numeric string to a finite number.
 *
 * @param {unknown} value - Any value.
 * @returns {number|null} The finite number, or `null` when the value is of
 *   another type, is a blank string, or does not convert to a finite number.
 */
function toNumberSafe(value) {
    let candidate;
    if (typeof value === "number") {
        candidate = value;
    }
    else if (typeof value === "string" && value.trim().length > 0) {
        candidate = Number(value);
    }
    else {
        return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
|
|
/**
 * Interprets booleans and common boolean-like strings.
 *
 * Strings are trimmed and compared case-insensitively: "1", "true", "yes" and
 * "on" map to `true`; "0", "false", "no" and "off" map to `false`.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean|null} The boolean, or `null` when the value is not
 *   recognised.
 */
function toBooleanSafe(value) {
    if (typeof value === "boolean") {
        return value;
    }
    if (typeof value !== "string") {
        return null;
    }
    switch (value.trim().toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        case "0":
        case "false":
        case "no":
        case "off":
            return false;
        default:
            return null;
    }
}
|
|
/**
 * Parses a date string into epoch milliseconds using `Date.parse`.
 *
 * @param {unknown} value - Candidate date text.
 * @returns {number|null} Milliseconds since the epoch, or `null` when the
 *   value is not a non-blank string or `Date.parse` cannot interpret it.
 */
function parseDateMs(value) {
    const text = toStringSafe(value);
    if (text === null) {
        return null;
    }
    const epochMs = Date.parse(text);
    if (Number.isFinite(epochMs)) {
        return epochMs;
    }
    return null;
}
|
|
/**
 * Truncates a number toward zero and clamps it into `[min, max]`.
 *
 * @param {number|null} value - Input number; `null` or non-finite selects the fallback.
 * @param {number} min - Lower bound (checked first).
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned as-is when `value` is unusable.
 * @returns {number} The clamped integer or `fallback`.
 */
function clampInt(value, min, max, fallback) {
    const usable = value !== null && Number.isFinite(value);
    if (!usable) {
        return fallback;
    }
    const truncated = Math.trunc(value);
    return truncated < min ? min : truncated > max ? max : truncated;
}
|
|
/**
 * Determines which evaluation target a run belongs to.
 *
 * Resolution order: an explicit, recognised `report.eval_target`; then the
 * run-id prefix; then a `.report.json` file suffix; otherwise "unknown".
 *
 * @param {{ report: object, runId: string, reportPath: string }} input
 * @returns {string} One of "assistant_stage1", "assistant_stage2",
 *   "assistant_p0", "normalizer" or "unknown".
 */
function resolveRunTarget(input) {
    const recognisedTargets = ["assistant_stage1", "assistant_stage2", "assistant_p0", "normalizer"];
    const declared = toStringSafe(input.report.eval_target);
    if (declared !== null && recognisedTargets.includes(declared)) {
        return declared;
    }
    // Ordered [run-id prefix, target] rules; the first matching prefix wins.
    const prefixRules = [
        ["assistant-stage1-", "assistant_stage1"],
        ["assistant-stage2-", "assistant_stage2"],
        ["assistant-p0-", "assistant_p0"],
        ["eval-", "normalizer"]
    ];
    for (const [prefix, target] of prefixRules) {
        if (input.runId.startsWith(prefix)) {
            return target;
        }
    }
    return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
|
|
/**
 * Picks the timestamp for a run: `report.run_timestamp`, then
 * `report.timestamp`, then the report file's modification time.
 *
 * @param {object} report - Parsed report record.
 * @param {number} fileMtimeMs - File modification time in epoch milliseconds.
 * @returns {{ iso: string, ms: number }} The chosen instant as ISO text and epoch ms.
 */
function normalizeTimestamp(report, fileMtimeMs) {
    const describe = (ms) => ({ iso: new Date(ms).toISOString(), ms });
    for (const field of ["run_timestamp", "timestamp"]) {
        const parsedMs = parseDateMs(report[field]);
        if (parsedMs !== null) {
            return describe(parsedMs);
        }
    }
    return describe(fileMtimeMs);
}
|
|
/**
 * Normalises a rate to a 0-100 percentage.
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100; larger
 * values are taken as already being percentages. The result is clamped to
 * `[0, 100]`.
 *
 * @param {number|null} value - Rate, or `null` when unavailable.
 * @returns {number|null} Percentage, or `null` when `value` is `null`.
 */
function rateToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 1.2 ? value * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
|
|
/**
 * Normalises a score to a 0-100 percentage.
 *
 * Values up to 5.2 are treated as points on a 5-point scale and rescaled;
 * larger values are taken as already being percentages. The result is clamped
 * to `[0, 100]`.
 *
 * @param {number|null} value - Score, or `null` when unavailable.
 * @returns {number|null} Percentage, or `null` when `value` is `null`.
 */
function scoreToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 5.2 ? (value / 5) * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
|
|
/**
 * Averages the finite numbers in a list, rounded to two decimals.
 *
 * @param {Array} values - Mixed list; non-numbers and non-finite numbers are ignored.
 * @returns {number|null} The rounded mean, or `null` when nothing is usable.
 */
function average(values) {
    let total = 0;
    let count = 0;
    for (const entry of values) {
        if (typeof entry === "number" && Number.isFinite(entry)) {
            total += entry;
            count += 1;
        }
    }
    if (count === 0) {
        return null;
    }
    return Number((total / count).toFixed(2));
}
|
|
/**
 * Locates the metrics record of a report, preferring `metrics.raw` over
 * `metrics` itself when the nested record exists.
 *
 * @param {object} report - Parsed report record.
 * @returns {object|null} The metrics record, or `null` when `report.metrics`
 *   is not an object.
 */
function getMetricRecord(report) {
    const metrics = toRecord(report.metrics);
    if (metrics === null) {
        return null;
    }
    const nestedRaw = toRecord(metrics.raw);
    return nestedRaw !== null ? nestedRaw : metrics;
}
|
|
/**
 * Computes a composite 0-100 score for a run from its metrics.
 *
 * Each eval target averages its own list of metrics after normalising them to
 * percentages with `rateToPercent` / `scoreToPercent`. Metrics that resolve to
 * `null` are skipped by `average`. Any target other than the three assistant
 * targets uses the normalizer metric list.
 *
 * @param {object} report - Parsed report record.
 * @param {string} target - Eval target as produced by `resolveRunTarget`.
 * @returns {number|null} Score rounded to two decimals, or `null` when the
 *   report has no metrics record or none of the metrics are usable.
 */
function computeScoreIndex(report, target) {
    const metrics = getMetricRecord(report);
    if (!metrics) {
        return null;
    }
    const rate = (key) => rateToPercent(toNumberSafe(metrics[key]));
    const score = (key) => scoreToPercent(toNumberSafe(metrics[key]));
    // "Lower is better" fractions are inverted; a missing value counts as 1,
    // so it contributes 0 to the average instead of being skipped.
    const invertedRate = (key) => rateToPercent(1 - (toNumberSafe(metrics[key]) ?? 1));
    if (target === "assistant_p0") {
        return average([
            rate("problem_first_answer_rate"),
            score("mechanism_coherence_score"),
            invertedRate("entity_leakage_rate"),
            score("accountant_actionability_score"),
            rate("route_correctness_rate"),
            rate("domain_purity_rate"),
            rate("limitation_honesty_rate"),
            rate("top_problem_unit_match_rate")
        ]);
    }
    if (target === "assistant_stage1") {
        return average([
            rate("retrieval_differentiation_rate"),
            invertedRate("generic_explanation_rate"),
            score("accountant_actionability_score"),
            invertedRate("false_confidence_rate"),
            invertedRate("broad_answer_rate"),
            score("mechanism_specificity_score"),
            score("followup_context_retention_score")
        ]);
    }
    if (target === "assistant_stage2") {
        return average([
            rate("problem_unit_precision"),
            rate("problem_unit_recall_proxy"),
            rate("duplicate_collapse_rate"),
            score("mechanism_coherence_score"),
            score("problem_clarity_score"),
            rate("problem_first_answer_rate"),
            invertedRate("entity_leakage_rate")
        ]);
    }
    // `100 - errorRate` is already on the percent scale, so it is clamped
    // directly. Sending it through rateToPercent would read any remainder
    // <= 1.2 as a fraction and multiply it by 100, which made an error rate
    // of 99 score 100 while an error rate of 98 scored 2.
    const highConfidenceErrorRate = toNumberSafe(metrics.high_confidence_error_rate) ?? 0;
    const highConfidenceAccuracy = Math.max(0, Math.min(100, 100 - highConfidenceErrorRate));
    return average([
        rate("schema_validation_pass_rate"),
        rateToPercent(toNumberSafe(metrics.route_resolution_accuracy) ?? toNumberSafe(metrics.route_hint_accuracy)),
        rateToPercent(toNumberSafe(metrics.execution_state_consistency_rate) ?? toNumberSafe(metrics.intent_class_accuracy)),
        highConfidenceAccuracy
    ]);
}
|
|
/**
 * Counts blocking and quality failures listed in a report's gates.
 *
 * Blocking = `acceptance_gate.blocking_failures` +
 * `baseline_stability_gate.blocking_regressions`. Quality =
 * `acceptance_gate.quality_failures` +
 * `baseline_stability_gate.legacy_quality_failures` +
 * `baseline_stability_gate.quality_gap_failures`. Missing gates or lists count as 0.
 *
 * @param {object} report - Parsed report record.
 * @returns {{ blocking: number, quality: number }}
 */
function countFailures(report) {
    const acceptance = toRecord(report.acceptance_gate);
    const baseline = toRecord(report.baseline_stability_gate);
    const listLength = (gate, key) => toArray(gate?.[key]).length;
    const blocking = listLength(acceptance, "blocking_failures")
        + listLength(baseline, "blocking_regressions");
    const quality = listLength(acceptance, "quality_failures")
        + listLength(baseline, "legacy_quality_failures")
        + listLength(baseline, "quality_gap_failures");
    return { blocking, quality };
}
|
|
/**
 * Derives a per-case 0-100 score from a case's metric subscores.
 *
 * A usable `case_product_score` wins outright (rounded to two decimals);
 * otherwise the available component subscores are averaged.
 *
 * @param {object|null} metricSubscores - The case's `metric_subscores` record.
 * @returns {number|null} The score, or `null` when nothing usable is present.
 */
function caseScoreFromMetricSubscores(metricSubscores) {
    if (!metricSubscores) {
        return null;
    }
    const productScore = scoreToPercent(toNumberSafe(metricSubscores.case_product_score));
    if (productScore !== null) {
        return Number(productScore.toFixed(2));
    }
    const asScore = (key) => scoreToPercent(toNumberSafe(metricSubscores[key]));
    const asRate = (key) => rateToPercent(toNumberSafe(metricSubscores[key]));
    // A missing leakage rate is treated as 1, so its inverted contribution is 0.
    const leakageFree = rateToPercent(1 - (toNumberSafe(metricSubscores.entity_leakage_rate) ?? 1));
    return average([
        asScore("problem_clarity_score"),
        asScore("mechanism_coherence_score"),
        asRate("problem_first_answer_rate"),
        leakageFree,
        asScore("accountant_usefulness_score")
    ]);
}
|
|
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the `route_correct`, `domain_pure` or
 * `problem_first_answer` checks is present, the case is closed unless any of
 * them is explicitly false. Without checks, a numeric score of 65 or more
 * closes the case.
 *
 * @param {{ checks: object|null, scoreIndex: number|null }} input
 * @returns {boolean|null} `null` when neither checks nor a score are available.
 */
function isCaseClosed(input) {
    const { checks, scoreIndex } = input;
    if (checks) {
        const verdicts = [
            toBooleanSafe(checks.route_correct),
            toBooleanSafe(checks.domain_pure),
            toBooleanSafe(checks.problem_first_answer)
        ];
        const anyRecorded = verdicts.some((verdict) => verdict !== null);
        if (anyRecorded) {
            return !verdicts.includes(false);
        }
    }
    return typeof scoreIndex === "number" ? scoreIndex >= 65 : null;
}
|
|
/**
 * Returns the object entries of `report.results`, dropping anything that is
 * not a plain record.
 *
 * @param {object} report - Parsed report record.
 * @returns {object[]} Case records in their original order.
 */
function getResultCases(report) {
    const records = [];
    for (const entry of toArray(report.results)) {
        const record = toRecord(entry);
        if (record !== null) {
            records.push(record);
        }
    }
    return records;
}
|
|
/**
 * Builds one summary per case listed in a report.
 *
 * @param {object} report - Parsed report record.
 * @param {string} runId - Run identifier; combined with the case id into the session id.
 * @param {boolean} checkDialogAvailability - When truthy, checks whether
 *   `<ASSISTANT_SESSIONS_DIR>/<runId>-<caseId>.json` exists on disk.
 * @returns {object[]} Case summaries in report order.
 */
function buildCaseSummaries(report, runId, checkDialogAvailability) {
    const summaries = [];
    getResultCases(report).forEach((entry, position) => {
        // Cases without an id get a positional one, starting at case-1.
        const caseId = toStringSafe(entry.case_id) ?? `case-${position + 1}`;
        const checks = toRecord(entry.checks);
        const metricSubscores = toRecord(entry.metric_subscores);
        let scoreIndex = caseScoreFromMetricSubscores(metricSubscores);
        if (scoreIndex === null || scoreIndex === undefined) {
            scoreIndex = scoreToPercent(toNumberSafe(entry.accountant_usefulness_score)) ?? null;
        }
        const closedState = isCaseClosed({ checks, scoreIndex });
        let status = "unknown";
        if (closedState !== null) {
            status = closedState ? "closed" : "open";
        }
        const sessionId = `${runId}-${caseId}`;
        let dialogAvailable = false;
        if (checkDialogAvailability) {
            const sessionFile = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`);
            dialogAvailable = fs_1.default.existsSync(sessionFile);
        }
        summaries.push({
            case_id: caseId,
            domain: toStringSafe(entry.domain),
            query_class: toStringSafe(entry.query_class),
            status,
            score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
            trace_id: toStringSafe(entry.trace_id),
            reply_type: toStringSafe(entry.reply_type),
            session_id: sessionId,
            dialog_available: dialogAvailable,
            checks,
            metric_subscores: metricSubscores
        });
    });
    return summaries;
}
|
|
/**
 * Aggregates case summaries into closed/open totals and per-domain coverage.
 *
 * Cases with no domain are grouped under "unknown". Domains are ordered by
 * total case count, largest first.
 *
 * @param {Array<{ status: string, domain: string|null }>} cases
 * @returns {{ closed_cases: number, open_cases: number, domain_coverage: object[] }}
 */
function buildCoverageFromCases(cases) {
    const perDomain = new Map();
    const totals = { closed: 0, open: 0 };
    cases.forEach((summary) => {
        const isClosed = summary.status === "closed";
        if (isClosed) {
            totals.closed += 1;
        }
        else if (summary.status === "open") {
            totals.open += 1;
        }
        const key = summary.domain ?? "unknown";
        let bucket = perDomain.get(key);
        if (bucket === undefined) {
            bucket = { total: 0, closed: 0 };
            perDomain.set(key, bucket);
        }
        bucket.total += 1;
        if (isClosed) {
            bucket.closed += 1;
        }
    });
    const domainCoverage = [];
    for (const [domain, bucket] of perDomain) {
        domainCoverage.push({ domain, total_cases: bucket.total, closed_cases: bucket.closed });
    }
    domainCoverage.sort((left, right) => right.total_cases - left.total_cases);
    return {
        closed_cases: totals.closed,
        open_cases: totals.open,
        domain_coverage: domainCoverage
    };
}
|
|
/**
 * Lists candidate report files from the reports and eval-cases directories.
 *
 * `REPORTS_DIR` contributes files ending in ".json"; `EVAL_CASES_DIR`
 * contributes files ending in ".report.json". Directories that do not exist
 * and files that cannot be stat'ed are skipped.
 *
 * @param {number} scanLimit - Maximum number of candidates to return.
 * @returns {Array<{ path: string, mtimeMs: number }>} Most recently modified first.
 */
function collectJsonCandidates(scanLimit) {
    const scanTargets = [
        [config_1.REPORTS_DIR, ".json"],
        [config_1.EVAL_CASES_DIR, ".report.json"]
    ];
    const found = [];
    for (const [directory, suffix] of scanTargets) {
        if (fs_1.default.existsSync(directory)) {
            const matching = fs_1.default
                .readdirSync(directory, { withFileTypes: true })
                .filter((dirent) => dirent.isFile() && dirent.name.endsWith(suffix));
            for (const dirent of matching) {
                const absolutePath = path_1.default.resolve(directory, dirent.name);
                try {
                    const { mtimeMs } = fs_1.default.statSync(absolutePath);
                    found.push({ path: absolutePath, mtimeMs });
                }
                catch {
                    // skip broken file stat
                }
            }
        }
    }
    found.sort((left, right) => right.mtimeMs - left.mtimeMs);
    return found.slice(0, scanLimit);
}
|
|
/**
 * Reads candidate report files and indexes them by run id.
 *
 * Files that cannot be read or parsed, whose top level is not an object, or
 * that have no usable `run_id` are ignored. When several files share a run id,
 * the one with the strictly latest timestamp is kept.
 *
 * @param {number} scanLimit - Maximum number of files to inspect.
 * @returns {object[]} Indexed runs, newest first.
 */
function indexRuns(scanLimit) {
    // Returns the parsed report record, or null for unreadable / non-object files.
    const readReport = (filePath) => {
        try {
            return toRecord(JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")));
        }
        catch {
            return null;
        }
    };
    const latestByRunId = new Map();
    collectJsonCandidates(scanLimit).forEach((candidate) => {
        const report = readReport(candidate.path);
        if (!report) {
            return;
        }
        const runId = toStringSafe(report.run_id);
        if (!runId) {
            return;
        }
        const evalTarget = resolveRunTarget({ report, runId, reportPath: candidate.path });
        const stamp = normalizeTimestamp(report, candidate.mtimeMs);
        const existing = latestByRunId.get(runId);
        if (existing && !(stamp.ms > existing.timestamp_ms)) {
            return;
        }
        latestByRunId.set(runId, {
            run_id: runId,
            eval_target: evalTarget,
            report_path: candidate.path,
            report,
            timestamp_iso: stamp.iso,
            timestamp_ms: stamp.ms
        });
    });
    const runs = [...latestByRunId.values()];
    runs.sort((left, right) => right.timestamp_ms - left.timestamp_ms);
    return runs;
}
|
|
/**
 * Normalises history query-string parameters into a filter object.
 *
 * Unrecognised targets become "all"; `use_mock` is `null` (no filtering) when
 * absent, "any", or not boolean-like; `limit` is clamped to 1-500 (default
 * 120) and `scan_limit` to 50-5000 (default 900).
 *
 * @param {object} query - Request query parameters.
 * @returns {object} Filters with keys `from_ms`, `to_ms`, `target`,
 *   `use_mock`, `prompt_contains`, `mode`, `limit` and `scan_limit`.
 */
function parseFilters(query) {
    const selectableTargets = ["normalizer", "assistant_stage1", "assistant_stage2", "assistant_p0"];
    const targetText = toStringSafe(query.target);
    const requestedTarget = targetText === null ? "all" : targetText.toLowerCase();
    const target = selectableTargets.includes(requestedTarget) ? requestedTarget : "all";
    const useMockText = toStringSafe(query.use_mock);
    let useMockFilter = null;
    if (useMockText !== null && useMockText.toLowerCase() !== "any") {
        useMockFilter = toBooleanSafe(useMockText);
    }
    const modeText = toStringSafe(query.mode);
    const promptText = toStringSafe(query.prompt_contains);
    return {
        from_ms: parseDateMs(query.from),
        to_ms: parseDateMs(query.to),
        target,
        use_mock: useMockFilter,
        prompt_contains: (promptText === null ? "" : promptText).toLowerCase(),
        mode: modeText === null ? "all" : modeText.toLowerCase(),
        limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
        scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
    };
}
|
|
/**
 * Tests an indexed run against the parsed history filters.
 *
 * @param {object} run - Indexed run (`timestamp_ms`, `eval_target`, `report`).
 * @param {object} filters - Output of `parseFilters`.
 * @returns {boolean} `true` when the run satisfies every active filter.
 */
function matchesFilters(run, filters) {
    const { report, timestamp_ms: runMs } = run;
    const beforeWindow = filters.from_ms !== null && runMs < filters.from_ms;
    const afterWindow = filters.to_ms !== null && runMs > filters.to_ms;
    if (beforeWindow || afterWindow) {
        return false;
    }
    if (filters.target !== "all" && run.eval_target !== filters.target) {
        return false;
    }
    if (filters.mode !== "all") {
        const runMode = (toStringSafe(report.mode) ?? "").toLowerCase();
        if (runMode !== filters.mode) {
            return false;
        }
    }
    // A run with no recognisable use_mock value never matches an explicit true/false filter.
    if (filters.use_mock !== null && toBooleanSafe(report.use_mock) !== filters.use_mock) {
        return false;
    }
    if (filters.prompt_contains.length > 0) {
        const promptVersion = (toStringSafe(report.prompt_version) ?? "").toLowerCase();
        return promptVersion.includes(filters.prompt_contains);
    }
    return true;
}
|
|
/**
 * Builds the list-view summary of an indexed run.
 *
 * Provider and model are looked up on the report itself, then on
 * `report.connection`, then on the normalize config, accepting both
 * snake_case and camelCase provider keys.
 *
 * @param {object} run - Indexed run as produced by `indexRuns`.
 * @returns {object} Summary with identity, configuration, score, failure
 *   counts and per-domain coverage.
 */
function buildRunSummary(run) {
    const { report } = run;
    // First candidate that is a non-blank string, or null.
    const firstText = (...candidates) => {
        for (const candidate of candidates) {
            const text = toStringSafe(candidate);
            if (text !== null) {
                return text;
            }
        }
        return null;
    };
    const connection = toRecord(report.connection);
    const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);
    const llmProvider = firstText(
        report.llm_provider,
        report.llmProvider,
        connection?.llm_provider,
        connection?.llmProvider,
        normalizeConfig?.llm_provider,
        normalizeConfig?.llmProvider
    );
    const model = firstText(report.model, connection?.model, normalizeConfig?.model);
    // Dialog availability is not needed here, so the per-case disk check is skipped.
    const cases = buildCaseSummaries(report, run.run_id, false);
    const coverage = buildCoverageFromCases(cases);
    const failures = countFailures(report);
    const declaredCaseCount = toNumberSafe(report.cases_total);
    return {
        run_id: run.run_id,
        eval_target: run.eval_target,
        run_timestamp: run.timestamp_iso,
        mode: toStringSafe(report.mode),
        llm_provider: llmProvider,
        model,
        use_mock: toBooleanSafe(report.use_mock),
        prompt_version: toStringSafe(report.prompt_version),
        schema_version: toStringSafe(report.schema_version),
        suite_id: toStringSafe(report.suite_id),
        cases_total: declaredCaseCount === null ? cases.length : declaredCaseCount,
        requests_total: toNumberSafe(toRecord(report.budget)?.requests_total),
        report_path: run.report_path,
        score_index: computeScoreIndex(report, run.eval_target),
        blocking_failures: failures.blocking,
        quality_failures: failures.quality,
        closed_cases: coverage.closed_cases,
        open_cases: coverage.open_cases,
        domain_coverage: coverage.domain_coverage
    };
}
|
|
/**
 * Sums per-domain coverage across several run summaries.
 *
 * @param {Array<{ domain_coverage: object[] }>} summaries
 * @returns {Array<{ domain: string, total_cases: number, closed_cases: number }>}
 *   One entry per domain, ordered by total case count, largest first.
 */
function mergeDomainCoverage(summaries) {
    const totalsByDomain = new Map();
    const entries = summaries.flatMap((summary) => summary.domain_coverage);
    for (const { domain, total_cases: total, closed_cases: closed } of entries) {
        const running = totalsByDomain.get(domain);
        if (running === undefined) {
            totalsByDomain.set(domain, { total: 0 + total, closed: 0 + closed });
        }
        else {
            running.total += total;
            running.closed += closed;
        }
    }
    const merged = [];
    totalsByDomain.forEach((running, domain) => {
        merged.push({ domain, total_cases: running.total, closed_cases: running.closed });
    });
    merged.sort((left, right) => right.total_cases - left.total_cases);
    return merged;
}
|
|
/**
 * Aggregates run summaries into history-level statistics.
 *
 * The trend compares the first summary's score with the second's and needs a
 * difference of more than 0.5 to report "up" or "down"; it is "flat" when
 * either score is missing.
 *
 * @param {object[]} summaries - Run summaries in the order they are listed.
 * @returns {object} Totals, per-target counts, average/latest/previous score,
 *   trend and merged domain coverage.
 */
function buildHistoryStats(summaries) {
    const byTarget = {};
    const scores = [];
    let blockingRuns = 0;
    let qualityRuns = 0;
    summaries.forEach((summary) => {
        byTarget[summary.eval_target] = (byTarget[summary.eval_target] ?? 0) + 1;
        blockingRuns += summary.blocking_failures > 0 ? 1 : 0;
        qualityRuns += summary.quality_failures > 0 ? 1 : 0;
        if (typeof summary.score_index === "number") {
            scores.push(summary.score_index);
        }
    });
    const scoreAt = (position) => {
        const value = summaries[position]?.score_index;
        return typeof value === "number" ? value : null;
    };
    const latestScore = scoreAt(0);
    const previousScore = scoreAt(1);
    let trend = "flat";
    if (latestScore !== null && previousScore !== null) {
        if (latestScore > previousScore + 0.5) {
            trend = "up";
        }
        else if (latestScore < previousScore - 0.5) {
            trend = "down";
        }
    }
    let averageScore = null;
    if (scores.length > 0) {
        const total = scores.reduce((sum, value) => sum + value, 0);
        averageScore = Number((total / scores.length).toFixed(2));
    }
    return {
        runs_total: summaries.length,
        by_target: byTarget,
        blocking_runs: blockingRuns,
        quality_gap_runs: qualityRuns,
        avg_score_index: averageScore,
        latest_score_index: latestScore,
        previous_score_index: previousScore,
        trend,
        domain_coverage: mergeDomainCoverage(summaries)
    };
}
|
|
/**
 * Looks up a single run by id by re-indexing the report files.
 *
 * @param {string} runId - Run identifier to match exactly.
 * @param {number} [scanLimit=3000] - Maximum number of report files to inspect.
 * @returns {object|null} The indexed run, or `null` when no run matches.
 */
function findRunById(runId, scanLimit = 3000) {
    for (const candidate of indexRuns(scanLimit)) {
        if (candidate.run_id === runId) {
            return candidate;
        }
    }
    return null;
}
|
|
/**
 * Summarises the routing/debug information of the last assistant message in a
 * session record.
 *
 * @param {object|null} dialogRecord - Parsed session record with a `conversation` list.
 * @returns {object|null} `null` when no record is given; otherwise an object
 *   whose fields are `null` wherever the last assistant message (or its
 *   `debug` record) lacks the value.
 */
function buildAssistantModeSummary(dialogRecord) {
    if (!dialogRecord) {
        return null;
    }
    const entries = toArray(dialogRecord.conversation);
    let lastAssistant;
    // Walk backwards so the first hit is the most recent assistant message.
    for (let position = entries.length - 1; position >= 0; position -= 1) {
        const candidate = toRecord(entries[position]);
        if (candidate !== null && toStringSafe(candidate.role) === "assistant") {
            lastAssistant = candidate;
            break;
        }
    }
    const debug = toRecord(lastAssistant?.debug);
    const debugText = (key) => toStringSafe(debug?.[key]);
    return {
        reply_type: toStringSafe(lastAssistant?.reply_type),
        trace_id: toStringSafe(lastAssistant?.trace_id),
        detected_mode: debugText("detected_mode"),
        execution_lane: debugText("execution_lane"),
        tool_gate_decision: debugText("tool_gate_decision"),
        living_router_mode: debugText("living_router_mode"),
        fallback_type: debugText("fallback_type")
    };
}
|
|
/**
 * Loads the stored assistant session for a run/case pair, if one exists.
 *
 * The session file is `<ASSISTANT_SESSIONS_DIR>/<runId>-<caseId>.json`.
 *
 * @param {string} runId - Run identifier.
 * @param {string} caseId - Case identifier.
 * @returns {object|null} Dialog with `source`, `session_id`, `messages`,
 *   `decomposition` and `assistant_mode`; `null` when the file is missing,
 *   unreadable, not a JSON object, or would resolve outside the sessions
 *   directory.
 */
function loadSessionDialog(runId, caseId) {
    const sessionId = `${runId}-${caseId}`;
    const sessionsDir = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR);
    const filePath = path_1.default.resolve(sessionsDir, `${sessionId}.json`);
    // Both ids arrive from URL parameters. An id containing "../" segments
    // would otherwise let a request read any .json file on disk, so paths that
    // escape the sessions directory are refused.
    const relativePath = path_1.default.relative(sessionsDir, filePath);
    const escapesSessionsDir = relativePath === ""
        || relativePath === ".."
        || relativePath.startsWith(`..${path_1.default.sep}`)
        || path_1.default.isAbsolute(relativePath);
    if (escapesSessionsDir) {
        return null;
    }
    if (!fs_1.default.existsSync(filePath)) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
    }
    catch {
        // Unreadable or malformed session file: the caller falls back to report data.
        return null;
    }
    const record = toRecord(parsed);
    if (!record) {
        return null;
    }
    const messages = toArray(record.conversation)
        .map((item) => toRecord(item))
        .filter((item) => item !== null)
        .map((item) => ({
            role: toStringSafe(item.role) ?? "unknown",
            text: toStringSafe(item.text) ?? "",
            created_at: toStringSafe(item.created_at),
            trace_id: toStringSafe(item.trace_id),
            reply_type: toStringSafe(item.reply_type)
        }));
    const turns = toArray(record.turns)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    // Only the last turn's human-readable decomposition is exposed.
    const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
    const humanReadable = toRecord(lastTurn?.human_readable);
    const decomposition = toArray(humanReadable?.decomposition)
        .map((item) => toStringSafe(item))
        .filter((item) => item !== null);
    return {
        source: "assistant_session",
        session_id: sessionId,
        messages,
        decomposition,
        assistant_mode: buildAssistantModeSummary(record)
    };
}
|
|
/**
 * Synthesises a two-message dialog for a case from report data when no stored
 * session is available.
 *
 * The user message is the case's raw question (or a "Case <id>" placeholder);
 * the assistant message lists the case's match flags, confidence and numeric
 * metric subscores as `key=value` lines.
 *
 * @param {object} run - Indexed run holding the parsed report.
 * @param {string} caseId - Case identifier to look up in `report.results`.
 * @returns {object} Dialog with `source` "report_fallback", or "none" with
 *   empty messages when the report has no such case.
 */
function buildFallbackDialog(run, caseId) {
    const sessionId = `${run.run_id}-${caseId}`;
    const wrap = (source, messages) => ({
        source,
        session_id: sessionId,
        messages,
        decomposition: [],
        assistant_mode: null
    });
    const matchedCase = getResultCases(run.report)
        .find((entry) => (toStringSafe(entry.case_id) ?? "") === caseId);
    if (!matchedCase) {
        return wrap("none", []);
    }
    const userText = toStringSafe(matchedCase.raw_question)
        ?? toStringSafe(matchedCase.user_query_raw)
        ?? `Case ${caseId}`;
    const summaryLines = [];
    for (const flag of ["validation_passed", "route_match", "intent_match"]) {
        const flagValue = toBooleanSafe(matchedCase[flag]);
        if (flagValue !== null) {
            summaryLines.push(`${flag}=${flagValue}`);
        }
    }
    const confidence = toStringSafe(matchedCase.confidence_overall);
    if (confidence) {
        summaryLines.push(`confidence=${confidence}`);
    }
    const metricSubscores = toRecord(matchedCase.metric_subscores);
    if (metricSubscores) {
        Object.entries(metricSubscores)
            .filter(([, value]) => toNumberSafe(value) !== null)
            .forEach(([key, value]) => {
                summaryLines.push(`${key}=${value}`);
            });
    }
    if (summaryLines.length === 0) {
        summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
    }
    const userMessage = {
        role: "user",
        text: userText,
        created_at: null,
        trace_id: null,
        reply_type: null
    };
    const assistantMessage = {
        role: "assistant",
        text: summaryLines.join("\n"),
        created_at: null,
        trace_id: toStringSafe(matchedCase.trace_id),
        reply_type: toStringSafe(matchedCase.reply_type)
    };
    return wrap("report_fallback", [userMessage, assistantMessage]);
}
|
|
/**
 * Creates the Express router exposing the auto-run history API.
 *
 * Routes:
 * - `GET /api/autoruns/history` — filtered run summaries, the filter options
 *   available across all indexed runs, and aggregate stats.
 * - `GET /api/autoruns/history/:run_id` — one run with its cases, coverage
 *   and the full report.
 * - `GET /api/autoruns/history/:run_id/case/:case_id/dialog` — the stored
 *   session dialog for a case, or a fallback built from the report.
 *
 * Every handler forwards thrown errors to `next`, so error responses are
 * produced by the application's error middleware.
 *
 * @returns {object} The configured router.
 */
function buildAutoRunsRouter() {
    const router = (0, express_1.Router)();
    // Distinct values in default sort order.
    const uniqueSorted = (values) => Array.from(new Set(values)).sort();
    // Trimmed route parameter, or "" when absent.
    const readParam = (req, name) => String(req.params[name] ?? "").trim();
    // Resolves a run or raises the shared 404.
    const requireRun = (runId) => {
        const run = findRunById(runId);
        if (!run) {
            throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
        }
        return run;
    };
    const toIsoOrNull = (ms) => (ms === null ? null : new Date(ms).toISOString());
    router.get("/api/autoruns/history", (req, res, next) => {
        // Same try/catch → next(error) forwarding as the other two handlers.
        try {
            const filters = parseFilters(req.query);
            const indexed = indexRuns(filters.scan_limit);
            const summaries = indexed
                .filter((run) => matchesFilters(run, filters))
                .slice(0, filters.limit)
                .map((run) => buildRunSummary(run));
            // "available" reflects every indexed run, not only those passing the filters.
            const availableTargets = uniqueSorted(indexed.map((item) => item.eval_target));
            const availableModes = uniqueSorted(indexed
                .map((item) => toStringSafe(item.report.mode))
                .filter((item) => item !== null));
            const availablePromptVersions = uniqueSorted(indexed
                .map((item) => toStringSafe(item.report.prompt_version))
                .filter((item) => item !== null));
            (0, http_1.ok)(res, {
                ok: true,
                generated_at: new Date().toISOString(),
                filters_applied: {
                    from: toIsoOrNull(filters.from_ms),
                    to: toIsoOrNull(filters.to_ms),
                    target: filters.target,
                    use_mock: filters.use_mock,
                    prompt_contains: filters.prompt_contains,
                    mode: filters.mode,
                    limit: filters.limit,
                    scan_limit: filters.scan_limit
                },
                available: {
                    targets: availableTargets,
                    modes: availableModes,
                    prompt_versions: availablePromptVersions
                },
                items: summaries,
                stats: buildHistoryStats(summaries)
            });
        }
        catch (error) {
            next(error);
        }
    });
    router.get("/api/autoruns/history/:run_id", (req, res, next) => {
        try {
            const runId = readParam(req, "run_id");
            if (!runId) {
                throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400);
            }
            const run = requireRun(runId);
            // The detail view checks on disk which cases have a stored dialog.
            const cases = buildCaseSummaries(run.report, run.run_id, true);
            const coverage = buildCoverageFromCases(cases);
            (0, http_1.ok)(res, {
                ok: true,
                run: buildRunSummary(run),
                coverage,
                cases,
                report: run.report
            });
        }
        catch (error) {
            next(error);
        }
    });
    router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
        try {
            const runId = readParam(req, "run_id");
            const caseId = readParam(req, "case_id");
            if (!runId || !caseId) {
                throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
            }
            const run = requireRun(runId);
            // Prefer the stored session; otherwise synthesise a dialog from the report.
            const dialog = loadSessionDialog(runId, caseId) ?? buildFallbackDialog(run, caseId);
            (0, http_1.ok)(res, {
                ok: true,
                run_id: runId,
                case_id: caseId,
                ...dialog
            });
        }
        catch (error) {
            next(error);
        }
    });
    return router;
}
|