NODEDC_1C/llm_normalizer/backend/dist/routes/autoRuns.js

1484 lines
63 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildAutoRunsRouter = buildAutoRunsRouter;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const express_1 = require("express");
const config_1 = require("../config");
const http_1 = require("../utils/http");
const capabilitiesRegistry_1 = require("../services/capabilitiesRegistry");
// Closed list of verdict codes accepted for a manually reviewed case.
// parseManualCaseDecision() validates incoming values against this list, and
// the fallback decision schema exposes it as its `enum`.
const MANUAL_CASE_DECISIONS = [
"covered_ok",
"covered_but_bad_answer",
"candidate_for_implementation",
"needs_routing_extension",
"out_of_scope_but_answer_softly",
"unsafe_question_limit_strictly",
"needs_dialog_policy_fix",
"needs_capability_registry_update",
"bad_test_case"
];
// Maps every manual case decision code to a queue identifier; published as
// `queue_mapping` in the fallback decision schema.
// NOTE(review): presumably these are follow-up work queues, with "none"
// meaning no follow-up is needed - confirm against the consumer.
const DECISION_QUEUE_MAP = {
covered_ok: "none",
covered_but_bad_answer: "policy_fix",
candidate_for_implementation: "routing_extension",
needs_routing_extension: "routing_extension",
out_of_scope_but_answer_softly: "soft_boundary",
unsafe_question_limit_strictly: "safety_policy",
needs_dialog_policy_fix: "policy_fix",
needs_capability_registry_update: "capability_registry",
bad_test_case: "testset_hygiene"
};
/**
 * Narrows an arbitrary value to a non-array object.
 *
 * @param {*} value Anything, typically the result of JSON.parse.
 * @returns {object|null} The same reference when `value` is a truthy,
 *   non-array object; otherwise null.
 */
function toRecord(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    return isObjectLike && !Array.isArray(value) ? value : null;
}
/**
 * Returns `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value
 * @returns {Array}
 */
function toArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
/**
 * Extracts a trimmed, non-empty string.
 *
 * @param {*} value
 * @returns {string|null} The trimmed text, or null when `value` is not a
 *   string or is blank after trimming.
 */
function toStringSafe(value) {
    const text = typeof value === "string" ? value.trim() : "";
    return text === "" ? null : text;
}
/**
 * Coerces a number or a numeric string to a finite number.
 *
 * @param {*} value
 * @returns {number|null} The finite number, or null for non-finite numbers,
 *   blank or non-numeric strings, and every other type.
 */
function toNumberSafe(value) {
    let candidate;
    if (typeof value === "number") {
        candidate = value;
    }
    else if (typeof value === "string" && value.trim() !== "") {
        // Number() applies the standard string-to-number conversion.
        candidate = Number(value);
    }
    else {
        return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Interprets a boolean or a boolean-like string.
 *
 * Strings are trimmed and lower-cased before matching: "1", "true", "yes"
 * and "on" yield true; "0", "false", "no" and "off" yield false.
 *
 * @param {*} value
 * @returns {boolean|null} The parsed flag, or null when nothing matched.
 */
function toBooleanSafe(value) {
    if (typeof value === "boolean") {
        return value;
    }
    if (typeof value !== "string") {
        return null;
    }
    switch (value.trim().toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        case "0":
        case "false":
        case "no":
        case "off":
            return false;
        default:
            return null;
    }
}
/**
 * Parses a date string into epoch milliseconds.
 *
 * @param {*} value Candidate date text; non-strings and blank strings are rejected.
 * @returns {number|null} Result of Date.parse, or null when the input is
 *   unusable or does not parse to a finite timestamp.
 */
function parseDateMs(value) {
    const text = toStringSafe(value);
    if (!text) {
        return null;
    }
    const parsedMs = Date.parse(text);
    if (!Number.isFinite(parsedMs)) {
        return null;
    }
    return parsedMs;
}
/**
 * Truncates a number toward zero and clamps it into [min, max].
 *
 * @param {number|null} value Candidate number.
 * @param {number} min Lower bound (inclusive).
 * @param {number} max Upper bound (inclusive).
 * @param {number} fallback Returned when `value` is not a finite number.
 * @returns {number}
 */
function clampInt(value, min, max, fallback) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
        return fallback;
    }
    const truncated = Math.trunc(value);
    return truncated < min ? min : truncated > max ? max : truncated;
}
/**
 * Validates a manual case decision code.
 *
 * @param {*} value Candidate decision code.
 * @param {string} [fallback="needs_dialog_policy_fix"] Returned when `value`
 *   is blank, not a string, or not listed in MANUAL_CASE_DECISIONS.
 * @returns {string} A known decision code or `fallback`.
 */
function parseManualCaseDecision(value, fallback = "needs_dialog_policy_fix") {
    const candidate = toStringSafe(value);
    const isKnown = Boolean(candidate) && MANUAL_CASE_DECISIONS.includes(candidate);
    return isKnown ? candidate : fallback;
}
/**
 * Normalizes an annotation author name.
 *
 * @param {*} value Candidate author text.
 * @returns {string|null} Trimmed name cut to at most 80 characters, or null
 *   when no usable text was supplied.
 */
function parseAnnotationAuthor(value) {
    const name = toStringSafe(value);
    return name ? name.slice(0, 80) : null;
}
/**
 * Loads the manual-case-decision schema from the configured JSON file.
 *
 * The built-in schema is returned instead when the file does not exist,
 * cannot be read or parsed, or does not contain a non-array object.
 *
 * @returns {object} The parsed schema record or the built-in fallback.
 */
function readManualDecisionSchema() {
    const schemaFile = config_1.MANUAL_CASE_DECISION_SCHEMA_FILE;
    const builtIn = {
        schema_version: "manual_case_decision_schema_v1_fallback",
        enum: MANUAL_CASE_DECISIONS,
        labels: {
            covered_ok: "Покрыто и ок",
            covered_but_bad_answer: "Покрыто, но ответ плохой",
            candidate_for_implementation: "Кандидат на внедрение",
            needs_routing_extension: "Нужно расширение маршрутизации",
            out_of_scope_but_answer_softly: "Вне скоупа, но нужен мягкий ответ",
            unsafe_question_limit_strictly: "Высокий риск, строгие ограничения",
            needs_dialog_policy_fix: "Нужен фикс диалоговой политики",
            needs_capability_registry_update: "Нужно обновить реестр возможностей",
            bad_test_case: "Плохой тест-кейс"
        },
        queue_mapping: DECISION_QUEUE_MAP
    };
    if (!fs_1.default.existsSync(schemaFile)) {
        return builtIn;
    }
    let schema = null;
    try {
        const rawText = fs_1.default.readFileSync(schemaFile, "utf-8");
        schema = toRecord(JSON.parse(rawText));
    }
    catch {
        // Unreadable or malformed file: fall through to the built-in schema.
        schema = null;
    }
    return schema ?? builtIn;
}
/**
 * Reads and sanitizes the auto-generation history file.
 *
 * Non-object entries and entries without a `generation_id` are dropped.
 * Remaining entries are normalized field by field and sorted by `created_at`,
 * newest first.
 *
 * @returns {Array<object>} Sanitized history records; empty when the file is
 *   missing, unreadable, malformed, or not a JSON array.
 */
function readAutoGenHistory() {
    const historyFile = config_1.AUTORUN_GENERATOR_HISTORY_FILE;
    if (!fs_1.default.existsSync(historyFile)) {
        return [];
    }
    try {
        const payload = JSON.parse(fs_1.default.readFileSync(historyFile, "utf-8"));
        if (!Array.isArray(payload)) {
            return [];
        }
        const history = [];
        for (const entry of payload) {
            const item = toRecord(entry);
            if (item === null) {
                continue;
            }
            const generationId = toStringSafe(item.generation_id) ?? "";
            if (generationId.length === 0) {
                continue;
            }
            const context = toRecord(item.context);
            const questions = toArray(item.questions)
                .map((question) => toStringSafe(question))
                .filter((question) => question !== null)
                .slice(0, 500);
            history.push({
                generation_id: generationId,
                created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
                mode: toStringSafe(item.mode) ?? "codex_creative",
                count: clampInt(toNumberSafe(item.count), 1, 300, 20),
                domain: toStringSafe(item.domain),
                questions,
                generated_by: toStringSafe(item.generated_by),
                saved_case_set_file: toStringSafe(item.saved_case_set_file),
                context: context
                    ? {
                        llm_provider: toStringSafe(context.llm_provider),
                        model: toStringSafe(context.model),
                        assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
                        decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
                        prompt_fingerprint: toStringSafe(context.prompt_fingerprint)
                    }
                    : null
            });
        }
        const newestFirst = (left, right) => Date.parse(right.created_at) - Date.parse(left.created_at);
        return history.sort(newestFirst);
    }
    catch {
        return [];
    }
}
/**
 * Persists the auto-generation history as pretty-printed JSON.
 *
 * Creates the parent directory (recursively) when it does not exist yet.
 *
 * @param {Array<object>} records History records to serialize.
 * @returns {void}
 */
function writeAutoGenHistory(records) {
    const targetFile = config_1.AUTORUN_GENERATOR_HISTORY_FILE;
    const parentDir = path_1.default.dirname(targetFile);
    const parentMissing = !fs_1.default.existsSync(parentDir);
    if (parentMissing) {
        fs_1.default.mkdirSync(parentDir, { recursive: true });
    }
    const serialized = JSON.stringify(records, null, 2);
    fs_1.default.writeFileSync(targetFile, serialized, "utf-8");
}
/**
 * Reads the case records from an eval dataset file.
 *
 * Two layouts are accepted: a top-level JSON array of cases, or an object
 * whose `cases` property holds the array. Non-object entries are discarded.
 *
 * @param {string} filePath Path of the dataset JSON file.
 * @returns {Array<object>} Case records; empty on read/parse failure or an
 *   unsupported layout.
 */
function readEvalDatasetCases(filePath) {
    const onlyRecords = (list) => list
        .map((entry) => toRecord(entry))
        .filter((entry) => entry !== null);
    try {
        const payload = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
        if (Array.isArray(payload)) {
            return onlyRecords(payload);
        }
        const wrapper = toRecord(payload);
        return wrapper ? onlyRecords(toArray(wrapper.cases)) : [];
    }
    catch {
        return [];
    }
}
/**
 * Collects distinct question texts from every `.json` file located directly
 * in the eval datasets directory.
 *
 * For each case the first usable field among `raw_question`, `user_message`
 * and `query` is taken. Order of first appearance is preserved.
 *
 * @param {number} [limit=300] Maximum number of questions to return.
 * @returns {string[]} Unique questions; empty when the directory is missing.
 */
function collectCanonicalQuestions(limit = 300) {
    const datasetsDir = config_1.EVAL_DATASETS_DIR;
    if (!fs_1.default.existsSync(datasetsDir)) {
        return [];
    }
    const datasetFiles = fs_1.default
        .readdirSync(datasetsDir, { withFileTypes: true })
        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
    // A Set keeps insertion order, so de-duplication preserves first-seen order.
    const unique = new Set();
    for (const file of datasetFiles) {
        const datasetPath = path_1.default.resolve(datasetsDir, file.name);
        for (const testCase of readEvalDatasetCases(datasetPath)) {
            const question = toStringSafe(testCase.raw_question) ??
                toStringSafe(testCase.user_message) ??
                toStringSafe(testCase.query);
            if (question) {
                unique.add(question);
            }
        }
    }
    return Array.from(unique).slice(0, limit);
}
/**
 * Normalizes a domain hint to trimmed lower-case text.
 *
 * @param {*} value Candidate domain hint.
 * @returns {string|null} Lower-cased hint, or null when no usable text was given.
 */
function normalizeDomainHint(value) {
    const hint = toStringSafe(value);
    return hint ? hint.toLowerCase() : null;
}
/**
 * Returns three built-in template questions for a domain hint.
 *
 * The hint is matched by substring against keyword groups, in order: VAT,
 * counterparties, settlements. The first matching group wins; a generic set
 * is returned when nothing matches or the hint is null/undefined.
 *
 * @param {string|null|undefined} domain Domain hint; the needles are lower-case.
 * @returns {string[]} A fresh array of three questions.
 */
function fallbackDomainTemplates(domain) {
    const groups = [
        {
            needles: ["vat", "ндс"],
            questions: [
                "Сколько НДС к уплате на дату по организации?",
                "Покажи прогноз НДС за период по организации.",
                "Почему по НДС сейчас ноль и из чего сложился расчет?"
            ]
        },
        {
            needles: ["counter", "контраг"],
            questions: [
                "Покажи топ контрагентов по сумме платежей за период.",
                "Какой самый крупный договор у выбранной организации?",
                "Какие документы были по контрагенту за весь период?"
            ]
        },
        {
            needles: ["settlement", "задолж", "расчет"],
            questions: [
                "Какие незакрытые расчеты висят на конец периода?",
                "Есть ли незакрытые авансы по поставщикам?",
                "Покажи цепочки закрытия по счетам 60/62."
            ]
        }
    ];
    for (const group of groups) {
        if (group.needles.some((needle) => domain?.includes(needle))) {
            return group.questions;
        }
    }
    return [
        "С какой организацией сейчас можно работать в активном контуре?",
        "Покажи ключевые операции за выбранный период.",
        "Какие вопросы по этому домену ассистент поддерживает прямо сейчас?"
    ];
}
/**
 * Rewrites a canonical question into a casual, slightly misspelled variant.
 *
 * The variant is fully determined by `index`: it selects a colloquial prefix,
 * a tail, and (for even indexes only) one whole-word typo substitution.
 *
 * @param {string} base Source question text.
 * @param {number} index Position of the question in the generated batch.
 * @returns {string} The mutated question.
 */
function mutateIntoQwenStyle(base, index) {
    const wrappers = ["йо ", "слушай ", "подскажи плиз ", "короче ", "мож ", "а ну-ка "];
    const tails = ["", " без воды", " по факту", " и коротко", " прям сейчас", " за весь период"];
    // `\b` only understands ASCII word characters, so it never matches around
    // Cyrillic words. Use Unicode-aware lookarounds to get real whole-word
    // matching ("что" must match, "чтобы" must not).
    const wholeWord = (word) => new RegExp(`(?<![\\p{L}\\p{N}_])${word}(?![\\p{L}\\p{N}_])`, "giu");
    const typoMap = [
        [wholeWord("компания"), "компиния"],
        [wholeWord("сейчас"), "щас"],
        [wholeWord("пожалуйста"), "плиз"],
        [wholeWord("какая"), "кака"],
        [wholeWord("что"), "че"]
    ];
    const prefix = wrappers[index % wrappers.length];
    const tail = tails[index % tails.length];
    let text = `${prefix}${base}${tail}`.trim();
    if (index % 2 === 0) {
        const [pattern, replacement] = typoMap[index % typoMap.length];
        text = text.replace(pattern, replacement);
    }
    return text;
}
/**
 * Produces up to `count` casual-style questions seeded from canonical
 * dataset questions.
 *
 * Built-in domain templates are the pool when no canonical questions exist.
 * When `domain` is given the pool is narrowed to questions that mention it
 * (or are one of its templates); if that leaves nothing, the full pool is used.
 * Duplicates are removed, so fewer than `count` items may be returned.
 *
 * @param {number} count Requested number of questions.
 * @param {string|null} domain Lower-cased domain hint, or null for no filter.
 * @returns {string[]} Distinct mutated questions.
 */
function generateQwenSeedQuestions(count, domain) {
    const canonical = collectCanonicalQuestions(450);
    const pool = canonical.length > 0 ? canonical : fallbackDomainTemplates(domain);
    let candidates = pool;
    if (domain) {
        const scoped = pool.filter((question) => question.toLowerCase().includes(domain) || fallbackDomainTemplates(domain).includes(question));
        if (scoped.length > 0) {
            candidates = scoped;
        }
    }
    const unique = new Set();
    let position = 0;
    while (position < count) {
        // Cycle through the candidates when more questions are requested than exist.
        unique.add(mutateIntoQwenStyle(candidates[position % candidates.length], position));
        position += 1;
    }
    return [...unique].slice(0, count);
}
/**
 * Produces up to `count` questions by wrapping the built-in domain templates
 * in fixed framing patterns.
 *
 * Templates and patterns are both cycled by position. Duplicates are removed,
 * so fewer than `count` items may be returned.
 *
 * @param {number} count Requested number of questions.
 * @param {string|null} domain Domain hint used to pick the template set.
 * @returns {string[]} Distinct generated questions.
 */
function generateCodexCreativeQuestions(count, domain) {
    const frames = [
        "Дай бизнес-срез по состоянию на дату: {q}",
        "Нужен аккуратный ответ как бухгалтеру: {q}",
        "Если данных не хватает, скажи что уточнить, но сначала попробуй: {q}",
        "Сформулируй результат без технички и с шагом дальше: {q}",
        "Проверь в read-only и скажи что видно: {q}"
    ];
    const templates = fallbackDomainTemplates(domain);
    const unique = new Set();
    let position = 0;
    while (position < count) {
        const question = templates[position % templates.length];
        const frame = frames[position % frames.length];
        unique.add(frame.replace("{q}", question));
        position += 1;
    }
    return [...unique].slice(0, count);
}
/**
 * Builds a generation identifier of the form `gen-<time>-<random>`, where
 * both parts are base-36 encoded.
 *
 * @returns {string}
 */
function generateAutogenId() {
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 9);
    return "gen-".concat(timePart, "-", randomPart);
}
/**
 * Reads and sanitizes all stored annotations.
 *
 * Every field is normalized (ids to strings, `message_index` clamped to
 * 0..100000, `rating` clamped to 1..5, decision validated, author truncated).
 * Entries missing `annotation_id`, `run_id` or `case_id` are dropped.
 *
 * @returns {Array<object>} Sanitized annotations; empty when the file is
 *   missing, unreadable, malformed, or not a JSON array.
 */
function readAnnotations() {
    const annotationsFile = config_1.AUTORUN_ANNOTATIONS_FILE;
    if (!fs_1.default.existsSync(annotationsFile)) {
        return [];
    }
    try {
        const payload = JSON.parse(fs_1.default.readFileSync(annotationsFile, "utf-8"));
        if (!Array.isArray(payload)) {
            return [];
        }
        const annotations = [];
        for (const entry of payload) {
            const item = toRecord(entry);
            if (item === null) {
                continue;
            }
            const context = toRecord(item.context);
            const annotation = {
                annotation_id: toStringSafe(item.annotation_id) ?? "",
                run_id: toStringSafe(item.run_id) ?? "",
                case_id: toStringSafe(item.case_id) ?? "",
                session_id: toStringSafe(item.session_id) ?? "",
                message_index: clampInt(toNumberSafe(item.message_index), 0, 100_000, 0),
                rating: clampInt(toNumberSafe(item.rating), 1, 5, 1),
                comment: toStringSafe(item.comment) ?? "",
                manual_case_decision: parseManualCaseDecision(item.manual_case_decision),
                annotation_author: parseAnnotationAuthor(item.annotation_author),
                created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
                updated_at: toStringSafe(item.updated_at) ?? new Date().toISOString(),
                context: {
                    message_id: toStringSafe(context?.message_id),
                    trace_id: toStringSafe(context?.trace_id),
                    reply_type: toStringSafe(context?.reply_type),
                    eval_target: toStringSafe(context?.eval_target) ?? "unknown",
                    prompt_version: toStringSafe(context?.prompt_version),
                    domain: toStringSafe(context?.domain),
                    query_class: toStringSafe(context?.query_class)
                }
            };
            // The three identifiers are mandatory; empty strings mean "missing".
            if (annotation.annotation_id && annotation.run_id && annotation.case_id) {
                annotations.push(annotation);
            }
        }
        return annotations;
    }
    catch {
        return [];
    }
}
/**
 * Persists all annotations as pretty-printed JSON.
 *
 * Creates the parent directory first when it is missing, mirroring
 * writeAutoGenHistory, so the first write into a fresh data directory does
 * not fail with ENOENT.
 *
 * @param {Array<object>} items Annotation records to serialize.
 * @returns {void}
 */
function writeAnnotations(items) {
    const dir = path_1.default.dirname(config_1.AUTORUN_ANNOTATIONS_FILE);
    if (!fs_1.default.existsSync(dir)) {
        fs_1.default.mkdirSync(dir, { recursive: true });
    }
    fs_1.default.writeFileSync(config_1.AUTORUN_ANNOTATIONS_FILE, JSON.stringify(items, null, 2), "utf-8");
}
/**
 * Builds the composite key `<runId>::<caseId>::<messageIndex>`.
 *
 * @param {string} runId
 * @param {string} caseId
 * @param {number} messageIndex
 * @returns {string}
 */
function annotationKey(runId, caseId, messageIndex) {
    const separator = "::";
    return "".concat(runId, separator, caseId, separator, messageIndex);
}
/**
 * Aggregates annotation statistics per case for a single run.
 *
 * @param {string} runId Run whose annotations are counted.
 * @param {Array<object>} annotations Annotations with `run_id`, `case_id`,
 *   `rating` and `updated_at`.
 * @returns {Map<string, {count: number, latest_at: string|null, avg_rating: number|null}>}
 *   Per-case count, newest parseable `updated_at` as an ISO string (null when
 *   none parses), and average rating rounded to two decimals.
 */
function buildAnnotationStatsMap(runId, annotations) {
    const perCase = new Map();
    for (const annotation of annotations.filter((entry) => entry.run_id === runId)) {
        let tally = perCase.get(annotation.case_id);
        if (!tally) {
            tally = { count: 0, ratingSum: 0, newestMs: null };
            perCase.set(annotation.case_id, tally);
        }
        tally.count += 1;
        tally.ratingSum = tally.ratingSum + annotation.rating;
        const updatedMs = Date.parse(annotation.updated_at);
        const isNewer = tally.newestMs === null || updatedMs > tally.newestMs;
        if (Number.isFinite(updatedMs) && isNewer) {
            tally.newestMs = updatedMs;
        }
    }
    const stats = new Map();
    perCase.forEach((tally, caseId) => {
        stats.set(caseId, {
            count: tally.count,
            latest_at: tally.newestMs === null ? null : new Date(tally.newestMs).toISOString(),
            avg_rating: tally.count > 0 ? Number((tally.ratingSum / tally.count).toFixed(2)) : null
        });
    });
    return stats;
}
/**
 * Picks the most recently updated annotation for each message of one case.
 *
 * Resolution rules when several annotations target the same message index:
 *  - the first one seen is kept until a better candidate appears;
 *  - a candidate with an unparseable `updated_at` never replaces a stored one;
 *  - a candidate with a valid timestamp replaces a stored annotation whose
 *    timestamp is unparseable, or whose timestamp is older or equal.
 *
 * @param {string} runId Run to scope to.
 * @param {string} caseId Case to scope to.
 * @param {Array<object>} annotations Annotations with `run_id`, `case_id`,
 *   `message_index` and `updated_at`.
 * @returns {Map<number, object>} Latest annotation keyed by message index.
 */
function buildAnnotationsByMessageIndex(runId, caseId, annotations) {
    const map = new Map();
    for (const item of annotations) {
        if (item.run_id !== runId || item.case_id !== caseId)
            continue;
        const current = map.get(item.message_index);
        if (!current) {
            map.set(item.message_index, item);
            continue;
        }
        const nextMs = Date.parse(item.updated_at);
        if (Number.isNaN(nextMs)) {
            // Cannot prove the candidate is newer; keep what we already have.
            continue;
        }
        const currentMs = Date.parse(current.updated_at);
        // A stored NaN timestamp compares false against everything, which would
        // pin the stale entry forever; treat it as always replaceable.
        if (Number.isNaN(currentMs) || nextMs >= currentMs) {
            map.set(item.message_index, item);
        }
    }
    return map;
}
/**
 * Determines which evaluation target a report belongs to.
 *
 * Precedence: an explicit, recognized `report.eval_target`; then the run id
 * prefix; then a `.report.json` file suffix (treated as "normalizer").
 *
 * @param {{report: object, runId: string, reportPath: string}} input
 * @returns {string} "assistant_stage1", "assistant_stage2", "assistant_p0",
 *   "normalizer" or "unknown".
 */
function resolveRunTarget(input) {
    const knownTargets = ["assistant_stage1", "assistant_stage2", "assistant_p0", "normalizer"];
    const declared = toStringSafe(input.report.eval_target);
    if (knownTargets.includes(declared)) {
        return declared;
    }
    const targetByPrefix = [
        ["assistant-stage1-", "assistant_stage1"],
        ["assistant-stage2-", "assistant_stage2"],
        ["assistant-p0-", "assistant_p0"],
        ["eval-", "normalizer"]
    ];
    for (const [prefix, target] of targetByPrefix) {
        if (input.runId.startsWith(prefix)) {
            return target;
        }
    }
    return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
/**
 * Resolves the timestamp of a run.
 *
 * Uses `report.run_timestamp`, then `report.timestamp`, and finally the
 * report file's modification time when neither parses.
 *
 * @param {object} report Parsed report record.
 * @param {number} fileMtimeMs Modification time of the report file, epoch ms.
 * @returns {{iso: string, ms: number}}
 */
function normalizeTimestamp(report, fileMtimeMs) {
    const reportedMs = parseDateMs(report.run_timestamp) ?? parseDateMs(report.timestamp);
    const ms = reportedMs ?? fileMtimeMs;
    return { iso: new Date(ms).toISOString(), ms };
}
/**
 * Normalizes a rate to the 0..100 percent range.
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100; larger
 * values are taken as percentages already. The result is clamped to [0, 100].
 *
 * @param {number|null} value
 * @returns {number|null} Percent value, or null when `value` is null.
 */
function rateToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 1.2 ? value * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
/**
 * Normalizes a score to the 0..100 percent range.
 *
 * Values up to 5.2 are treated as points on a 5-point scale; larger values
 * are taken as percentages already. The result is clamped to [0, 100].
 *
 * @param {number|null} value
 * @returns {number|null} Percent value, or null when `value` is null.
 */
function scoreToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 5.2 ? (value / 5) * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
/**
 * Averages the finite numbers in a list, rounded to two decimals.
 *
 * @param {Array<*>} values Mixed list; non-numbers and non-finite numbers are ignored.
 * @returns {number|null} The mean, or null when no finite number is present.
 */
function average(values) {
    let total = 0;
    let counted = 0;
    for (const candidate of values.filter((entry) => typeof entry === "number" && Number.isFinite(entry))) {
        total = total + candidate;
        counted += 1;
    }
    if (counted === 0) {
        return null;
    }
    return Number((total / counted).toFixed(2));
}
/**
 * Locates the metrics object of a report.
 *
 * Prefers `report.metrics.raw` when it is an object, otherwise uses
 * `report.metrics` itself.
 *
 * @param {object} report Parsed report record.
 * @returns {object|null} Metrics record, or null when `report.metrics` is not an object.
 */
function getMetricRecord(report) {
    const metricsBlock = toRecord(report.metrics);
    if (metricsBlock === null) {
        return null;
    }
    const rawBlock = toRecord(metricsBlock.raw);
    return rawBlock === null ? metricsBlock : rawBlock;
}
/**
 * Computes a 0..100 composite score for a run from its report metrics.
 *
 * Each evaluation target has its own metric set. Every metric is normalized
 * to percent and the available ones are averaged (missing "positive" metrics
 * are skipped by `average`). Any target other than the three assistant
 * targets, including "unknown", uses the normalizer formula.
 *
 * Missing inverted metrics keep their historical defaults: in the assistant
 * branches a missing rate counts as 1 (component 0); in the normalizer branch
 * a missing `high_confidence_error_rate` counts as 0 (component 100).
 *
 * @param {object} report Parsed report record.
 * @param {string} target Evaluation target of the run.
 * @returns {number|null} Score rounded to two decimals, or null when the
 *   report has no metrics or none of the metrics is usable.
 */
function computeScoreIndex(report, target) {
    const metrics = getMetricRecord(report);
    if (!metrics) {
        return null;
    }
    const num = (key) => toNumberSafe(metrics[key]);
    const rate = (key) => rateToPercent(num(key));
    const score = (key) => scoreToPercent(num(key));
    // "Lower is better" fraction-scale rates are inverted before normalization.
    const invertedRate = (key) => rateToPercent(1 - (num(key) ?? 1));
    if (target === "assistant_p0") {
        return average([
            rate("problem_first_answer_rate"),
            score("mechanism_coherence_score"),
            invertedRate("entity_leakage_rate"),
            score("accountant_actionability_score"),
            rate("route_correctness_rate"),
            rate("domain_purity_rate"),
            rate("limitation_honesty_rate"),
            rate("top_problem_unit_match_rate")
        ]);
    }
    if (target === "assistant_stage1") {
        return average([
            rate("retrieval_differentiation_rate"),
            invertedRate("generic_explanation_rate"),
            score("accountant_actionability_score"),
            invertedRate("false_confidence_rate"),
            invertedRate("broad_answer_rate"),
            score("mechanism_specificity_score"),
            score("followup_context_retention_score")
        ]);
    }
    if (target === "assistant_stage2") {
        return average([
            rate("problem_unit_precision"),
            rate("problem_unit_recall_proxy"),
            rate("duplicate_collapse_rate"),
            score("mechanism_coherence_score"),
            score("problem_clarity_score"),
            rate("problem_first_answer_rate"),
            invertedRate("entity_leakage_rate")
        ]);
    }
    // `100 - errorRate` is already a percent. Feeding it through rateToPercent
    // would trip the "<= 1.2 means fraction" heuristic for error rates of
    // 98.8% and above and report them as a near-perfect score, so clamp directly.
    const errorRatePercent = num("high_confidence_error_rate") ?? 0;
    const errorFreePercent = Math.max(0, Math.min(100, 100 - errorRatePercent));
    return average([
        rate("schema_validation_pass_rate"),
        rateToPercent(num("route_resolution_accuracy") ?? num("route_hint_accuracy")),
        rateToPercent(num("execution_state_consistency_rate") ?? num("intent_class_accuracy")),
        errorFreePercent
    ]);
}
/**
 * Counts gate failures recorded in a report.
 *
 * @param {object} report Parsed report record.
 * @returns {{blocking: number, quality: number}} `blocking` sums acceptance
 *   `blocking_failures` and baseline `blocking_regressions`; `quality` sums
 *   acceptance `quality_failures` and baseline `legacy_quality_failures` and
 *   `quality_gap_failures`. Missing or non-array lists count as zero.
 */
function countFailures(report) {
    const acceptance = toRecord(report.acceptance_gate);
    const baseline = toRecord(report.baseline_stability_gate);
    const sizeOf = (list) => toArray(list).length;
    return {
        blocking: sizeOf(acceptance?.blocking_failures) + sizeOf(baseline?.blocking_regressions),
        quality: sizeOf(acceptance?.quality_failures) +
            sizeOf(baseline?.legacy_quality_failures) +
            sizeOf(baseline?.quality_gap_failures)
    };
}
/**
 * Derives a 0..100 score for a single case from its metric subscores.
 *
 * `case_product_score` is used directly when present. Otherwise the available
 * clarity, coherence, problem-first, entity-leakage (inverted; a missing value
 * counts as 1) and usefulness components are averaged.
 *
 * @param {object|null} metricSubscores Per-case subscores record.
 * @returns {number|null} Score rounded to two decimals, or null when no input
 *   record is given or no component is usable.
 */
function caseScoreFromMetricSubscores(metricSubscores) {
    if (!metricSubscores) {
        return null;
    }
    const read = (key) => toNumberSafe(metricSubscores[key]);
    const productScore = scoreToPercent(read("case_product_score"));
    if (productScore !== null) {
        return Number(productScore.toFixed(2));
    }
    return average([
        scoreToPercent(read("problem_clarity_score")),
        scoreToPercent(read("mechanism_coherence_score")),
        rateToPercent(read("problem_first_answer_rate")),
        rateToPercent(1 - (read("entity_leakage_rate") ?? 1)),
        scoreToPercent(read("accountant_usefulness_score"))
    ]);
}
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the `route_correct`, `domain_pure` and
 * `problem_first_answer` checks has a boolean verdict, the case is closed
 * unless one of them is false. Otherwise a numeric score of 65 or more closes
 * the case.
 *
 * @param {{checks: object|null, scoreIndex: number|null}} input
 * @returns {boolean|null} Closed state, or null when neither checks nor score
 *   allow a decision.
 */
function isCaseClosed(input) {
    const { checks } = input;
    if (checks) {
        const verdicts = [
            toBooleanSafe(checks.route_correct),
            toBooleanSafe(checks.domain_pure),
            toBooleanSafe(checks.problem_first_answer)
        ];
        const hasAnyVerdict = verdicts.some((verdict) => verdict !== null);
        if (hasAnyVerdict) {
            return !verdicts.includes(false);
        }
    }
    return typeof input.scoreIndex === "number" ? input.scoreIndex >= 65 : null;
}
/**
 * Returns the object entries of `report.results`.
 *
 * @param {object} report Parsed report record.
 * @returns {Array<object>} Result records; empty when `results` is not an array.
 */
function getResultCases(report) {
    const records = [];
    for (const entry of toArray(report.results)) {
        const record = toRecord(entry);
        if (record !== null) {
            records.push(record);
        }
    }
    return records;
}
/**
 * Builds one summary row per result case of a report.
 *
 * @param {object} report Parsed report record.
 * @param {string} runId Run identifier; combined with the case id into the session id.
 * @param {boolean} checkDialogAvailability When true, checks on disk whether a
 *   session file exists for each case; otherwise `dialog_available` is false.
 * @param {Map<string, object>} [annotationStatsByCase] Optional per-case
 *   annotation statistics (count, latest_at, avg_rating).
 * @returns {Array<object>} Case summaries in report order. Cases without a
 *   `case_id` get `case-<position>` (1-based).
 */
function buildCaseSummaries(report, runId, checkDialogAvailability, annotationStatsByCase) {
    const sessionFileExists = (sessionId) => fs_1.default.existsSync(path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`));
    return getResultCases(report).map((result, position) => {
        const caseId = toStringSafe(result.case_id) ?? `case-${position + 1}`;
        const sessionId = `${runId}-${caseId}`;
        const checks = toRecord(result.checks);
        const metricSubscores = toRecord(result.metric_subscores);
        const scoreIndex = caseScoreFromMetricSubscores(metricSubscores) ??
            scoreToPercent(toNumberSafe(result.accountant_usefulness_score)) ??
            null;
        const closedState = isCaseClosed({ checks, scoreIndex });
        let status = "unknown";
        if (closedState !== null) {
            status = closedState ? "closed" : "open";
        }
        const stats = annotationStatsByCase?.get(caseId);
        return {
            case_id: caseId,
            domain: toStringSafe(result.domain),
            query_class: toStringSafe(result.query_class),
            status,
            score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
            trace_id: toStringSafe(result.trace_id),
            reply_type: toStringSafe(result.reply_type),
            session_id: sessionId,
            dialog_available: checkDialogAvailability ? sessionFileExists(sessionId) : false,
            commented_count: stats?.count ?? 0,
            latest_annotation_at: stats?.latest_at ?? null,
            avg_rating: stats?.avg_rating ?? null,
            checks,
            metric_subscores: metricSubscores
        };
    });
}
/**
 * Summarizes closed/open counts overall and per domain.
 *
 * @param {Array<{status: string, domain: string|null}>} cases Case summaries.
 * @returns {{closed_cases: number, open_cases: number,
 *   domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}}
 *   Domain rows are sorted by total cases, descending; cases without a domain
 *   are grouped under "unknown".
 */
function buildCoverageFromCases(cases) {
    const perDomain = new Map();
    const totals = { closed: 0, open: 0 };
    for (const summary of cases) {
        const isClosed = summary.status === "closed";
        if (isClosed) {
            totals.closed += 1;
        }
        else if (summary.status === "open") {
            totals.open += 1;
        }
        const domainKey = summary.domain ?? "unknown";
        const tally = perDomain.get(domainKey) ?? { total: 0, closed: 0 };
        tally.total += 1;
        if (isClosed) {
            tally.closed += 1;
        }
        perDomain.set(domainKey, tally);
    }
    const rows = [];
    perDomain.forEach((tally, domain) => {
        rows.push({ domain, total_cases: tally.total, closed_cases: tally.closed });
    });
    rows.sort((left, right) => right.total_cases - left.total_cases);
    return {
        closed_cases: totals.closed,
        open_cases: totals.open,
        domain_coverage: rows
    };
}
/**
 * Lists candidate report files, newest first.
 *
 * Scans the reports directory for `.json` files and the eval cases directory
 * for `.report.json` files (top level only). Directories that do not exist
 * and files whose stat call fails are skipped.
 *
 * @param {number} scanLimit Maximum number of files to return.
 * @returns {Array<{path: string, mtimeMs: number}>} Files sorted by
 *   modification time, descending.
 */
function collectJsonCandidates(scanLimit) {
    const roots = [
        [config_1.REPORTS_DIR, ".json"],
        [config_1.EVAL_CASES_DIR, ".report.json"]
    ];
    const found = [];
    roots.forEach(([dir, suffix]) => {
        if (!fs_1.default.existsSync(dir)) {
            return;
        }
        fs_1.default
            .readdirSync(dir, { withFileTypes: true })
            .filter((entry) => entry.isFile() && entry.name.endsWith(suffix))
            .forEach((entry) => {
                const fullPath = path_1.default.resolve(dir, entry.name);
                try {
                    found.push({ path: fullPath, mtimeMs: fs_1.default.statSync(fullPath).mtimeMs });
                }
                catch {
                    // stat failed for this file: leave it out of the scan
                }
            });
    });
    found.sort((left, right) => right.mtimeMs - left.mtimeMs);
    return found.slice(0, scanLimit);
}
/**
 * Parses candidate report files into an index of runs.
 *
 * Files that cannot be read or parsed, are not JSON objects, or lack a
 * `run_id` are ignored. When several files share a run id, the one with the
 * latest resolved timestamp is kept.
 *
 * @param {number} scanLimit Maximum number of files to inspect.
 * @returns {Array<object>} Indexed runs (run_id, eval_target, report_path,
 *   report, timestamp_iso, timestamp_ms), newest first.
 */
function indexRuns(scanLimit) {
    const readReport = (filePath) => {
        try {
            return toRecord(JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")));
        }
        catch {
            return null;
        }
    };
    const newestByRunId = new Map();
    for (const file of collectJsonCandidates(scanLimit)) {
        const report = readReport(file.path);
        const runId = report ? toStringSafe(report.run_id) : null;
        if (!report || !runId) {
            continue;
        }
        const when = normalizeTimestamp(report, file.mtimeMs);
        const candidate = {
            run_id: runId,
            eval_target: resolveRunTarget({ report, runId, reportPath: file.path }),
            report_path: file.path,
            report,
            timestamp_iso: when.iso,
            timestamp_ms: when.ms
        };
        const known = newestByRunId.get(runId);
        if (!known || candidate.timestamp_ms > known.timestamp_ms) {
            newestByRunId.set(runId, candidate);
        }
    }
    return [...newestByRunId.values()].sort((left, right) => right.timestamp_ms - left.timestamp_ms);
}
/**
 * Converts raw query parameters into a normalized run filter.
 *
 * Unrecognized targets become "all". `use_mock` is null (no filtering) when
 * absent or "any". `limit` is clamped to 1..500 (default 120) and
 * `scan_limit` to 50..5000 (default 900).
 *
 * @param {object} query Raw query-string record.
 * @returns {object} Filter with from_ms, to_ms, target, use_mock,
 *   prompt_contains, mode, limit and scan_limit.
 */
function parseFilters(query) {
    const lowered = (raw, fallback) => toStringSafe(raw)?.toLowerCase() ?? fallback;
    const supportedTargets = ["normalizer", "assistant_stage1", "assistant_stage2", "assistant_p0"];
    const requestedTarget = lowered(query.target, "all");
    const mockRaw = toStringSafe(query.use_mock);
    let mockFilter = null;
    if (mockRaw !== null && mockRaw.toLowerCase() !== "any") {
        mockFilter = toBooleanSafe(mockRaw);
    }
    return {
        from_ms: parseDateMs(query.from),
        to_ms: parseDateMs(query.to),
        target: supportedTargets.includes(requestedTarget) ? requestedTarget : "all",
        use_mock: mockFilter,
        prompt_contains: lowered(query.prompt_contains, ""),
        mode: lowered(query.mode, "all"),
        limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
        scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
    };
}
/**
 * Tests whether an indexed run satisfies every active filter.
 *
 * @param {object} run Indexed run (timestamp_ms, eval_target, report).
 * @param {object} filters Filter produced by parseFilters.
 * @returns {boolean} True when the run passes the time window, target, mode,
 *   mock flag and prompt-version substring filters.
 */
function matchesFilters(run, filters) {
    const { report, timestamp_ms: runMs } = run;
    const beforeWindow = filters.from_ms !== null && runMs < filters.from_ms;
    const afterWindow = filters.to_ms !== null && runMs > filters.to_ms;
    if (beforeWindow || afterWindow) {
        return false;
    }
    if (filters.target !== "all" && run.eval_target !== filters.target) {
        return false;
    }
    const runMode = (toStringSafe(report.mode) ?? "").toLowerCase();
    if (filters.mode !== "all" && runMode !== filters.mode) {
        return false;
    }
    if (filters.use_mock !== null && toBooleanSafe(report.use_mock) !== filters.use_mock) {
        return false;
    }
    if (filters.prompt_contains.length === 0) {
        return true;
    }
    const promptVersion = (toStringSafe(report.prompt_version) ?? "").toLowerCase();
    return promptVersion.includes(filters.prompt_contains);
}
/**
 * Builds the list-view summary of an indexed run.
 *
 * Provider and model are looked up on the report itself, then on its
 * `connection` record, then on its normalize config (snake_case and camelCase
 * spellings). Case-level coverage is computed without checking dialog files.
 *
 * @param {object} run Indexed run as produced by indexRuns.
 * @returns {object} Run summary with metadata, score, failure counts and coverage.
 */
function buildRunSummary(run) {
    const { report } = run;
    const firstString = (...candidates) => {
        for (const candidate of candidates) {
            const text = toStringSafe(candidate);
            if (text !== null) {
                return text;
            }
        }
        return null;
    };
    const connection = toRecord(report.connection);
    const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);
    const caseSummaries = buildCaseSummaries(report, run.run_id, false);
    const coverage = buildCoverageFromCases(caseSummaries);
    const failures = countFailures(report);
    return {
        run_id: run.run_id,
        eval_target: run.eval_target,
        run_timestamp: run.timestamp_iso,
        mode: toStringSafe(report.mode),
        llm_provider: firstString(report.llm_provider, report.llmProvider, connection?.llm_provider, connection?.llmProvider, normalizeConfig?.llm_provider, normalizeConfig?.llmProvider),
        model: firstString(report.model, connection?.model, normalizeConfig?.model),
        use_mock: toBooleanSafe(report.use_mock),
        prompt_version: toStringSafe(report.prompt_version),
        schema_version: toStringSafe(report.schema_version),
        suite_id: toStringSafe(report.suite_id),
        cases_total: toNumberSafe(report.cases_total) ?? caseSummaries.length,
        requests_total: toNumberSafe(toRecord(report.budget)?.requests_total),
        report_path: run.report_path,
        score_index: computeScoreIndex(report, run.eval_target),
        blocking_failures: failures.blocking,
        quality_failures: failures.quality,
        closed_cases: coverage.closed_cases,
        open_cases: coverage.open_cases,
        domain_coverage: coverage.domain_coverage
    };
}
/**
 * Sums per-domain coverage across several run summaries.
 *
 * @param {Array<{domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}>} summaries
 * @returns {Array<{domain: string, total_cases: number, closed_cases: number}>}
 *   One row per domain, sorted by total cases, descending.
 */
function mergeDomainCoverage(summaries) {
    const totalsByDomain = new Map();
    for (const { domain_coverage: rows } of summaries) {
        for (const row of rows) {
            const tally = totalsByDomain.get(row.domain) ?? { total: 0, closed: 0 };
            tally.total += row.total_cases;
            tally.closed += row.closed_cases;
            totalsByDomain.set(row.domain, tally);
        }
    }
    const merged = [];
    totalsByDomain.forEach((tally, domain) => {
        merged.push({ domain, total_cases: tally.total, closed_cases: tally.closed });
    });
    merged.sort((left, right) => right.total_cases - left.total_cases);
    return merged;
}
/**
 * Aggregates statistics over a list of run summaries.
 *
 * The trend compares the first summary's score with the second one's and
 * reports "up"/"down" only when they differ by more than 0.5; it is "flat"
 * when either score is missing.
 * NOTE(review): this presumes `summaries` is ordered newest first - confirm
 * against the caller.
 *
 * @param {Array<object>} summaries Run summaries.
 * @returns {object} Totals per target, failure run counts, average/latest/
 *   previous score, trend and merged domain coverage.
 */
function buildHistoryStats(summaries) {
    const perTarget = {};
    const scores = [];
    let runsWithBlocking = 0;
    let runsWithQualityGaps = 0;
    for (const summary of summaries) {
        perTarget[summary.eval_target] = (perTarget[summary.eval_target] ?? 0) + 1;
        if (summary.blocking_failures > 0) {
            runsWithBlocking += 1;
        }
        if (summary.quality_failures > 0) {
            runsWithQualityGaps += 1;
        }
        if (typeof summary.score_index === "number") {
            scores.push(summary.score_index);
        }
    }
    const scoreAt = (position) => (typeof summaries[position]?.score_index === "number" ? summaries[position].score_index : null);
    const latest = scoreAt(0);
    const previous = scoreAt(1);
    let trend = "flat";
    if (latest !== null && previous !== null) {
        if (latest > previous + 0.5) {
            trend = "up";
        }
        else if (latest < previous - 0.5) {
            trend = "down";
        }
    }
    let meanScore = null;
    if (scores.length > 0) {
        const total = scores.reduce((sum, value) => sum + value, 0);
        meanScore = Number((total / scores.length).toFixed(2));
    }
    return {
        runs_total: summaries.length,
        by_target: perTarget,
        blocking_runs: runsWithBlocking,
        quality_gap_runs: runsWithQualityGaps,
        avg_score_index: meanScore,
        latest_score_index: latest,
        previous_score_index: previous,
        trend,
        domain_coverage: mergeDomainCoverage(summaries)
    };
}
/**
 * Looks up a single indexed run by its id.
 *
 * @param {string} runId Run identifier to find.
 * @param {number} [scanLimit=3000] Maximum number of report files to inspect.
 * @returns {object|null} The indexed run, or null when not found.
 */
function findRunById(runId, scanLimit = 3000) {
    for (const run of indexRuns(scanLimit)) {
        if (run.run_id === runId) {
            return run;
        }
    }
    return null;
}
/**
 * Summarizes routing details taken from the last assistant message of a
 * session record.
 *
 * @param {object|null} dialogRecord Session record with a `conversation` array.
 * @returns {object|null} reply_type and trace_id of the last assistant message
 *   plus selected fields of its `debug` record (each null when absent); null
 *   when no record is given.
 */
function buildAssistantModeSummary(dialogRecord) {
    if (!dialogRecord) {
        return null;
    }
    const conversation = toArray(dialogRecord.conversation);
    let lastAssistant;
    // Walk backwards: only the most recent assistant message matters.
    for (let position = conversation.length - 1; position >= 0; position -= 1) {
        const message = toRecord(conversation[position]);
        if (message !== null && toStringSafe(message.role) === "assistant") {
            lastAssistant = message;
            break;
        }
    }
    const debug = toRecord(lastAssistant?.debug);
    return {
        reply_type: toStringSafe(lastAssistant?.reply_type),
        trace_id: toStringSafe(lastAssistant?.trace_id),
        detected_mode: toStringSafe(debug?.detected_mode),
        execution_lane: toStringSafe(debug?.execution_lane),
        tool_gate_decision: toStringSafe(debug?.tool_gate_decision),
        living_router_mode: toStringSafe(debug?.living_router_mode),
        fallback_type: toStringSafe(debug?.fallback_type)
    };
}
/**
 * Loads the stored assistant session dialog for a run/case pair.
 *
 * The session file is `<runId>-<caseId>.json` inside the assistant sessions
 * directory. Paths that resolve outside that directory are rejected.
 * NOTE(review): runId/caseId may originate from request parameters (callers
 * are outside this view); the containment check guards against `../`
 * segments in either value.
 *
 * @param {string} runId Run identifier.
 * @param {string} caseId Case identifier.
 * @returns {object|null} Dialog with source, session_id, messages,
 *   decomposition (from the last turn's `human_readable.decomposition`) and
 *   assistant_mode; null when the file is missing, outside the sessions
 *   directory, unreadable, malformed, or not a JSON object.
 */
function loadSessionDialog(runId, caseId) {
    const sessionId = `${runId}-${caseId}`;
    const sessionsDir = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR);
    const filePath = path_1.default.resolve(sessionsDir, `${sessionId}.json`);
    // Refuse anything that escapes the sessions directory.
    const relativePath = path_1.default.relative(sessionsDir, filePath);
    const escapesDir = relativePath === ".." ||
        relativePath.startsWith(`..${path_1.default.sep}`) ||
        path_1.default.isAbsolute(relativePath);
    if (escapesDir) {
        return null;
    }
    if (!fs_1.default.existsSync(filePath)) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
    }
    catch {
        return null;
    }
    const record = toRecord(parsed);
    if (!record)
        return null;
    const conversation = toArray(record.conversation)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    const messages = conversation.map((item) => ({
        message_id: toStringSafe(item.message_id),
        role: toStringSafe(item.role) ?? "unknown",
        text: toStringSafe(item.text) ?? "",
        created_at: toStringSafe(item.created_at),
        trace_id: toStringSafe(item.trace_id),
        reply_type: toStringSafe(item.reply_type)
    }));
    const turns = toArray(record.turns)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    // Only the final turn's decomposition is surfaced.
    const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
    const humanReadable = toRecord(lastTurn?.human_readable);
    const decomposition = toArray(humanReadable?.decomposition)
        .map((item) => toStringSafe(item))
        .filter((item) => item !== null);
    return {
        source: "assistant_session",
        session_id: sessionId,
        messages,
        decomposition,
        assistant_mode: buildAssistantModeSummary(record)
    };
}
/**
 * Synthesizes a two-message dialog from report data when no session file
 * exists for a case.
 *
 * The user message is the case's raw question (or `Case <id>`); the assistant
 * message lists the case's validation flags, confidence and numeric metric
 * subscores as `key=value` lines.
 *
 * @param {object} run Indexed run whose report contains the case.
 * @param {string} caseId Case identifier to look up in the report results.
 * @returns {object} Dialog with source "report_fallback", or source "none"
 *   and no messages when the case is not in the report.
 */
function buildFallbackDialog(run, caseId) {
    const sessionId = `${run.run_id}-${caseId}`;
    const targetCase = getResultCases(run.report).find((result) => (toStringSafe(result.case_id) ?? "") === caseId) ?? null;
    if (!targetCase) {
        return {
            source: "none",
            session_id: sessionId,
            messages: [],
            decomposition: [],
            assistant_mode: null
        };
    }
    const summaryLines = [];
    for (const flag of ["validation_passed", "route_match", "intent_match"]) {
        const verdict = toBooleanSafe(targetCase[flag]);
        if (verdict !== null) {
            summaryLines.push(`${flag}=${verdict}`);
        }
    }
    const confidence = toStringSafe(targetCase.confidence_overall);
    if (confidence) {
        summaryLines.push(`confidence=${confidence}`);
    }
    const metricSubscores = toRecord(targetCase.metric_subscores);
    if (metricSubscores) {
        Object.entries(metricSubscores)
            .filter(([, value]) => toNumberSafe(value) !== null)
            .forEach(([key, value]) => {
                summaryLines.push(`${key}=${value}`);
            });
    }
    if (summaryLines.length === 0) {
        summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
    }
    const userMessage = {
        message_id: null,
        role: "user",
        text: toStringSafe(targetCase.raw_question) ??
            toStringSafe(targetCase.user_query_raw) ??
            `Case ${caseId}`,
        created_at: null,
        trace_id: null,
        reply_type: null
    };
    const assistantMessage = {
        message_id: null,
        role: "assistant",
        text: summaryLines.join("\n"),
        created_at: null,
        trace_id: toStringSafe(targetCase.trace_id),
        reply_type: toStringSafe(targetCase.reply_type)
    };
    return {
        source: "report_fallback",
        session_id: sessionId,
        messages: [userMessage, assistantMessage],
        decomposition: [],
        assistant_mode: null
    };
}
/**
 * Decorates each dialog message with its position and any annotation attached
 * to that position for the given run/case.
 *
 * @param {string} runId - Run the dialog belongs to.
 * @param {string} caseId - Case the dialog belongs to.
 * @param {object[]} messages - Dialog messages in display order.
 * @param {object[]} annotations - Annotation records to look up.
 * @returns {object[]} Copies of the messages extended with `message_index`,
 *   `commented` and `annotation` (null when no annotation is attached).
 */
function withMessageAnnotations(runId, caseId, messages, annotations) {
    const annotationLookup = buildAnnotationsByMessageIndex(runId, caseId, annotations);
    const decorate = (entry, position) => {
        const linked = annotationLookup.get(position) ?? null;
        const hasComment = linked !== null;
        return {
            ...entry,
            message_index: position,
            commented: hasComment,
            annotation: linked
        };
    };
    return messages.map(decorate);
}
/**
 * Produces an annotation identifier of the form `ann-<time>-<random>`, where
 * both parts are base-36 strings (current epoch milliseconds and up to seven
 * characters taken from `Math.random()`).
 *
 * @returns {string} Newly generated annotation id.
 */
function generateAnnotationId() {
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 9);
    return ["ann", timePart, randomPart].join("-");
}
/**
 * Normalises a comment field to a trimmed string.
 *
 * @param {unknown} value - Raw comment value from the request body.
 * @returns {string} Trimmed comment text, or "" when `toStringSafe` yields nothing.
 */
function parseComment(value) {
    const normalized = toStringSafe(value);
    if (normalized === null || normalized === undefined) {
        return "";
    }
    return normalized.trim();
}
/**
 * Interprets the `manual_case_decision` query filter.
 *
 * @param {unknown} value - Raw query value.
 * @returns {string} "all" when the value is missing or literally "all";
 *   otherwise whatever `parseManualCaseDecision` returns for the value.
 */
function parseDecisionFilter(value) {
    const requested = toStringSafe(value);
    const wantsEverything = !requested || requested === "all";
    return wantsEverything ? "all" : parseManualCaseDecision(requested);
}
/**
 * Resolves the question-generation mode, case-insensitively.
 *
 * @param {unknown} value - Raw mode value.
 * @returns {"qwen_seed"|"codex_creative"} The recognised mode; anything
 *   unrecognised or missing falls back to "codex_creative".
 */
function parseAutoGenMode(value) {
    const requested = toStringSafe(value);
    const lowered = requested ? requested.toLowerCase() : "";
    switch (lowered) {
        case "qwen_seed":
        case "codex_creative":
            return lowered;
        default:
            return "codex_creative";
    }
}
/**
 * Parses the requested number of questions to generate.
 *
 * @param {unknown} value - Raw count value.
 * @returns {number} Result of `clampInt` with bounds 1..200 and fallback 24.
 */
function parseAutogenCount(value) {
    const requested = toNumberSafe(value);
    return clampInt(requested, 1, 200, 24);
}
/**
 * Parses the optional domain hint for question generation.
 *
 * @param {unknown} value - Raw domain value.
 * @returns {string|null} The normalised hint cut to at most 80 characters, or
 *   null when `normalizeDomainHint` produces nothing usable.
 */
function parseAutogenDomain(value) {
    const hint = normalizeDomainHint(value);
    return hint ? hint.slice(0, 80) : null;
}
/**
 * Tells whether the query string carries at least one run-level filter
 * (`from`, `to`, `target`, `mode`, `use_mock`, `prompt_contains`).
 *
 * @param {object} query - Parsed request query.
 * @returns {boolean} True when any of the listed keys has a usable value.
 */
function hasAnyRunFilterQuery(query) {
    const filterKeys = ["from", "to", "target", "mode", "use_mock", "prompt_contains"];
    for (const key of filterKeys) {
        const candidate = toStringSafe(query[key]);
        // Stop at the first key that yields a value, checking keys in this order.
        if (candidate !== null && candidate !== undefined) {
            return Boolean(candidate);
        }
    }
    return false;
}
/**
 * Builds the file name for a persisted auto-generated case set:
 * `assistant_autogen_<mode>_<UTC yyyymmddhhmmss>_<generationId>.json`.
 *
 * @param {string} mode - Generation mode embedded in the name.
 * @param {string} generationId - Generation identifier embedded in the name.
 * @returns {string} File name (no directory component).
 */
function buildAutogenCaseSetFileName(mode, generationId) {
    const twoDigits = (part) => String(part).padStart(2, "0");
    const now = new Date();
    const datePart = `${now.getUTCFullYear()}${twoDigits(now.getUTCMonth() + 1)}${twoDigits(now.getUTCDate())}`;
    const timePart = `${twoDigits(now.getUTCHours())}${twoDigits(now.getUTCMinutes())}${twoDigits(now.getUTCSeconds())}`;
    return `assistant_autogen_${mode}_${datePart}${timePart}_${generationId}.json`;
}
/**
 * Wraps generated questions into a case-set document.
 *
 * Each question becomes a single-turn case with id `AUTO-NNN` (1-based,
 * zero-padded to three digits) and a scenario tag of `<mode>_<domain>`, where a
 * missing domain is written as "general".
 *
 * @param {{generationId: string, mode: string, domain: (string|null), questions: string[]}} input
 * @returns {object} Suite document including `cases`, `case_ids` and metadata.
 */
function buildAutogenCaseSetPayload(input) {
    const { generationId, mode, domain, questions } = input;
    const scenarioTag = `${mode}_${domain ?? "general"}`;
    const toCase = (question, position) => {
        const ordinal = String(position + 1).padStart(3, "0");
        return {
            case_id: `AUTO-${ordinal}`,
            scenario_tag: scenarioTag,
            question_type: "direct",
            broadness_level: "medium",
            turns: [{ user_message: question }],
            expected_hints: {
                expected_reply_type: null,
                expected_degraded_to: null
            }
        };
    };
    const cases = questions.map(toCase);
    const caseIds = cases.map((entry) => entry.case_id);
    return {
        suite_id: `assistant_autogen_${generationId}`,
        suite_version: "0.1.0",
        schema_version: "assistant_autogen_suite_v0_1",
        generated_at: new Date().toISOString(),
        generation_id: generationId,
        mode,
        domain,
        scenario_count: cases.length,
        case_ids: caseIds,
        cases
    };
}
/**
 * Aggregates annotations into review queues and summary statistics.
 *
 * For every annotation the function counts its manual decision and the queue
 * that decision maps to (via DECISION_QUEUE_MAP), counts its domain, resolves
 * the nearest capability group, and appends a flattened view of the annotation
 * to the matching queue until that queue holds `limitPerQueue` entries.
 * Decisions mapped to "none" are collected under `covered_ok`.
 *
 * @param {object[]} annotations - Annotation records, already filtered/sorted by the caller.
 * @param {Map<string, object>} runMap - Indexed runs keyed by run_id; used to look up case summaries.
 * @param {number} limitPerQueue - Maximum number of views kept per queue.
 * @returns {object} `{ stats, domain_summary, queues, recommended_regression_candidates }`.
 */
function collectPostAnalysis(annotations, runMap, limitPerQueue) {
    const byDecision = {};
    const byQueue = {};
    const byDomain = new Map();
    const queues = {
        routing_extension: [],
        policy_fix: [],
        capability_registry: [],
        soft_boundary: [],
        safety_policy: [],
        testset_hygiene: [],
        covered_ok: []
    };
    const registry = (0, capabilitiesRegistry_1.loadCapabilitiesRegistry)();
    // Case summaries are built once per run and indexed by case_id, so that many
    // annotations on the same run do not rebuild and rescan the list each time.
    const caseIndexByRun = new Map();
    const findCaseSummary = (runId, caseId) => {
        const run = runMap.get(runId) ?? null;
        if (!run) {
            return null;
        }
        let caseIndex = caseIndexByRun.get(runId);
        if (!caseIndex) {
            caseIndex = new Map();
            for (const candidate of buildCaseSummaries(run.report, run.run_id, false)) {
                // Keep the first summary per case_id, matching Array.prototype.find.
                if (!caseIndex.has(candidate.case_id)) {
                    caseIndex.set(candidate.case_id, candidate);
                }
            }
            caseIndexByRun.set(runId, caseIndex);
        }
        return caseIndex.get(caseId) ?? null;
    };
    for (const item of annotations) {
        byDecision[item.manual_case_decision] = (byDecision[item.manual_case_decision] ?? 0) + 1;
        const queueKey = DECISION_QUEUE_MAP[item.manual_case_decision];
        byQueue[queueKey] = (byQueue[queueKey] ?? 0) + 1;
        const caseSummary = findCaseSummary(item.run_id, item.case_id);
        // Fall back to the first registry group when no nearest group is resolved.
        const nearestGroup = (0, capabilitiesRegistry_1.resolveNearestCapabilityGroup)({
            domain: caseSummary?.domain ?? item.context.domain,
            queryClass: caseSummary?.query_class ?? item.context.query_class
        }) ??
            registry.groups[0] ??
            null;
        const domainKey = caseSummary?.domain ?? item.context.domain ?? "unknown";
        byDomain.set(domainKey, (byDomain.get(domainKey) ?? 0) + 1);
        const view = {
            annotation_id: item.annotation_id,
            run_id: item.run_id,
            case_id: item.case_id,
            message_index: item.message_index,
            rating: item.rating,
            comment: item.comment,
            manual_case_decision: item.manual_case_decision,
            annotation_author: item.annotation_author,
            updated_at: item.updated_at,
            // Domain/query class prefer the live case summary; trace/reply type
            // prefer what was captured on the annotation itself.
            domain: caseSummary?.domain ?? item.context.domain ?? null,
            query_class: caseSummary?.query_class ?? item.context.query_class ?? null,
            trace_id: item.context.trace_id ?? caseSummary?.trace_id ?? null,
            reply_type: item.context.reply_type ?? caseSummary?.reply_type ?? null,
            nearest_capability_group: nearestGroup
                ? {
                    group_code: nearestGroup.group_code,
                    group_title: nearestGroup.group_title,
                    maturity_status: nearestGroup.maturity_status
                }
                : null
        };
        if (queueKey === "none") {
            if (queues.covered_ok.length < limitPerQueue) {
                queues.covered_ok.push(view);
            }
            continue;
        }
        if (!queues[queueKey]) {
            queues[queueKey] = [];
        }
        if (queues[queueKey].length < limitPerQueue) {
            queues[queueKey].push(view);
        }
    }
    const domainSummary = Array.from(byDomain.entries())
        .map(([domain, total]) => ({ domain, total }))
        .sort((a, b) => b.total - a.total);
    return {
        stats: {
            annotations_total: annotations.length,
            by_decision: byDecision,
            by_queue: byQueue,
            domains_total: domainSummary.length
        },
        domain_summary: domainSummary,
        queues,
        // Up to 20 entries from each of the three listed queues.
        recommended_regression_candidates: [
            ...queues.routing_extension.slice(0, 20),
            ...queues.policy_fix.slice(0, 20),
            ...queues.safety_policy.slice(0, 20)
        ].slice(0, 60)
    };
}
/**
 * Builds the Express router for the auto-run review API.
 *
 * Routes registered:
 *  - GET  /api/autoruns/history                               list run summaries
 *  - GET  /api/autoruns/history/:run_id                       one run with cases and coverage
 *  - GET  /api/autoruns/history/:run_id/case/:case_id/dialog  dialog for one case
 *  - GET  /api/autoruns/annotations                           list annotations
 *  - POST /api/autoruns/annotations                           create or update an annotation
 *  - GET  /api/autoruns/manual-decision-schema                decision schema and enum
 *  - GET  /api/autoruns/post-analysis                         queues/statistics over annotations
 *  - GET  /api/autoruns/autogen/history                       generated question sets
 *  - POST /api/autoruns/autogen/generate                      generate a question set
 *
 * @returns {import("express").Router} Configured router.
 */
function buildAutoRunsRouter() {
    const router = (0, express_1.Router)();
    // Lists run summaries matching the query filters, plus the distinct targets,
    // modes and prompt versions seen in the scanned index.
    router.get("/api/autoruns/history", (req, res) => {
        const filters = parseFilters(req.query);
        const indexed = indexRuns(filters.scan_limit);
        const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit);
        const summaries = filtered.map((run) => buildRunSummary(run));
        const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort();
        const availableModes = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item) => item !== null))).sort();
        const availablePromptVersions = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item) => item !== null))).sort();
        (0, http_1.ok)(res, {
            ok: true,
            generated_at: new Date().toISOString(),
            filters_applied: {
                from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(),
                to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(),
                target: filters.target,
                use_mock: filters.use_mock,
                prompt_contains: filters.prompt_contains,
                mode: filters.mode,
                limit: filters.limit,
                scan_limit: filters.scan_limit
            },
            available: {
                targets: availableTargets,
                modes: availableModes,
                prompt_versions: availablePromptVersions
            },
            items: summaries,
            stats: buildHistoryStats(summaries)
        });
    });
    // Returns one run: summary, per-case summaries (with annotation stats),
    // coverage and the raw report. 400 on empty run_id, 404 when not found.
    router.get("/api/autoruns/history/:run_id", (req, res, next) => {
        try {
            const runId = String(req.params.run_id ?? "").trim();
            if (!runId) {
                throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400);
            }
            const run = findRunById(runId);
            if (!run) {
                throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
            }
            const annotations = readAnnotations();
            const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations);
            const cases = buildCaseSummaries(run.report, run.run_id, true, annotationStatsByCase);
            const coverage = buildCoverageFromCases(cases);
            (0, http_1.ok)(res, {
                ok: true,
                run: buildRunSummary(run),
                coverage,
                cases,
                annotations_summary: {
                    total: annotations.filter((item) => item.run_id === runId).length
                },
                report: run.report
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Returns the dialog for one case: the stored session dialog when available,
    // otherwise a dialog synthesised from the report; messages carry annotations.
    router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
        try {
            const runId = String(req.params.run_id ?? "").trim();
            const caseId = String(req.params.case_id ?? "").trim();
            if (!runId || !caseId) {
                throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
            }
            const run = findRunById(runId);
            if (!run) {
                throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
            }
            const sessionDialog = loadSessionDialog(runId, caseId);
            const dialog = sessionDialog ?? buildFallbackDialog(run, caseId);
            const annotations = readAnnotations();
            const messages = withMessageAnnotations(runId, caseId, dialog.messages, annotations);
            (0, http_1.ok)(res, {
                ok: true,
                run_id: runId,
                case_id: caseId,
                ...dialog,
                // Listed after the spread so the annotated messages replace dialog.messages.
                messages,
                annotations: annotations
                    .filter((item) => item.run_id === runId && item.case_id === caseId)
                    .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Lists annotations (newest first) filtered by run, case, minimum rating and
    // manual decision, each enriched with its run summary and case summary.
    router.get("/api/autoruns/annotations", (req, res, next) => {
        try {
            const runIdFilter = toStringSafe(req.query.run_id);
            const caseIdFilter = toStringSafe(req.query.case_id);
            const minRatingRaw = toNumberSafe(req.query.min_rating);
            const minRating = minRatingRaw === null ? null : clampInt(minRatingRaw, 1, 5, 1);
            const decisionFilter = parseDecisionFilter(req.query.manual_case_decision);
            const limit = clampInt(toNumberSafe(req.query.limit), 1, 2000, 400);
            const scanLimit = clampInt(toNumberSafe(req.query.scan_limit), 50, 5000, 2500);
            const annotations = readAnnotations()
                .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
                .filter((item) => (caseIdFilter ? item.case_id === caseIdFilter : true))
                .filter((item) => (minRating === null ? true : item.rating >= minRating))
                .filter((item) => (decisionFilter === "all" ? true : item.manual_case_decision === decisionFilter))
                .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
                .slice(0, limit);
            const runIndex = indexRuns(scanLimit);
            const runMap = new Map(runIndex.map((item) => [item.run_id, item]));
            // Run and case summaries are computed once per run_id rather than once
            // per annotation; many annotations typically share a run.
            const runViewCache = new Map();
            const resolveRunView = (runId) => {
                let view = runViewCache.get(runId);
                if (!view) {
                    const run = runMap.get(runId) ?? null;
                    view = {
                        run,
                        runSummary: run ? buildRunSummary(run) : null,
                        cases: run ? buildCaseSummaries(run.report, run.run_id, false) : []
                    };
                    runViewCache.set(runId, view);
                }
                return view;
            };
            const items = annotations.map((item) => {
                const { run, runSummary, cases } = resolveRunView(item.run_id);
                const caseSummary = cases.find((candidate) => candidate.case_id === item.case_id) ?? null;
                return {
                    ...item,
                    run: runSummary,
                    case_summary: caseSummary,
                    technical_context: {
                        report_path: run?.report_path ?? null,
                        trace_id: item.context.trace_id,
                        reply_type: item.context.reply_type,
                        domain: item.context.domain,
                        query_class: item.context.query_class,
                        checks: caseSummary?.checks ?? null,
                        metric_subscores: caseSummary?.metric_subscores ?? null
                    }
                };
            });
            const avgRating = items.length > 0 ? Number((items.reduce((acc, item) => acc + item.rating, 0) / items.length).toFixed(2)) : null;
            const byDecision = items.reduce((acc, item) => {
                acc[item.manual_case_decision] = (acc[item.manual_case_decision] ?? 0) + 1;
                return acc;
            }, {});
            (0, http_1.ok)(res, {
                ok: true,
                generated_at: new Date().toISOString(),
                filters_applied: {
                    run_id: runIdFilter ?? null,
                    case_id: caseIdFilter ?? null,
                    min_rating: minRating,
                    manual_case_decision: decisionFilter,
                    limit
                },
                stats: {
                    total: items.length,
                    avg_rating: avgRating,
                    by_decision: byDecision
                },
                available_manual_case_decisions: MANUAL_CASE_DECISIONS,
                manual_case_decision_schema: readManualDecisionSchema(),
                items
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Creates or updates the annotation for (run_id, case_id, message_index).
    // Only assistant messages may be annotated; validation failures raise ApiError.
    router.post("/api/autoruns/annotations", (req, res, next) => {
        try {
            const body = toRecord(req.body);
            if (!body) {
                throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "JSON body is required", 400);
            }
            const runId = toStringSafe(body.run_id);
            const caseId = toStringSafe(body.case_id);
            const messageIndexRaw = toNumberSafe(body.message_index);
            const ratingRaw = toNumberSafe(body.rating);
            const comment = parseComment(body.comment);
            const manualCaseDecision = parseManualCaseDecision(body.manual_case_decision);
            const annotationAuthor = parseAnnotationAuthor(body.annotation_author);
            if (!runId || !caseId) {
                throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "run_id and case_id are required", 400);
            }
            if (messageIndexRaw === null) {
                throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "message_index is required", 400);
            }
            const messageIndex = clampInt(messageIndexRaw, 0, 100_000, 0);
            if (ratingRaw === null) {
                throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "rating is required", 400);
            }
            const rating = clampInt(ratingRaw, 1, 5, 1);
            if (comment.length === 0) {
                throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "comment is required", 400);
            }
            const run = findRunById(runId);
            if (!run) {
                throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
            }
            const cases = buildCaseSummaries(run.report, run.run_id, false);
            const caseSummary = cases.find((item) => item.case_id === caseId) ?? null;
            if (!caseSummary) {
                throw new http_1.ApiError("AUTORUN_CASE_NOT_FOUND", `Case not found: ${caseId} in run ${runId}`, 404);
            }
            // Resolve the same dialog the dialog endpoint would show, so that
            // message_index refers to the message the reviewer actually saw.
            const sessionDialog = loadSessionDialog(runId, caseId);
            const dialog = sessionDialog ?? buildFallbackDialog(run, caseId);
            if (messageIndex >= dialog.messages.length) {
                throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_FOUND", `Message index ${messageIndex} out of range`, 400);
            }
            const targetMessage = dialog.messages[messageIndex];
            const targetRole = toStringSafe(targetMessage.role) ?? "unknown";
            if (targetRole !== "assistant") {
                throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_ASSISTANT", "Only assistant answers can be annotated", 400);
            }
            const nowIso = new Date().toISOString();
            const annotations = readAnnotations();
            const key = annotationKey(runId, caseId, messageIndex);
            const existingIndex = annotations.findIndex((item) => annotationKey(item.run_id, item.case_id, item.message_index) === key);
            const existing = existingIndex >= 0 ? annotations[existingIndex] : null;
            const annotation = {
                // An update keeps the original id and creation time.
                annotation_id: existing?.annotation_id ?? generateAnnotationId(),
                run_id: runId,
                case_id: caseId,
                session_id: caseSummary.session_id,
                message_index: messageIndex,
                rating,
                comment,
                manual_case_decision: manualCaseDecision,
                annotation_author: annotationAuthor,
                created_at: existing?.created_at ?? nowIso,
                updated_at: nowIso,
                context: {
                    message_id: toStringSafe(targetMessage.message_id),
                    trace_id: toStringSafe(targetMessage.trace_id) ?? caseSummary.trace_id,
                    reply_type: toStringSafe(targetMessage.reply_type) ?? caseSummary.reply_type,
                    eval_target: run.eval_target,
                    prompt_version: toStringSafe(run.report.prompt_version),
                    domain: caseSummary.domain,
                    query_class: caseSummary.query_class
                }
            };
            if (existingIndex >= 0) {
                annotations[existingIndex] = annotation;
            }
            else {
                annotations.push(annotation);
            }
            writeAnnotations(annotations);
            const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations);
            const caseStats = annotationStatsByCase.get(caseId) ?? null;
            (0, http_1.ok)(res, {
                ok: true,
                annotation,
                case_annotation_stats: caseStats
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Exposes the manual decision schema together with the decision enum.
    router.get("/api/autoruns/manual-decision-schema", (_req, res) => {
        (0, http_1.ok)(res, {
            ok: true,
            schema: readManualDecisionSchema(),
            enum: MANUAL_CASE_DECISIONS
        });
    });
    // Aggregates annotations into review queues, optionally scoped to one run
    // and/or to the runs matching the run-level query filters.
    router.get("/api/autoruns/post-analysis", (req, res, next) => {
        try {
            const query = req.query;
            const runIdFilter = toStringSafe(query.run_id);
            const limitPerQueue = clampInt(toNumberSafe(query.limit_per_queue), 5, 250, 40);
            const annotationLimit = clampInt(toNumberSafe(query.annotation_limit), 20, 5000, 1500);
            const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500);
            const runFilters = parseFilters(query);
            const applyRunFilters = hasAnyRunFilterQuery(query);
            const runIndex = indexRuns(Math.max(scanLimit, runFilters.scan_limit));
            const filteredRuns = applyRunFilters ? runIndex.filter((run) => matchesFilters(run, runFilters)) : runIndex;
            const runMap = new Map(filteredRuns.map((run) => [run.run_id, run]));
            // When run filters were supplied, annotations must belong to a matching
            // run even if no run matched (yielding an empty result rather than every
            // annotation). Without filters, an empty index leaves annotations unscoped.
            const restrictToKnownRuns = applyRunFilters || runMap.size > 0;
            const scopedAnnotations = readAnnotations()
                .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
                .filter((item) => (restrictToKnownRuns ? runMap.has(item.run_id) : true))
                .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
                .slice(0, annotationLimit);
            const analysis = collectPostAnalysis(scopedAnnotations, runMap, limitPerQueue);
            (0, http_1.ok)(res, {
                ok: true,
                generated_at: new Date().toISOString(),
                filters_applied: {
                    run_id: runIdFilter ?? null,
                    run_filters_applied: applyRunFilters,
                    limit_per_queue: limitPerQueue,
                    annotation_limit: annotationLimit,
                    scan_limit: scanLimit
                },
                runs_considered: filteredRuns.slice(0, 500).map((item) => buildRunSummary(item)),
                manual_case_decision_schema: readManualDecisionSchema(),
                post_analysis: analysis
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Lists stored generation records; a recognised `mode` query narrows the
    // list, any other value (or none) returns all modes.
    router.get("/api/autoruns/autogen/history", (req, res, next) => {
        try {
            const limit = clampInt(toNumberSafe(req.query.limit), 1, 500, 120);
            const rawMode = toStringSafe(req.query.mode);
            const includeAllModes = !rawMode || !["qwen_seed", "codex_creative"].includes(rawMode);
            const modeFilter = rawMode ?? "codex_creative";
            const items = readAutoGenHistory()
                .filter((item) => (includeAllModes ? true : item.mode === modeFilter))
                .slice(0, limit);
            (0, http_1.ok)(res, {
                ok: true,
                generated_at: new Date().toISOString(),
                items
            });
        }
        catch (error) {
            next(error);
        }
    });
    // Generates a question set, optionally writes it as a case-set file under
    // EVAL_CASES_DIR, and prepends the record to the generation history.
    router.post("/api/autoruns/autogen/generate", (req, res, next) => {
        try {
            const body = toRecord(req.body);
            if (!body) {
                throw new http_1.ApiError("INVALID_AUTOGEN_PAYLOAD", "JSON body is required", 400);
            }
            const mode = parseAutoGenMode(body.mode);
            const count = parseAutogenCount(body.count);
            const domain = parseAutogenDomain(body.domain);
            // Persisting is the default unless the body explicitly disables it.
            const persistCaseSet = toBooleanSafe(body.persist_to_eval_cases) ?? true;
            const generatedBy = parseAnnotationAuthor(body.generated_by);
            const context = toRecord(body.context);
            const questions = mode === "qwen_seed"
                ? generateQwenSeedQuestions(count, domain)
                : generateCodexCreativeQuestions(count, domain);
            const generationId = generateAutogenId();
            let savedCaseSetFile = null;
            if (persistCaseSet) {
                if (!fs_1.default.existsSync(config_1.EVAL_CASES_DIR)) {
                    fs_1.default.mkdirSync(config_1.EVAL_CASES_DIR, { recursive: true });
                }
                const fileName = buildAutogenCaseSetFileName(mode, generationId);
                const filePath = path_1.default.resolve(config_1.EVAL_CASES_DIR, fileName);
                const payload = buildAutogenCaseSetPayload({
                    generationId,
                    mode,
                    domain,
                    questions
                });
                fs_1.default.writeFileSync(filePath, JSON.stringify(payload, null, 2), "utf-8");
                savedCaseSetFile = fileName;
            }
            const record = {
                generation_id: generationId,
                created_at: new Date().toISOString(),
                mode,
                count: questions.length,
                domain,
                questions,
                generated_by: generatedBy,
                saved_case_set_file: savedCaseSetFile,
                context: context
                    ? {
                        llm_provider: toStringSafe(context.llm_provider),
                        model: toStringSafe(context.model),
                        assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
                        decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
                        prompt_fingerprint: toStringSafe(context.prompt_fingerprint)
                    }
                    : null
            };
            // Newest record first; the stored history is capped at 500 entries.
            const history = readAutoGenHistory();
            history.unshift(record);
            writeAutoGenHistory(history.slice(0, 500));
            (0, http_1.ok)(res, {
                ok: true,
                generation: record
            });
        }
        catch (error) {
            next(error);
        }
    });
    return router;
}