NODEDC_1C/llm_normalizer/backend/dist/routes/autoRuns.js

2394 lines
103 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.__autoRunsQuestionTestUtils = void 0;
exports.buildAutoRunsRouter = buildAutoRunsRouter;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const express_1 = require("express");
const iconv_lite_1 = __importDefault(require("iconv-lite"));
const config_1 = require("../config");
const agentSemanticRunRegistry_1 = require("../services/agentSemanticRunRegistry");
const http_1 = require("../utils/http");
const capabilitiesRegistry_1 = require("../services/capabilitiesRegistry");
const openaiResponsesClient_1 = require("../services/openaiResponsesClient");
// Whitelist of accepted `manual_case_decision` values.
// parseManualCaseDecision() rejects anything not listed here, and the same
// array is exposed as the `enum` of the fallback decision schema.
const MANUAL_CASE_DECISIONS = [
"covered_ok",
"covered_but_bad_answer",
"candidate_for_implementation",
"needs_routing_extension",
"out_of_scope_but_answer_softly",
"unsafe_question_limit_strictly",
"needs_dialog_policy_fix",
"needs_capability_registry_update",
"bad_test_case"
];
// Maps every manual case decision to a queue name.
// Exposed as `queue_mapping` in the fallback decision schema; "none" is the
// value used for `covered_ok`.
const DECISION_QUEUE_MAP = {
covered_ok: "none",
covered_but_bad_answer: "policy_fix",
candidate_for_implementation: "routing_extension",
needs_routing_extension: "routing_extension",
out_of_scope_but_answer_softly: "soft_boundary",
unsafe_question_limit_strictly: "safety_policy",
needs_dialog_policy_fix: "policy_fix",
needs_capability_registry_update: "capability_registry",
bad_test_case: "testset_hygiene"
};
/**
 * Narrows an arbitrary value to a non-array object.
 *
 * @param {unknown} value - Anything, typically a node of parsed JSON.
 * @returns {object|null} The same reference when it is a non-null,
 *   non-array object; otherwise null.
 */
function toRecord(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    return isObjectLike && !Array.isArray(value) ? value : null;
}
/**
 * Returns the value itself when it is an array, otherwise a fresh empty array.
 *
 * @param {unknown} value
 * @returns {Array}
 */
function toArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
/**
 * Normalizes a value to a trimmed, non-empty string.
 *
 * @param {unknown} value
 * @returns {string|null} The trimmed string, or null when the value is not a
 *   string or is blank after trimming.
 */
function toStringSafe(value) {
    const text = typeof value === "string" ? value.trim() : "";
    return text === "" ? null : text;
}
/**
 * Coerces a number or a non-blank numeric string to a finite number.
 *
 * @param {unknown} value
 * @returns {number|null} A finite number, or null for every other input
 *   (including NaN, +/-Infinity, blank strings and non-numeric strings).
 */
function toNumberSafe(value) {
    let candidate;
    if (typeof value === "number") {
        candidate = value;
    }
    else if (typeof value === "string" && value.trim() !== "") {
        candidate = Number(value);
    }
    else {
        return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Interprets booleans and common textual flags.
 *
 * Accepted strings (case-insensitive, surrounding whitespace ignored):
 * "1"/"true"/"yes"/"on" and "0"/"false"/"no"/"off".
 *
 * @param {unknown} value
 * @returns {boolean|null} The parsed flag, or null when it is not recognized.
 */
function toBooleanSafe(value) {
    if (typeof value === "boolean") {
        return value;
    }
    if (typeof value !== "string") {
        return null;
    }
    switch (value.trim().toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        case "0":
        case "false":
        case "no":
        case "off":
            return false;
        default:
            return null;
    }
}
/**
 * Parses a date-like string into epoch milliseconds.
 *
 * @param {unknown} value - Expected to be a date string accepted by Date.parse.
 * @returns {number|null} Epoch milliseconds, or null when the value is not a
 *   non-blank string or cannot be parsed.
 */
function parseDateMs(value) {
    const text = toStringSafe(value);
    if (text === null) {
        return null;
    }
    const epochMs = Date.parse(text);
    return Number.isFinite(epochMs) ? epochMs : null;
}
/**
 * Truncates a number toward zero and clamps it into [min, max].
 *
 * @param {number|null} value - Candidate number.
 * @param {number} min - Lower bound (checked first).
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned when value is null or not finite.
 * @returns {number}
 */
function clampInt(value, min, max, fallback) {
    const usable = value !== null && Number.isFinite(value);
    if (!usable) {
        return fallback;
    }
    const truncated = Math.trunc(value);
    // The lower bound is tested before the upper one, as in the original chain.
    return truncated < min ? min : truncated > max ? max : truncated;
}
/**
 * Tells whether the value is one of the supported auto-generation modes.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isAutoGenMode(value) {
    switch (value) {
        case "qwen_seed":
        case "codex_creative":
        case "saved_user_sessions":
            return true;
        default:
            return false;
    }
}
/**
 * Normalizes a generation title: trims it, passes it through
 * repairAutogenMojibake and caps the result at 160 UTF-16 code units.
 *
 * @param {unknown} value
 * @returns {string|null} The cleaned title, or null when no usable string was given.
 */
function parseAutoGenTitle(value) {
    const rawTitle = toStringSafe(value);
    return rawTitle ? repairAutogenMojibake(rawTitle).slice(0, 160) : null;
}
/**
 * Validates a manual case decision against MANUAL_CASE_DECISIONS.
 *
 * @param {unknown} value - Candidate decision.
 * @param {string} [fallback="needs_dialog_policy_fix"] - Returned for missing or unknown values.
 * @returns {string} A whitelisted decision or the fallback.
 */
function parseManualCaseDecision(value, fallback = "needs_dialog_policy_fix") {
    const candidate = toStringSafe(value);
    if (candidate !== null && MANUAL_CASE_DECISIONS.includes(candidate)) {
        return candidate;
    }
    return fallback;
}
/**
 * Normalizes an author name: trimmed and capped at 80 UTF-16 code units.
 *
 * @param {unknown} value
 * @returns {string|null} The author, or null when no usable string was given.
 */
function parseAnnotationAuthor(value) {
    const name = toStringSafe(value);
    return name === null ? null : name.slice(0, 80);
}
/**
 * Parses the `resolved` flag of an annotation.
 *
 * @param {unknown} value - Boolean or textual flag understood by toBooleanSafe.
 * @param {boolean} [fallback=false] - Used when the flag cannot be interpreted.
 * @returns {boolean}
 */
function parseAnnotationResolved(value, fallback = false) {
    // toBooleanSafe yields true, false or null, so `??` only replaces null.
    return toBooleanSafe(value) ?? fallback;
}
/**
 * Loads the manual-case-decision schema from
 * config_1.MANUAL_CASE_DECISION_SCHEMA_FILE.
 *
 * Best effort: when the file is missing, unreadable, not valid JSON, or its
 * root is not an object, a built-in fallback schema is returned instead.
 *
 * @returns {object} The parsed schema object or the fallback schema.
 */
function readManualDecisionSchema() {
    const fallbackLabels = {
        covered_ok: "Покрыто и ок",
        covered_but_bad_answer: "Покрыто, но ответ плохой",
        candidate_for_implementation: "Кандидат на внедрение",
        needs_routing_extension: "Нужно расширение маршрутизации",
        out_of_scope_but_answer_softly: "Вне скоупа, но нужен мягкий ответ",
        unsafe_question_limit_strictly: "Высокий риск, строгие ограничения",
        needs_dialog_policy_fix: "Нужен фикс диалоговой политики",
        needs_capability_registry_update: "Нужно обновить реестр возможностей",
        bad_test_case: "Плохой тест-кейс"
    };
    const fallbackSchema = {
        schema_version: "manual_case_decision_schema_v1_fallback",
        enum: MANUAL_CASE_DECISIONS,
        labels: fallbackLabels,
        queue_mapping: DECISION_QUEUE_MAP
    };
    if (fs_1.default.existsSync(config_1.MANUAL_CASE_DECISION_SCHEMA_FILE)) {
        try {
            const text = fs_1.default.readFileSync(config_1.MANUAL_CASE_DECISION_SCHEMA_FILE, "utf-8");
            return toRecord(JSON.parse(text)) ?? fallbackSchema;
        }
        catch {
            // Deliberate best effort: a broken schema file falls back silently.
            return fallbackSchema;
        }
    }
    return fallbackSchema;
}
/**
 * Reads and normalizes the auto-generation history stored in
 * config_1.AUTORUN_GENERATOR_HISTORY_FILE.
 *
 * Every entry is sanitized field by field; entries without a `generation_id`
 * are dropped. The result is sorted newest first by `created_at`.
 *
 * Best effort: a missing file, invalid JSON or a non-array root yields [].
 *
 * @returns {Array<object>} Normalized history records, newest first.
 */
function readAutoGenHistory() {
    if (!fs_1.default.existsSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE))
        return [];
    // Repairs mojibake only when the raw value is a non-blank string; the
    // untrimmed original text is what gets repaired, as before.
    const repairedTextOrNull = (raw) => (toStringSafe(raw) ? repairAutogenMojibake(String(raw)) : null);
    const parseContext = (rawContext) => {
        const context = toRecord(rawContext);
        if (!context) {
            return null;
        }
        return {
            llm_provider: toStringSafe(context.llm_provider),
            model: toStringSafe(context.model),
            assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
            decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
            prompt_fingerprint: repairedTextOrNull(context.prompt_fingerprint),
            autogen_personality_id: toStringSafe(context.autogen_personality_id),
            autogen_personality_prompt: repairedTextOrNull(context.autogen_personality_prompt),
            source_session_id: toStringSafe(context.source_session_id),
            saved_session_file: toStringSafe(context.saved_session_file),
            saved_case_set_kind: toStringSafe(context.saved_case_set_kind),
            agent_run: toBooleanSafe(context.agent_run),
            agent_focus: repairedTextOrNull(context.agent_focus),
            architecture_phase: toStringSafe(context.architecture_phase),
            source_spec_file: toStringSafe(context.source_spec_file),
            scenario_id: toStringSafe(context.scenario_id),
            semantic_tags: Array.isArray(context.semantic_tags)
                ? Array.from(new Set(context.semantic_tags
                    .map((tag) => toStringSafe(tag))
                    .filter((tag) => Boolean(tag))))
                : null,
            latest_acceptance: null
        };
    };
    // An unparsable `created_at` would make the comparator return NaN and the
    // resulting order inconsistent; such records are treated as epoch 0 so
    // they sort deterministically after every validly dated record.
    const createdAtMs = (record) => {
        const ms = Date.parse(record.created_at);
        return Number.isFinite(ms) ? ms : 0;
    };
    try {
        const parsed = JSON.parse(fs_1.default.readFileSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE, "utf-8"));
        if (!Array.isArray(parsed))
            return [];
        return parsed
            .map((item) => toRecord(item))
            .filter((item) => item !== null)
            .map((item) => {
                const mode = toStringSafe(item.mode);
                return {
                    generation_id: toStringSafe(item.generation_id) ?? "",
                    created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
                    mode: isAutoGenMode(mode) ? mode : "codex_creative",
                    title: parseAutoGenTitle(item.title),
                    count: clampInt(toNumberSafe(item.count), 1, 300, 20),
                    domain: toStringSafe(item.domain),
                    questions: parseAssistantSessionQuestions(item.questions),
                    generated_by: toStringSafe(item.generated_by),
                    saved_case_set_file: toStringSafe(item.saved_case_set_file),
                    context: parseContext(item.context)
                };
            })
            .filter((item) => item.generation_id.length > 0)
            .sort((a, b) => createdAtMs(b) - createdAtMs(a));
    }
    catch {
        // Deliberate best effort: unreadable history is reported as empty.
        return [];
    }
}
/**
 * Persists the auto-generation history as pretty-printed JSON, creating the
 * parent directory first when it does not exist yet.
 *
 * @param {Array<object>} records - History records to store.
 * @returns {void}
 */
function writeAutoGenHistory(records) {
    const parentDir = path_1.default.dirname(config_1.AUTORUN_GENERATOR_HISTORY_FILE);
    const parentMissing = !fs_1.default.existsSync(parentDir);
    if (parentMissing) {
        fs_1.default.mkdirSync(parentDir, { recursive: true });
    }
    const payload = JSON.stringify(records, null, 2);
    fs_1.default.writeFileSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE, payload, "utf-8");
}
/**
 * Decides whether a history record belongs to an agent semantic run.
 *
 * A record qualifies when its context has `agent_run === true`, when its
 * context's `saved_case_set_kind` is "agent_semantic_scenario", or when its
 * title (trimmed, case-insensitive) starts with "AGENT".
 *
 * @param {object} record - History record with optional `context` and `title`.
 * @returns {boolean}
 */
function isAgentSemanticHistoryRecord(record) {
    const context = record.context;
    const markedByContext = context?.agent_run === true ||
        context?.saved_case_set_kind === "agent_semantic_scenario";
    if (markedByContext) {
        return true;
    }
    const title = record.title;
    if (typeof title !== "string") {
        return false;
    }
    return title.trim().toUpperCase().startsWith("AGENT");
}
/**
 * Enriches agent-semantic history records with data looked up through the
 * agent semantic run registry: a resolved `scenario_id`, `semantic_tags`
 * and the latest acceptance summary. Other records are returned unchanged.
 *
 * Spec and acceptance lookups are memoized for the duration of one call, keyed
 * by spec file path and scenario id respectively.
 *
 * @param {Array<object>} records - Normalized history records.
 * @returns {Array<object>} Records in the same order, hydrated where applicable.
 */
function hydrateAutoGenHistoryForApi(records) {
    const specSummaryCache = new Map();
    const acceptanceCache = new Map();
    // Shared memoization: a cache hit is coalesced to null, while the first
    // computed value is returned exactly as produced.
    const memoized = (cache, key, compute) => {
        if (cache.has(key)) {
            return cache.get(key) ?? null;
        }
        const value = compute();
        cache.set(key, value);
        return value;
    };
    const readSpecSummary = (sourceSpecFile) => {
        const specPath = toStringSafe(sourceSpecFile);
        if (!specPath) {
            return null;
        }
        return memoized(specSummaryCache, specPath, () => (0, agentSemanticRunRegistry_1.readAgentSemanticSpecSummaryFromFile)(specPath));
    };
    const readAcceptanceSummary = (scenarioId) => {
        const scenarioKey = toStringSafe(scenarioId);
        if (!scenarioKey) {
            return null;
        }
        return memoized(acceptanceCache, scenarioKey, () => (0, agentSemanticRunRegistry_1.findLatestAgentSemanticAcceptanceSummary)({
            artifactsRootDir: path_1.default.resolve(config_1.ARTIFACTS_DIR, "domain_runs"),
            repoRootDir: config_1.PROJECT_ROOT,
            scenarioId: scenarioKey
        }));
    };
    // Context skeleton used when a record has no context of its own.
    const emptyContext = () => ({
        llm_provider: null,
        model: null,
        assistant_prompt_version: null,
        decomposition_prompt_version: null,
        prompt_fingerprint: null,
        autogen_personality_id: null,
        autogen_personality_prompt: null
    });
    const hydrate = (record) => {
        const specSummary = readSpecSummary(record.context?.source_spec_file);
        const scenarioId = toStringSafe(record.context?.scenario_id) ?? specSummary?.scenario_id ?? null;
        const ownTags = record.context?.semantic_tags;
        let semanticTags;
        if (Array.isArray(ownTags) && ownTags.length > 0) {
            const cleaned = ownTags.map((item) => String(item).trim()).filter((item) => item.length > 0);
            semanticTags = Array.from(new Set(cleaned));
        }
        else {
            semanticTags = specSummary?.semantic_tags ?? [];
        }
        const latestAcceptance = readAcceptanceSummary(scenarioId);
        return {
            ...record,
            context: {
                ...(record.context ?? emptyContext()),
                scenario_id: scenarioId,
                semantic_tags: semanticTags,
                latest_acceptance: latestAcceptance
            }
        };
    };
    return records.map((record) => (isAgentSemanticHistoryRecord(record) ? hydrate(record) : record));
}
/**
 * Reads test cases from an eval dataset file.
 *
 * Two layouts are accepted: a JSON array of cases, or a JSON object with a
 * `cases` array. Entries that are not non-array objects are discarded.
 * Best effort: any read or parse failure yields [].
 *
 * @param {string} filePath - Path of the dataset JSON file.
 * @returns {Array<object>}
 */
function readEvalDatasetCases(filePath) {
    const keepRecords = (items) => items.map((item) => toRecord(item)).filter((item) => item !== null);
    try {
        const parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
        if (Array.isArray(parsed)) {
            return keepRecords(parsed);
        }
        const wrapper = toRecord(parsed);
        return wrapper ? keepRecords(toArray(wrapper.cases)) : [];
    }
    catch {
        // Deliberate best effort: an unreadable dataset contributes no cases.
        return [];
    }
}
/**
 * Collects distinct sanitized questions from every `*.json` dataset directly
 * inside config_1.EVAL_DATASETS_DIR.
 *
 * For each case the first non-blank of `raw_question`, `user_message` and
 * `query` is used. First-seen order is preserved.
 *
 * @param {number} [limit=300] - Maximum number of questions to return.
 * @returns {string[]}
 */
function collectCanonicalQuestions(limit = 300) {
    if (!fs_1.default.existsSync(config_1.EVAL_DATASETS_DIR)) {
        return [];
    }
    const uniqueQuestions = new Set();
    const datasetFiles = fs_1.default
        .readdirSync(config_1.EVAL_DATASETS_DIR, { withFileTypes: true })
        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
    for (const file of datasetFiles) {
        const datasetPath = path_1.default.resolve(config_1.EVAL_DATASETS_DIR, file.name);
        for (const testCase of readEvalDatasetCases(datasetPath)) {
            const question = toStringSafe(testCase.raw_question) ??
                toStringSafe(testCase.user_message) ??
                toStringSafe(testCase.query);
            if (!question) {
                continue;
            }
            const cleaned = sanitizeGeneratedQuestion(question);
            if (cleaned.length > 0) {
                uniqueQuestions.add(cleaned);
            }
        }
    }
    return Array.from(uniqueQuestions).slice(0, limit);
}
/**
 * Normalizes a domain hint to a trimmed, lower-cased string.
 *
 * @param {unknown} value
 * @returns {string|null} The normalized hint, or null when none was given.
 */
function normalizeDomainHint(value) {
    return toStringSafe(value)?.toLowerCase() ?? null;
}
/**
 * Builds the default (Russian) generation prompt for one capability group.
 *
 * Up to three supported operations, two typical queries and two 1C hints are
 * embedded; a sentence is omitted entirely when its list is empty.
 *
 * @param {object} group - Capability group with `group_title`,
 *   `supported_operations`, `typical_queries` and `one_c_hints` arrays.
 * @returns {string}
 */
function buildAutogenPromptFromCapabilityGroup(group) {
    // Renders " <lead><items>." or an empty string when nothing is listed.
    const optionalSentence = (lead, items, maxItems, separator) => {
        const joined = items.slice(0, maxItems).join(separator);
        return joined ? `${lead}${joined}.` : "";
    };
    const operationsPart = optionalSentence(" Опирайся на операции: ", group.supported_operations, 3, ", ");
    const examplesPart = optionalSentence(" Ближайшие формулировки: ", group.typical_queries, 2, " | ");
    const hintsPart = optionalSentence(" Можно мягко упоминать контекст 1С: ", group.one_c_hints, 2, ", ");
    return [
        `Генерируй реалистичные вопросы бухгалтера по группе "${group.group_title}".`,
        " Добавляй живую разговорную форму и опечатки, но сохраняй бизнес-смысл.",
        operationsPart,
        examplesPart,
        hintsPart,
        " Не выдумывай операции вне read-only режима."
    ].join("");
}
/**
 * Builds the catalog of auto-generation "personalities": one built-in general
 * entry followed by one entry per capability-registry group.
 *
 * Entries with a blank id are skipped and the first entry wins on id clashes.
 * Labels and default prompts are passed through repairAutogenMojibake.
 *
 * @returns {Array<object>} Catalog entries in registration order.
 */
function buildAutogenPersonalityCatalog() {
    const catalog = new Map();
    const register = (entry) => {
        if (entry.id.trim() && !catalog.has(entry.id)) {
            catalog.set(entry.id, entry);
        }
    };
    const generalEntry = {
        id: "general",
        label: "Общий контур",
        domain: null,
        default_prompt: "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл.",
        source: "built_in"
    };
    const registry = (0, capabilitiesRegistry_1.loadCapabilitiesRegistry)();
    const registryEntries = registry.groups.map((group) => ({
        id: `registry_${group.group_code}`,
        label: `${group.group_title} (реестр)`,
        domain: group.group_code,
        default_prompt: buildAutogenPromptFromCapabilityGroup(group),
        source: "capabilities_registry"
    }));
    register(generalEntry);
    registryEntries.forEach((entry) => register(entry));
    return Array.from(catalog.values(), (entry) => ({
        ...entry,
        label: repairAutogenMojibake(entry.label),
        default_prompt: repairAutogenMojibake(entry.default_prompt)
    }));
}
/**
 * Returns three hard-coded template questions for a domain hint.
 *
 * The hint is matched by substring, in this order: VAT ("vat"/"ндс"),
 * counterparties ("counter"/"контраг"), settlements
 * ("settlement"/"задолж"/"расчет"). Anything else, including a missing hint,
 * gets the generic set.
 *
 * @param {string|null|undefined} domain - Domain hint; matching is case-sensitive.
 * @returns {string[]}
 */
function fallbackDomainTemplates(domain) {
    const mentionsAny = (needles) => needles.some((needle) => Boolean(domain?.includes(needle)));
    const rules = [
        {
            needles: ["vat", "ндс"],
            questions: () => [
                "Сколько НДС к уплате на дату по организации?",
                "Покажи прогноз НДС за период по организации.",
                "Почему по НДС сейчас ноль и из чего сложился расчет?"
            ]
        },
        {
            needles: ["counter", "контраг"],
            questions: () => [
                "Покажи топ контрагентов по сумме платежей за период.",
                "Какой самый крупный договор у выбранной организации?",
                "Какие документы были по контрагенту за весь период?"
            ]
        },
        {
            needles: ["settlement", "задолж", "расчет"],
            questions: () => [
                "Какие незакрытые расчеты висят на конец периода?",
                "Есть ли незакрытые авансы по поставщикам?",
                "Покажи цепочки закрытия по счетам 60/62."
            ]
        }
    ];
    for (const rule of rules) {
        if (mentionsAny(rule.needles)) {
            return rule.questions();
        }
    }
    return [
        "С какой организацией сейчас можно работать в активном контуре?",
        "Покажи ключевые операции за выбранный период.",
        "Какие вопросы по этому домену ассистент поддерживает прямо сейчас?"
    ];
}
/**
 * Turns a canonical question into a casual, chat-style variant.
 *
 * The index deterministically selects a colloquial prefix and a tail; for even
 * indexes one whole-word typo substitution is applied as well.
 *
 * @param {string} base - Canonical question text.
 * @param {number} index - Position of the question in the generated batch.
 * @returns {string} The mutated, trimmed question.
 */
function mutateIntoQwenStyle(base, index) {
    const wrappers = ["йо ", "слушай ", "подскажи плиз ", "короче ", "мож ", "а ну-ка "];
    const tails = ["", " без воды", " по факту", " и коротко", " прям сейчас", " за весь период"];
    // JavaScript's `\b` is defined over ASCII word characters only, so
    // `/\bслово\b/` cannot match a Cyrillic word whose neighbours are spaces,
    // punctuation or other Cyrillic letters. Whole-word matching is therefore
    // expressed with Unicode-aware lookarounds.
    const wholeWord = (word) => new RegExp(`(?<![\\p{L}\\p{N}_])${word}(?![\\p{L}\\p{N}_])`, "giu");
    const typoMap = [
        [wholeWord("компания"), "компиния"],
        [wholeWord("сейчас"), "щас"],
        [wholeWord("пожалуйста"), "плиз"],
        [wholeWord("какая"), "кака"],
        [wholeWord("что"), "че"]
    ];
    const prefix = wrappers[index % wrappers.length];
    const tail = tails[index % tails.length];
    let text = `${prefix}${base}${tail}`.trim();
    if (index % 2 === 0) {
        const [pattern, replacement] = typoMap[index % typoMap.length];
        text = text.replace(pattern, replacement);
    }
    return text;
}
/**
 * Generates up to `count` casual question variants.
 *
 * Seeds come from the canonical eval datasets (at most 450) or, when none
 * exist, from the fallback domain templates. With a domain hint only seeds
 * that contain the hint or are themselves domain templates are used; if that
 * leaves nothing, all seeds are used. Duplicates are removed, so fewer than
 * `count` questions may be returned.
 *
 * @param {number} count - Number of questions to attempt.
 * @param {string|null} domain - Optional domain hint.
 * @returns {string[]}
 */
function generateQwenSeedQuestions(count, domain) {
    const canonical = collectCanonicalQuestions(450);
    const source = canonical.length > 0 ? canonical : fallbackDomainTemplates(domain);
    let bag = source;
    if (domain) {
        // Resolved lazily and reused: the template list depends only on `domain`.
        let templates = null;
        const isDomainTemplate = (question) => {
            if (templates === null) {
                templates = fallbackDomainTemplates(domain);
            }
            return templates.includes(question);
        };
        const matching = source.filter((question) => question.toLowerCase().includes(domain) || isDomainTemplate(question));
        if (matching.length > 0) {
            bag = matching;
        }
    }
    const unique = new Set();
    for (let position = 0; position < count; position += 1) {
        const seed = bag[position % bag.length];
        const question = sanitizeGeneratedQuestion(mutateIntoQwenStyle(seed, position));
        if (question.length > 0) {
            unique.add(question);
        }
    }
    return Array.from(unique).slice(0, count);
}
/**
 * Generates up to `count` questions by wrapping the fallback domain templates
 * into a fixed set of instruction patterns.
 *
 * Template and pattern are both picked round-robin by index. Duplicates are
 * removed, so fewer than `count` questions may be returned.
 *
 * @param {number} count - Number of questions to attempt.
 * @param {string|null} domain - Optional domain hint.
 * @returns {string[]}
 */
function generateCodexCreativeQuestions(count, domain) {
    const templates = fallbackDomainTemplates(domain);
    const patterns = [
        "Дай бизнес-срез по состоянию на дату: {q}",
        "Нужен аккуратный ответ как бухгалтеру: {q}",
        "Если данных не хватает, скажи что уточнить, но сначала попробуй: {q}",
        "Сформулируй результат без технички и с шагом дальше: {q}",
        "Проверь в read-only и скажи что видно: {q}"
    ];
    const unique = new Set();
    for (let position = 0; position < count; position += 1) {
        const template = templates[position % templates.length];
        const wrapped = patterns[position % patterns.length].replace("{q}", template);
        const question = sanitizeGeneratedQuestion(wrapped);
        if (question.length > 0) {
            unique.add(question);
        }
    }
    return Array.from(unique).slice(0, count);
}
/**
 * Creates a generation id of the form `gen-<time base36>-<random base36>`.
 * Math.random is the randomness source, so the id is not suitable as a secret.
 *
 * @returns {string}
 */
function generateAutogenId() {
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 9);
    return ["gen", timePart, randomPart].join("-");
}
/**
 * Reads and normalizes the annotations stored in
 * config_1.AUTORUN_ANNOTATIONS_FILE.
 *
 * Every entry is sanitized field by field; entries lacking an annotation id,
 * run id or case id are dropped. Best effort: a missing file, invalid JSON or
 * a non-array root yields [].
 *
 * @returns {Array<object>} Normalized annotations in file order.
 */
function readAnnotations() {
    if (!fs_1.default.existsSync(config_1.AUTORUN_ANNOTATIONS_FILE)) {
        return [];
    }
    const normalizeContext = (context) => ({
        message_id: toStringSafe(context?.message_id),
        trace_id: toStringSafe(context?.trace_id),
        reply_type: toStringSafe(context?.reply_type),
        eval_target: toStringSafe(context?.eval_target) ?? "unknown",
        prompt_version: toStringSafe(context?.prompt_version),
        domain: toStringSafe(context?.domain),
        query_class: toStringSafe(context?.query_class),
        question_text: toStringSafe(context?.question_text),
        answer_text: toStringSafe(context?.answer_text)
    });
    const normalizeAnnotation = (item) => ({
        annotation_id: toStringSafe(item.annotation_id) ?? "",
        run_id: toStringSafe(item.run_id) ?? "",
        case_id: toStringSafe(item.case_id) ?? "",
        session_id: toStringSafe(item.session_id) ?? "",
        message_index: clampInt(toNumberSafe(item.message_index), 0, 100_000, 0),
        rating: clampInt(toNumberSafe(item.rating), 1, 5, 1),
        comment: toStringSafe(item.comment) ?? "",
        manual_case_decision: parseManualCaseDecision(item.manual_case_decision),
        annotation_author: parseAnnotationAuthor(item.annotation_author),
        resolved: parseAnnotationResolved(item.resolved),
        resolved_at: toStringSafe(item.resolved_at),
        resolved_by: parseAnnotationAuthor(item.resolved_by),
        created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
        updated_at: toStringSafe(item.updated_at) ?? new Date().toISOString(),
        context: normalizeContext(toRecord(item.context))
    });
    try {
        const parsed = JSON.parse(fs_1.default.readFileSync(config_1.AUTORUN_ANNOTATIONS_FILE, "utf-8"));
        if (!Array.isArray(parsed)) {
            return [];
        }
        const annotations = [];
        for (const rawItem of parsed) {
            const item = toRecord(rawItem);
            if (item === null) {
                continue;
            }
            const annotation = normalizeAnnotation(item);
            if (annotation.annotation_id && annotation.run_id && annotation.case_id) {
                annotations.push(annotation);
            }
        }
        return annotations;
    }
    catch {
        // Deliberate best effort: unreadable annotations are reported as empty.
        return [];
    }
}
/**
 * Persists annotations to config_1.AUTORUN_ANNOTATIONS_FILE as pretty-printed
 * JSON.
 *
 * The parent directory is created when missing, mirroring
 * writeAutoGenHistory, so the first write on a fresh installation does not
 * fail with ENOENT.
 *
 * @param {Array<object>} items - Annotations to store.
 * @returns {void}
 * @throws {Error} Propagates file-system errors from mkdirSync/writeFileSync.
 */
function writeAnnotations(items) {
    const dir = path_1.default.dirname(config_1.AUTORUN_ANNOTATIONS_FILE);
    if (!fs_1.default.existsSync(dir)) {
        fs_1.default.mkdirSync(dir, { recursive: true });
    }
    fs_1.default.writeFileSync(config_1.AUTORUN_ANNOTATIONS_FILE, JSON.stringify(items, null, 2), "utf-8");
}
/**
 * Builds the composite key `<runId>::<caseId>::<messageIndex>`.
 *
 * @param {string} runId
 * @param {string} caseId
 * @param {number} messageIndex
 * @returns {string}
 */
function annotationKey(runId, caseId, messageIndex) {
    // String.prototype.concat stringifies its arguments the same way a
    // template literal does.
    return "".concat(runId, "::", caseId, "::", messageIndex);
}
/**
 * Aggregates annotation statistics per case for a single run.
 *
 * @param {string} runId - Only annotations with this run id are counted.
 * @param {Array<object>} annotations - Annotations with `run_id`, `case_id`,
 *   `rating` and `updated_at`.
 * @returns {Map<string, {count: number, latest_at: (string|null), avg_rating: (number|null)}>}
 *   Keyed by case id; `latest_at` is the newest parsable `updated_at` as an ISO
 *   string, `avg_rating` is rounded to two decimals.
 */
function buildAnnotationStatsMap(runId, annotations) {
    const tallies = new Map();
    for (const annotation of annotations) {
        if (annotation.run_id !== runId) {
            continue;
        }
        let tally = tallies.get(annotation.case_id);
        if (!tally) {
            tally = { count: 0, ratingSum: 0, latestMs: null };
            tallies.set(annotation.case_id, tally);
        }
        tally.count += 1;
        tally.ratingSum += annotation.rating;
        const updatedMs = Date.parse(annotation.updated_at);
        const isNewer = tally.latestMs === null || updatedMs > tally.latestMs;
        if (Number.isFinite(updatedMs) && isNewer) {
            tally.latestMs = updatedMs;
        }
    }
    const stats = new Map();
    tallies.forEach((tally, caseId) => {
        stats.set(caseId, {
            count: tally.count,
            latest_at: tally.latestMs === null ? null : new Date(tally.latestMs).toISOString(),
            avg_rating: tally.count > 0 ? Number((tally.ratingSum / tally.count).toFixed(2)) : null
        });
    });
    return stats;
}
/**
 * Picks, for one run/case pair, the most recently updated annotation per
 * message index.
 *
 * Rules when several annotations share a message index:
 * - a later `updated_at` wins; on equal timestamps the later array entry wins;
 * - an annotation with an unparsable `updated_at` never replaces a stored one;
 * - a stored annotation with an unparsable `updated_at` is replaced by any
 *   validly dated one (previously it could never be replaced, because every
 *   comparison against NaN is false).
 *
 * @param {string} runId
 * @param {string} caseId
 * @param {Array<object>} annotations - Annotations with `run_id`, `case_id`,
 *   `message_index` and `updated_at`.
 * @returns {Map<number, object>} Winning annotation keyed by message index.
 */
function buildAnnotationsByMessageIndex(runId, caseId, annotations) {
    const map = new Map();
    for (const item of annotations) {
        if (item.run_id !== runId || item.case_id !== caseId)
            continue;
        const current = map.get(item.message_index);
        if (!current) {
            map.set(item.message_index, item);
            continue;
        }
        const nextMs = Date.parse(item.updated_at);
        if (Number.isNaN(nextMs)) {
            continue;
        }
        const currentMs = Date.parse(current.updated_at);
        if (Number.isNaN(currentMs) || nextMs >= currentMs) {
            map.set(item.message_index, item);
        }
    }
    return map;
}
/**
 * Determines the eval target of a run.
 *
 * Precedence: an explicit, recognized `report.eval_target`; then the run id
 * prefix; then a report path ending in ".report.json" (normalizer);
 * otherwise "unknown".
 *
 * @param {{report: object, runId: string, reportPath: string}} input
 * @returns {string} One of "assistant_stage1", "assistant_stage2",
 *   "assistant_p0", "normalizer" or "unknown".
 */
function resolveRunTarget(input) {
    const recognizedTargets = ["assistant_stage1", "assistant_stage2", "assistant_p0", "normalizer"];
    const explicit = toStringSafe(input.report.eval_target);
    if (recognizedTargets.includes(explicit)) {
        return explicit;
    }
    const targetsByPrefix = [
        ["assistant-stage1-", "assistant_stage1"],
        ["assistant-stage2-", "assistant_stage2"],
        ["assistant-p0-", "assistant_p0"],
        ["eval-", "normalizer"]
    ];
    const prefixMatch = targetsByPrefix.find(([prefix]) => input.runId.startsWith(prefix));
    if (prefixMatch) {
        return prefixMatch[1];
    }
    return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
/**
 * Resolves the timestamp of a run report.
 *
 * Uses `report.run_timestamp`, then `report.timestamp`, and finally the
 * file's modification time when neither parses as a date.
 *
 * @param {object} report - Parsed report object.
 * @param {number} fileMtimeMs - Report file mtime in epoch milliseconds.
 * @returns {{iso: string, ms: number}}
 */
function normalizeTimestamp(report, fileMtimeMs) {
    const ms = parseDateMs(report.run_timestamp) ?? parseDateMs(report.timestamp) ?? fileMtimeMs;
    return { iso: new Date(ms).toISOString(), ms };
}
/**
 * Converts a rate to a percentage clamped to [0, 100].
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100; larger
 * values are taken as percentages already.
 *
 * @param {number|null} value
 * @returns {number|null} null is passed through unchanged.
 */
function rateToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 1.2 ? value * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
/**
 * Converts a score to a percentage clamped to [0, 100].
 *
 * Values up to 5.2 are treated as points on a 0-5 scale; larger values are
 * taken as percentages already.
 *
 * @param {number|null} value
 * @returns {number|null} null is passed through unchanged.
 */
function scoreToPercent(value) {
    if (value === null) {
        return null;
    }
    const percent = value <= 5.2 ? (value / 5) * 100 : value;
    return Math.max(0, Math.min(100, percent));
}
/**
 * Arithmetic mean of the finite numbers in a list, rounded to two decimals.
 *
 * @param {Array<unknown>} values - Non-numbers, NaN and infinities are ignored.
 * @returns {number|null} The mean, or null when no finite number is present.
 */
function average(values) {
    let total = 0;
    let counted = 0;
    for (const value of values) {
        if (typeof value === "number" && Number.isFinite(value)) {
            total += value;
            counted += 1;
        }
    }
    return counted === 0 ? null : Number((total / counted).toFixed(2));
}
/**
 * Returns the metrics object of a report, preferring `metrics.raw` when that
 * is an object.
 *
 * @param {object} report
 * @returns {object|null} null when the report has no metrics object.
 */
function getMetricRecord(report) {
    const metrics = toRecord(report.metrics);
    return metrics ? toRecord(metrics.raw) ?? metrics : null;
}
/**
 * Computes a 0-100 score index for a run as the average of a target-specific
 * set of metrics, each converted to a percentage first.
 *
 * "Inverted" rates (leakage, generic explanations, false confidence, broad
 * answers) count as `1 - rate`, with a missing value treated as the worst
 * case (rate 1). Any target other than the three assistant ones uses the
 * normalizer metric set.
 *
 * @param {object} report - Parsed report.
 * @param {string} target - Eval target of the run.
 * @returns {number|null} The score index, or null without usable metrics.
 */
function computeScoreIndex(report, target) {
    const metrics = getMetricRecord(report);
    if (!metrics) {
        return null;
    }
    const metric = (name) => toNumberSafe(metrics[name]);
    const rate = (name) => rateToPercent(metric(name));
    const score = (name) => scoreToPercent(metric(name));
    const invertedRate = (name) => rateToPercent(1 - (metric(name) ?? 1));
    switch (target) {
        case "assistant_p0":
            return average([
                rate("problem_first_answer_rate"),
                score("mechanism_coherence_score"),
                invertedRate("entity_leakage_rate"),
                score("accountant_actionability_score"),
                rate("route_correctness_rate"),
                rate("domain_purity_rate"),
                rate("limitation_honesty_rate"),
                rate("top_problem_unit_match_rate")
            ]);
        case "assistant_stage1":
            return average([
                rate("retrieval_differentiation_rate"),
                invertedRate("generic_explanation_rate"),
                score("accountant_actionability_score"),
                invertedRate("false_confidence_rate"),
                invertedRate("broad_answer_rate"),
                score("mechanism_specificity_score"),
                score("followup_context_retention_score"),
                rate("stage4_contract_compliance_rate")
            ]);
        case "assistant_stage2":
            return average([
                rate("problem_unit_precision"),
                rate("problem_unit_recall_proxy"),
                rate("duplicate_collapse_rate"),
                score("mechanism_coherence_score"),
                score("problem_clarity_score"),
                rate("problem_first_answer_rate"),
                invertedRate("entity_leakage_rate")
            ]);
        default:
            return average([
                rate("schema_validation_pass_rate"),
                rateToPercent(metric("route_resolution_accuracy") ?? metric("route_hint_accuracy")),
                rateToPercent(metric("execution_state_consistency_rate") ?? metric("intent_class_accuracy")),
                // NOTE(review): subtracts from 100 rather than 1, unlike the
                // assistant branches - presumably this metric is stored as a
                // percentage; confirm against the report producer.
                rateToPercent(100 - (metric("high_confidence_error_rate") ?? 0))
            ]);
    }
}
/**
 * Counts blocking and quality failures listed in the report's gates.
 *
 * Blocking: `acceptance_gate.blocking_failures` plus
 * `baseline_stability_gate.blocking_regressions`. Quality:
 * `acceptance_gate.quality_failures` plus the baseline gate's
 * `legacy_quality_failures` and `quality_gap_failures`.
 *
 * @param {object} report
 * @returns {{blocking: number, quality: number}}
 */
function countFailures(report) {
    const acceptance = toRecord(report.acceptance_gate);
    const baseline = toRecord(report.baseline_stability_gate);
    const size = (list) => toArray(list).length;
    return {
        blocking: size(acceptance?.blocking_failures) + size(baseline?.blocking_regressions),
        quality: size(acceptance?.quality_failures) +
            size(baseline?.legacy_quality_failures) +
            size(baseline?.quality_gap_failures)
    };
}
/**
 * Derives a 0-100 score for one case from its metric subscores.
 *
 * `case_product_score` wins when present; otherwise the available clarity,
 * coherence, problem-first, inverted leakage and usefulness subscores are
 * averaged.
 *
 * @param {object|null} metricSubscores
 * @returns {number|null} Score rounded to two decimals, or null when unavailable.
 */
function caseScoreFromMetricSubscores(metricSubscores) {
    if (!metricSubscores) {
        return null;
    }
    const subscore = (name) => toNumberSafe(metricSubscores[name]);
    const productScore = scoreToPercent(subscore("case_product_score"));
    if (productScore !== null) {
        return Number(productScore.toFixed(2));
    }
    return average([
        scoreToPercent(subscore("problem_clarity_score")),
        scoreToPercent(subscore("mechanism_coherence_score")),
        rateToPercent(subscore("problem_first_answer_rate")),
        // A missing leakage rate is treated as the worst case (rate 1).
        rateToPercent(1 - (subscore("entity_leakage_rate") ?? 1)),
        scoreToPercent(subscore("accountant_usefulness_score"))
    ]);
}
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the `route_correct`, `domain_pure` and
 * `problem_first_answer` checks is interpretable, the case is closed unless
 * one of them is false. Otherwise a numeric score of 65 or more closes it.
 *
 * @param {{checks: (object|null), scoreIndex: (number|null)}} input
 * @returns {boolean|null} null when neither checks nor score allow a verdict.
 */
function isCaseClosed(input) {
    const checks = input.checks;
    if (checks) {
        const verdicts = [
            toBooleanSafe(checks.route_correct),
            toBooleanSafe(checks.domain_pure),
            toBooleanSafe(checks.problem_first_answer)
        ];
        const anyInterpretable = verdicts.some((verdict) => verdict !== null);
        if (anyInterpretable) {
            return !verdicts.includes(false);
        }
    }
    return typeof input.scoreIndex === "number" ? input.scoreIndex >= 65 : null;
}
/**
 * Returns the object entries of `report.results`, dropping everything else.
 *
 * @param {object} report
 * @returns {Array<object>}
 */
function getResultCases(report) {
    const cases = [];
    for (const entry of toArray(report.results)) {
        const record = toRecord(entry);
        if (record !== null) {
            cases.push(record);
        }
    }
    return cases;
}
/**
 * Builds one summary per result case of a report.
 *
 * @param {object} report - Parsed report.
 * @param {string} runId - Run id; combined with the case id into the session id.
 * @param {boolean} checkDialogAvailability - When true, checks whether a
 *   session file `<runId>-<caseId>.json` exists in config_1.ASSISTANT_SESSIONS_DIR.
 * @param {Map<string, object>} [annotationStatsByCase] - Optional per-case
 *   annotation statistics.
 * @returns {Array<object>} Case summaries in report order.
 */
function buildCaseSummaries(report, runId, checkDialogAvailability, annotationStatsByCase) {
    const statusOf = (closedState) => {
        if (closedState === null) {
            return "unknown";
        }
        return closedState ? "closed" : "open";
    };
    const hasSessionFile = (sessionId) => fs_1.default.existsSync(path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`));
    return getResultCases(report).map((item, position) => {
        // Cases without an id get a positional one, starting at case-1.
        const caseId = toStringSafe(item.case_id) ?? `case-${position + 1}`;
        const checks = toRecord(item.checks);
        const metricSubscores = toRecord(item.metric_subscores);
        const scoreIndex = caseScoreFromMetricSubscores(metricSubscores) ??
            scoreToPercent(toNumberSafe(item.accountant_usefulness_score)) ??
            null;
        const closedState = isCaseClosed({ checks, scoreIndex });
        const sessionId = `${runId}-${caseId}`;
        const dialogAvailable = checkDialogAvailability ? hasSessionFile(sessionId) : false;
        const stats = annotationStatsByCase?.get(caseId);
        return {
            case_id: caseId,
            domain: toStringSafe(item.domain),
            query_class: toStringSafe(item.query_class),
            status: statusOf(closedState),
            score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
            trace_id: toStringSafe(item.trace_id),
            reply_type: toStringSafe(item.reply_type),
            session_id: sessionId,
            dialog_available: dialogAvailable,
            commented_count: stats?.count ?? 0,
            latest_annotation_at: stats?.latest_at ?? null,
            avg_rating: stats?.avg_rating ?? null,
            checks,
            metric_subscores: metricSubscores
        };
    });
}
/**
 * Summarizes case statuses overall and per domain.
 *
 * @param {Array<{status: string, domain: (string|null)}>} cases - Cases
 *   without a domain are grouped under "unknown".
 * @returns {{closed_cases: number, open_cases: number, domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}}
 *   `domain_coverage` is sorted by total cases, largest first.
 */
function buildCoverageFromCases(cases) {
    const perDomain = new Map();
    const totals = { closed: 0, open: 0 };
    cases.forEach((testCase) => {
        const isClosed = testCase.status === "closed";
        if (isClosed) {
            totals.closed += 1;
        }
        else if (testCase.status === "open") {
            totals.open += 1;
        }
        const domain = testCase.domain ?? "unknown";
        const tally = perDomain.get(domain) ?? { total: 0, closed: 0 };
        tally.total += 1;
        tally.closed += isClosed ? 1 : 0;
        perDomain.set(domain, tally);
    });
    const domainCoverage = [];
    for (const [domain, tally] of perDomain) {
        domainCoverage.push({ domain, total_cases: tally.total, closed_cases: tally.closed });
    }
    domainCoverage.sort((left, right) => right.total_cases - left.total_cases);
    return {
        closed_cases: totals.closed,
        open_cases: totals.open,
        domain_coverage: domainCoverage
    };
}
/**
 * Lists candidate report files: `*.json` in config_1.REPORTS_DIR and
 * `*.report.json` in config_1.EVAL_CASES_DIR (top level only).
 *
 * Missing directories are skipped, as are files whose stat call fails.
 *
 * @param {number} scanLimit - Maximum number of candidates to return.
 * @returns {Array<{path: string, mtimeMs: number}>} Newest first.
 */
function collectJsonCandidates(scanLimit) {
    const sources = [
        { dir: config_1.REPORTS_DIR, suffix: ".json" },
        { dir: config_1.EVAL_CASES_DIR, suffix: ".report.json" }
    ];
    const found = [];
    for (const { dir, suffix } of sources) {
        if (!fs_1.default.existsSync(dir)) {
            continue;
        }
        const matchingFiles = fs_1.default
            .readdirSync(dir, { withFileTypes: true })
            .filter((entry) => entry.isFile() && entry.name.endsWith(suffix));
        for (const file of matchingFiles) {
            const filePath = path_1.default.resolve(dir, file.name);
            try {
                const { mtimeMs } = fs_1.default.statSync(filePath);
                found.push({ path: filePath, mtimeMs });
            }
            catch {
                // A file that cannot be stat'ed is left out of the listing.
            }
        }
    }
    found.sort((left, right) => right.mtimeMs - left.mtimeMs);
    return found.slice(0, scanLimit);
}
/**
 * Loads the newest report files and indexes them by run id.
 *
 * Files that cannot be read or parsed, whose root is not an object, or that
 * lack a `run_id` are ignored. When several files share a run id, the one
 * with the latest resolved timestamp is kept.
 *
 * @param {number} scanLimit - Maximum number of files to inspect.
 * @returns {Array<object>} Indexed runs, newest first.
 */
function indexRuns(scanLimit) {
    const loadReport = (filePath) => {
        try {
            return toRecord(JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")));
        }
        catch {
            return null;
        }
    };
    const latestByRunId = new Map();
    for (const candidate of collectJsonCandidates(scanLimit)) {
        const report = loadReport(candidate.path);
        const runId = report ? toStringSafe(report.run_id) : null;
        if (!report || !runId) {
            continue;
        }
        const evalTarget = resolveRunTarget({ report, runId, reportPath: candidate.path });
        const resolvedTime = normalizeTimestamp(report, candidate.mtimeMs);
        const entry = {
            run_id: runId,
            eval_target: evalTarget,
            report_path: candidate.path,
            report,
            timestamp_iso: resolvedTime.iso,
            timestamp_ms: resolvedTime.ms
        };
        const known = latestByRunId.get(runId);
        if (!known || entry.timestamp_ms > known.timestamp_ms) {
            latestByRunId.set(runId, entry);
        }
    }
    return [...latestByRunId.values()].sort((left, right) => right.timestamp_ms - left.timestamp_ms);
}
/**
 * Parses run-history filters from a request query object.
 *
 * Defaults: target "all", mode "all", no mock filter, no prompt filter,
 * limit 120 (clamped to 1..500), scan_limit 900 (clamped to 50..5000).
 * `use_mock=any` or an unrecognized flag disables the mock filter.
 *
 * @param {object} query - Raw query parameters.
 * @returns {object} Normalized filters.
 */
function parseFilters(query) {
    const supportedTargets = ["normalizer", "assistant_stage1", "assistant_stage2", "assistant_p0"];
    const requestedTarget = toStringSafe(query.target)?.toLowerCase() ?? "all";
    const mockFlag = toStringSafe(query.use_mock);
    const mockFilterDisabled = mockFlag === null || mockFlag.toLowerCase() === "any";
    return {
        from_ms: parseDateMs(query.from),
        to_ms: parseDateMs(query.to),
        target: supportedTargets.includes(requestedTarget) ? requestedTarget : "all",
        use_mock: mockFilterDisabled ? null : toBooleanSafe(mockFlag),
        prompt_contains: (toStringSafe(query.prompt_contains) ?? "").toLowerCase(),
        mode: toStringSafe(query.mode)?.toLowerCase() ?? "all",
        limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
        scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
    };
}
/**
 * Tells whether an indexed run satisfies every active filter.
 *
 * @param {object} run - Indexed run with `timestamp_ms`, `eval_target` and `report`.
 * @param {object} filters - Filters produced by parseFilters.
 * @returns {boolean}
 */
function matchesFilters(run, filters) {
    const { report } = run;
    const beforeWindow = filters.from_ms !== null && run.timestamp_ms < filters.from_ms;
    const afterWindow = filters.to_ms !== null && run.timestamp_ms > filters.to_ms;
    if (beforeWindow || afterWindow) {
        return false;
    }
    const wrongTarget = filters.target !== "all" && run.eval_target !== filters.target;
    if (wrongTarget) {
        return false;
    }
    if (filters.mode !== "all") {
        const runMode = (toStringSafe(report.mode) ?? "").toLowerCase();
        if (runMode !== filters.mode) {
            return false;
        }
    }
    if (filters.use_mock !== null && toBooleanSafe(report.use_mock) !== filters.use_mock) {
        return false;
    }
    if (filters.prompt_contains.length === 0) {
        return true;
    }
    const promptVersion = (toStringSafe(report.prompt_version) ?? "").toLowerCase();
    return promptVersion.includes(filters.prompt_contains);
}
/**
 * Builds the summary of an indexed run.
 *
 * Provider and model are taken from the first place that has them: the report
 * root, then `connection`, then `normalize_config`/`normalizeConfig` (the
 * provider in both snake_case and camelCase spellings).
 *
 * @param {object} run - Indexed run produced by indexRuns.
 * @returns {object} Run summary including scores, failure counts and coverage.
 */
function buildRunSummary(run) {
    const { report } = run;
    const connection = toRecord(report.connection);
    const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);
    // First non-blank string among the candidates, or null.
    const firstString = (candidates) => {
        for (const candidate of candidates) {
            const text = toStringSafe(candidate);
            if (text !== null) {
                return text;
            }
        }
        return null;
    };
    const llmProvider = firstString([
        report.llm_provider,
        report.llmProvider,
        connection?.llm_provider,
        connection?.llmProvider,
        normalizeConfig?.llm_provider,
        normalizeConfig?.llmProvider
    ]);
    const model = firstString([report.model, connection?.model, normalizeConfig?.model]);
    const cases = buildCaseSummaries(report, run.run_id, false);
    const coverage = buildCoverageFromCases(cases);
    const failures = countFailures(report);
    return {
        run_id: run.run_id,
        eval_target: run.eval_target,
        run_timestamp: run.timestamp_iso,
        mode: toStringSafe(report.mode),
        llm_provider: llmProvider,
        model,
        use_mock: toBooleanSafe(report.use_mock),
        analysis_date: toStringSafe(report.analysis_date),
        prompt_version: toStringSafe(report.prompt_version),
        schema_version: toStringSafe(report.schema_version),
        suite_id: toStringSafe(report.suite_id),
        cases_total: toNumberSafe(report.cases_total) ?? cases.length,
        requests_total: toNumberSafe(toRecord(report.budget)?.requests_total),
        report_path: run.report_path,
        score_index: computeScoreIndex(report, run.eval_target),
        blocking_failures: failures.blocking,
        quality_failures: failures.quality,
        closed_cases: coverage.closed_cases,
        open_cases: coverage.open_cases,
        domain_coverage: coverage.domain_coverage
    };
}
/**
 * Sums per-domain coverage counters across several run summaries.
 *
 * @param {Array<{domain_coverage: Array<{domain: string, total_cases: number, closed_cases: number}>}>} summaries
 * @returns {Array<{domain: string, total_cases: number, closed_cases: number}>}
 *   One entry per domain, ordered by descending `total_cases`.
 */
function mergeDomainCoverage(summaries) {
    const totalsByDomain = new Map();
    for (const { domain_coverage: coverage } of summaries) {
        for (const entry of coverage) {
            let bucket = totalsByDomain.get(entry.domain);
            if (bucket === undefined || bucket === null) {
                bucket = { total: 0, closed: 0 };
            }
            bucket.total += entry.total_cases;
            bucket.closed += entry.closed_cases;
            totalsByDomain.set(entry.domain, bucket);
        }
    }
    const rows = [];
    for (const [domain, bucket] of totalsByDomain) {
        rows.push({
            domain,
            total_cases: bucket.total,
            closed_cases: bucket.closed
        });
    }
    // Largest domains first.
    rows.sort((left, right) => right.total_cases - left.total_cases);
    return rows;
}
/**
 * Aggregates statistics over a list of run summaries.
 *
 * The trend compares the score of the first summary with the second one and
 * only reports "up"/"down" when they differ by more than 0.5; it is "flat"
 * otherwise, including when either score is missing.
 *
 * @param {Array<object>} summaries - Run summaries; reads `eval_target`,
 *   `blocking_failures`, `quality_failures`, `score_index`, `domain_coverage`.
 * @returns {object} Totals, per-target counts, average/latest/previous score,
 *   trend label and merged domain coverage.
 */
function buildHistoryStats(summaries) {
    const byTarget = {};
    const numericScores = [];
    let blockingRuns = 0;
    let qualityRuns = 0;
    for (const summary of summaries) {
        const seenForTarget = byTarget[summary.eval_target] ?? 0;
        byTarget[summary.eval_target] = seenForTarget + 1;
        if (summary.blocking_failures > 0) {
            blockingRuns += 1;
        }
        if (summary.quality_failures > 0) {
            qualityRuns += 1;
        }
        if (typeof summary.score_index === "number") {
            numericScores.push(summary.score_index);
        }
    }
    // Numeric score at a given position of the list, or null when absent.
    const scoreAt = (position) => {
        const score = summaries[position]?.score_index;
        return typeof score === "number" ? score : null;
    };
    const latestScore = scoreAt(0);
    const previousScore = scoreAt(1);
    let trend = "flat";
    if (latestScore !== null && previousScore !== null) {
        if (latestScore > previousScore + 0.5) {
            trend = "up";
        }
        else if (latestScore < previousScore - 0.5) {
            trend = "down";
        }
    }
    let averageScore = null;
    if (numericScores.length > 0) {
        let scoreSum = 0;
        for (const score of numericScores) {
            scoreSum += score;
        }
        // Rounded to two decimals.
        averageScore = Number((scoreSum / numericScores.length).toFixed(2));
    }
    return {
        runs_total: summaries.length,
        by_target: byTarget,
        blocking_runs: blockingRuns,
        quality_gap_runs: qualityRuns,
        avg_score_index: averageScore,
        latest_score_index: latestScore,
        previous_score_index: previousScore,
        trend,
        domain_coverage: mergeDomainCoverage(summaries)
    };
}
/**
 * Looks up a run by id among the runs returned by `indexRuns(scanLimit)`.
 *
 * @param {string} runId - Run identifier to match exactly.
 * @param {number} [scanLimit=3000] - Passed through to `indexRuns`.
 * @returns {object|null} The first run whose `run_id` matches, or `null`.
 */
function findRunById(runId, scanLimit = 3000) {
    const candidates = indexRuns(scanLimit);
    const match = candidates.find((candidate) => candidate.run_id === runId);
    if (match === undefined || match === null) {
        return null;
    }
    return match;
}
/**
 * Summarises routing/debug metadata of the last assistant message in a stored dialog.
 *
 * @param {object|null} dialogRecord - Parsed session record; only `conversation` is read.
 * @returns {object|null} `null` when no record is given; otherwise an object with
 *   `reply_type`, `trace_id` and selected `debug` fields of the last message whose
 *   role is "assistant" (all fields come from `toStringSafe` on possibly missing values).
 */
function buildAssistantModeSummary(dialogRecord) {
    if (!dialogRecord) {
        return null;
    }
    const entries = toArray(dialogRecord.conversation)
        .map((entry) => toRecord(entry))
        .filter((entry) => entry !== null);
    // Walk backwards so the most recent assistant message wins.
    let lastAssistant;
    for (let position = entries.length - 1; position >= 0; position -= 1) {
        if (toStringSafe(entries[position].role) === "assistant") {
            lastAssistant = entries[position];
            break;
        }
    }
    const debug = toRecord(lastAssistant?.debug);
    return {
        reply_type: toStringSafe(lastAssistant?.reply_type),
        trace_id: toStringSafe(lastAssistant?.trace_id),
        detected_mode: toStringSafe(debug?.detected_mode),
        execution_lane: toStringSafe(debug?.execution_lane),
        tool_gate_decision: toStringSafe(debug?.tool_gate_decision),
        living_router_mode: toStringSafe(debug?.living_router_mode),
        fallback_type: toStringSafe(debug?.fallback_type)
    };
}
/**
 * Loads the stored assistant session for a run/case pair and converts it into
 * the dialog shape used by the dialog endpoints.
 *
 * The session file is `<runId>-<caseId>.json` inside `ASSISTANT_SESSIONS_DIR`.
 * Both ids can originate from request parameters, so the resolved path is
 * verified to stay inside that directory before anything is read; an id that
 * contains `../` segments is treated as "no session".
 *
 * @param {string} runId - Run identifier.
 * @param {string} caseId - Case identifier.
 * @returns {object|null} `{ source, session_id, messages, decomposition, assistant_mode }`,
 *   or `null` when the file is missing, unreadable, not a JSON object, or would
 *   resolve outside the sessions directory.
 */
function loadSessionDialog(runId, caseId) {
    const sessionId = `${runId}-${caseId}`;
    const sessionsDir = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR);
    const filePath = path_1.default.resolve(sessionsDir, `${sessionId}.json`);
    // Path-traversal guard: reject anything that normalises to a location
    // outside the sessions directory.
    const relativePath = path_1.default.relative(sessionsDir, filePath);
    const escapesSessionsDir = relativePath === ".." ||
        relativePath.startsWith(`..${path_1.default.sep}`) ||
        path_1.default.isAbsolute(relativePath);
    if (escapesSessionsDir) {
        return null;
    }
    if (!fs_1.default.existsSync(filePath)) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8"));
    }
    catch {
        // Unreadable or malformed session files are treated as absent so the
        // caller can fall back to report data.
        return null;
    }
    const record = toRecord(parsed);
    if (!record) {
        return null;
    }
    const conversation = toArray(record.conversation)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    const messages = conversation.map((item) => ({
        message_id: toStringSafe(item.message_id),
        role: toStringSafe(item.role) ?? "unknown",
        text: toStringSafe(item.text) ?? "",
        created_at: toStringSafe(item.created_at),
        trace_id: toStringSafe(item.trace_id),
        reply_type: toStringSafe(item.reply_type)
    }));
    const turns = toArray(record.turns)
        .map((item) => toRecord(item))
        .filter((item) => item !== null);
    // Only the last turn's human-readable decomposition is exposed.
    const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
    const humanReadable = toRecord(lastTurn?.human_readable);
    const decomposition = toArray(humanReadable?.decomposition)
        .map((item) => toStringSafe(item))
        .filter((item) => item !== null);
    return {
        source: "assistant_session",
        session_id: sessionId,
        messages,
        decomposition,
        assistant_mode: buildAssistantModeSummary(record)
    };
}
/**
 * Synthesises a two-message dialog for a case from the run report when no
 * stored assistant session is available.
 *
 * The user message is the case question; the assistant message is a list of
 * `key=value` lines built from validation flags, confidence and numeric metric
 * subscores found on the case.
 *
 * @param {object} run - Indexed run; reads `run_id` and `report`.
 * @param {string} caseId - Case identifier to look up among the report's result cases.
 * @returns {object} Dialog with `source` "report_fallback", or "none" (with no
 *   messages) when the case is not present in the report.
 */
function buildFallbackDialog(run, caseId) {
    const sessionId = `${run.run_id}-${caseId}`;
    const isRequestedCase = (entry) => (toStringSafe(entry.case_id) ?? "") === caseId;
    const targetCase = getResultCases(run.report).find(isRequestedCase) ?? null;
    if (!targetCase) {
        return {
            source: "none",
            session_id: sessionId,
            messages: [],
            decomposition: [],
            assistant_mode: null
        };
    }
    const userText = toStringSafe(targetCase.raw_question) ??
        toStringSafe(targetCase.user_query_raw) ??
        `Case ${caseId}`;
    const summaryLines = [];
    // Boolean checks are reported only when the report holds a usable value.
    for (const flagName of ["validation_passed", "route_match", "intent_match"]) {
        const flagValue = toBooleanSafe(targetCase[flagName]);
        if (flagValue !== null) {
            summaryLines.push(`${flagName}=${flagValue}`);
        }
    }
    const confidence = toStringSafe(targetCase.confidence_overall);
    if (confidence) {
        summaryLines.push(`confidence=${confidence}`);
    }
    const metricSubscores = toRecord(targetCase.metric_subscores);
    if (metricSubscores) {
        Object.entries(metricSubscores).forEach(([metricName, metricValue]) => {
            if (toNumberSafe(metricValue) !== null) {
                summaryLines.push(`${metricName}=${metricValue}`);
            }
        });
    }
    if (summaryLines.length === 0) {
        summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
    }
    const userMessage = {
        message_id: null,
        role: "user",
        text: userText,
        created_at: null,
        trace_id: null,
        reply_type: null
    };
    const assistantMessage = {
        message_id: null,
        role: "assistant",
        text: summaryLines.join("\n"),
        created_at: null,
        trace_id: toStringSafe(targetCase.trace_id),
        reply_type: toStringSafe(targetCase.reply_type)
    };
    return {
        source: "report_fallback",
        session_id: sessionId,
        messages: [userMessage, assistantMessage],
        decomposition: [],
        assistant_mode: null
    };
}
/**
 * Decorates dialog messages with their position and any annotation attached
 * to that position.
 *
 * @param {string} runId - Run identifier.
 * @param {string} caseId - Case identifier.
 * @param {Array<object>} messages - Dialog messages in display order.
 * @param {Array<object>} annotations - Annotations handed to `buildAnnotationsByMessageIndex`.
 * @returns {Array<object>} Copies of the messages extended with `message_index`,
 *   `commented` and `annotation` (`null` when the position has none).
 */
function withMessageAnnotations(runId, caseId, messages, annotations) {
    const annotationByPosition = buildAnnotationsByMessageIndex(runId, caseId, annotations);
    const decorate = (message, position) => {
        const found = annotationByPosition.get(position) ?? null;
        const isCommented = found !== null;
        return {
            ...message,
            message_index: position,
            commented: isCommented,
            annotation: found
        };
    };
    return messages.map(decorate);
}
/**
 * Concatenates the dialogs of every case of a run into one run-wide dialog.
 *
 * Each message keeps its per-case position in `case_message_index`, while
 * `message_index` is renumbered to be sequential across the whole run.
 * Decomposition steps are prefixed with `[caseId]`.
 *
 * @param {object} run - Indexed run; reads `run_id` and `report`.
 * @param {Array<object>} annotations - Annotations used to decorate messages.
 * @returns {object} Dialog with `source` "run_aggregate" and session id `<run_id>::__all__`.
 */
function buildRunAggregateDialog(run, annotations) {
    const caseSummaries = buildCaseSummaries(run.report, run.run_id, false);
    const messages = [];
    const decomposition = [];
    for (const { case_id: caseId } of caseSummaries) {
        // Prefer the stored session; otherwise synthesise a dialog from the report.
        const dialog = loadSessionDialog(run.run_id, caseId) ?? buildFallbackDialog(run, caseId);
        const annotated = withMessageAnnotations(run.run_id, caseId, dialog.messages, annotations);
        annotated.forEach((entry) => {
            messages.push({
                ...entry,
                case_id: caseId,
                case_message_index: toNumberSafe(entry.message_index) ?? 0,
                // The run-wide index equals the number of messages collected so far.
                message_index: messages.length
            });
        });
        for (const step of dialog.decomposition) {
            decomposition.push(`[${caseId}] ${step}`);
        }
    }
    return {
        source: "run_aggregate",
        session_id: `${run.run_id}::__all__`,
        messages,
        decomposition,
        assistant_mode: null
    };
}
/**
 * Creates an annotation id of the form `ann-<time>-<random>`, where both parts
 * are base-36 strings (current epoch milliseconds and up to seven characters
 * taken from `Math.random()`).
 *
 * @returns {string} New annotation identifier.
 */
function generateAnnotationId() {
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 9);
    return ["ann", timePart, randomPart].join("-");
}
/**
 * Normalises a comment value to a trimmed string.
 *
 * @param {unknown} value - Raw comment value.
 * @returns {string} Trimmed text; empty when `toStringSafe` yields nothing.
 */
function parseComment(value) {
    const rawText = toStringSafe(value);
    return (rawText ?? "").trim();
}
/**
 * Parses the `manual_case_decision` filter value.
 *
 * @param {unknown} value - Raw filter value.
 * @returns {*} "all" when the value is missing or equals "all"; otherwise the
 *   result of `parseManualCaseDecision` for the supplied text.
 */
function parseDecisionFilter(value) {
    const requested = toStringSafe(value);
    const wantsEverything = !requested || requested === "all";
    return wantsEverything ? "all" : parseManualCaseDecision(requested);
}
/**
 * Resolves the auto-generation mode from a raw value (case-insensitive).
 *
 * @param {unknown} value - Raw mode value.
 * @returns {"qwen_seed"|"codex_creative"|"saved_user_sessions"} The recognised
 *   mode, or "codex_creative" for anything else.
 */
function parseAutoGenMode(value) {
    const requested = toStringSafe(value)?.toLowerCase() ?? "";
    switch (requested) {
        case "qwen_seed":
        case "codex_creative":
        case "saved_user_sessions":
            return requested;
        default:
            return "codex_creative";
    }
}
/**
 * Parses the requested number of generated questions.
 *
 * @param {unknown} value - Raw count value.
 * @returns {number} Result of `clampInt` with bounds 1..200 and fallback 24.
 */
function parseAutogenCount(value) {
    const requestedCount = toNumberSafe(value);
    return clampInt(requestedCount, 1, 200, 24);
}
/**
 * Normalises a domain hint and caps its length.
 *
 * @param {unknown} value - Raw domain hint.
 * @returns {string|null} The hint from `normalizeDomainHint` cut to at most
 *   80 characters, or `null` when that helper returns a falsy value.
 */
function parseAutogenDomain(value) {
    const hint = normalizeDomainHint(value);
    return hint ? hint.slice(0, 80) : null;
}
/**
 * Builds the LLM runtime configuration for question auto-generation.
 *
 * Provider and model are taken from `body.llm`, falling back to `context`.
 * Only the providers "openai" and "local" are accepted.
 *
 * @param {object} body - Request body; reads `llm`.
 * @param {object} [context] - Optional fallback source for `llm_provider` and `model`.
 * @returns {object|null} Runtime config, or `null` when the model is missing or
 *   the provider is not supported.
 */
function parseAutogenLlmRuntimeConfig(body, context) {
    const llmSection = toRecord(body.llm);
    const provider = toStringSafe(llmSection?.llm_provider ?? context?.llm_provider)?.toLowerCase() ?? "";
    const model = toStringSafe(llmSection?.model ?? context?.model);
    const isSupportedProvider = provider === "openai" || provider === "local";
    if (!model || !isSupportedProvider) {
        return null;
    }
    return {
        // After the guard above the provider is exactly "openai" or "local".
        llm_provider: provider,
        api_key: toStringSafe(llmSection?.api_key) ?? "",
        model,
        base_url: toStringSafe(llmSection?.base_url),
        temperature: toNumberSafe(llmSection?.temperature),
        max_output_tokens: toNumberSafe(llmSection?.max_output_tokens)
    };
}
/**
 * Scores how "healthy" a text looks: letters add to the score, encoding-damage
 * markers subtract from it. A higher score means the text looks more readable.
 *
 * Weights: +1 per Cyrillic or Latin letter; -8 per U+FFFD; -5 per C1 control
 * character; -3 per hard marker character; -2 per marker pair of either kind.
 *
 * @param {unknown} value - Text to score; nullish values are scored as "".
 * @returns {number} Integer score.
 */
function textMojibakeScore(value) {
    const text = String(value ?? "");
    // Number of matches of a global pattern in the text.
    const occurrences = (pattern) => {
        const hits = text.match(pattern);
        return hits === null ? 0 : hits.length;
    };
    const readableLetters = occurrences(/[А-Яа-яЁё]/g) + occurrences(/[A-Za-z]/g);
    const penalty = occurrences(/\uFFFD/g) * 8 +
        occurrences(/[\u0080-\u009f]/g) * 5 +
        occurrences(/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/g) * 3 +
        occurrences(/(?:Р.|С.|Ð.|Ñ.)/g) * 2 +
        occurrences(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) * 2;
    return readableLetters - penalty;
}
/**
 * Heuristically detects text that looks like it was decoded with the wrong encoding.
 *
 * @param {unknown} value - Text to inspect; nullish values are treated as "".
 * @returns {boolean} `true` when the text contains any hard marker character,
 *   or at least two marker pairs of the same family; `false` for blank text.
 */
function looksLikeMojibake(value) {
    const text = String(value ?? "");
    if (text.trim().length === 0) {
        return false;
    }
    const hasHardMarker = /[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/.test(text);
    if (hasHardMarker) {
        return true;
    }
    // A single pair can occur in healthy text, so two or more are required.
    const countPairs = (pattern) => (text.match(pattern) ?? []).length;
    if (countPairs(/(?:Р.|С.|Ð.|Ñ.)/g) >= 2) {
        return true;
    }
    return countPairs(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) >= 2;
}
/**
 * Converts text into bytes, code point by code point: C1 control characters
 * (U+0080..U+009F) are emitted as the single byte of the same value, everything
 * else is encoded with iconv-lite as "win1251".
 *
 * @param {unknown} value - Text to encode; nullish values are treated as "".
 * @returns {Buffer} Concatenated bytes.
 */
function encodeWin1251MojibakeBytes(value) {
    const pieces = Array.from(String(value ?? ""), (symbol) => {
        const codePoint = symbol.codePointAt(0) ?? 0;
        const isC1Control = codePoint >= 0x80 && codePoint <= 0x9f;
        return isC1Control
            ? Buffer.from([codePoint])
            : iconv_lite_1.default.encode(symbol, "win1251");
    });
    return Buffer.concat(pieces);
}
/**
 * Re-interprets the bytes produced by `encodeWin1251MojibakeBytes` as UTF-8 text.
 *
 * @param {unknown} value - Suspected mis-decoded text.
 * @returns {string} The bytes decoded as UTF-8.
 */
function decodeUtf8FromWin1251Mojibake(value) {
    const rawBytes = encodeWin1251MojibakeBytes(value);
    return rawBytes.toString("utf8");
}
/**
 * Restores two known words in which one letter was replaced by U+FFFD or "?"
 * characters. Matching is case-insensitive; the replacement is always the
 * upper-case form of the word.
 *
 * @param {unknown} value - Text to repair; nullish values are treated as "".
 * @returns {string} Text with the known damaged words restored.
 */
function repairKnownReplacementDamagedAutogenText(value) {
    const knownRepairs = [
        [/\u0410\u041b\u042c\u0422\u0415\u0420\u041d\u0410\u0422[\uFFFD?]+\u0412\u0410/giu, "\u0410\u041b\u042c\u0422\u0415\u0420\u041d\u0410\u0422\u0418\u0412\u0410"],
        [/\u041e\u0411\u0429[\uFFFD?]+\u0419/giu, "\u041e\u0411\u0429\u0418\u0419"]
    ];
    let repaired = String(value ?? "");
    for (const [damagedPattern, restoredWord] of knownRepairs) {
        repaired = repaired.replace(damagedPattern, restoredWord);
    }
    return repaired;
}
/**
 * Attempts to undo encoding damage in generated text.
 *
 * Known replacement-damaged words are fixed first. If the text then neither
 * looks like mojibake nor contains C1 control / U+FFFD characters, it is
 * returned as is. Otherwise up to three repair passes are run; each pass tries
 * a win1251-bytes-as-UTF-8 re-decode and a latin1-bytes-as-UTF-8 re-decode and
 * keeps a result only when `textMojibakeScore` rates it strictly higher than
 * the current candidate.
 *
 * @param {unknown} value - Text to repair; nullish values are treated as "".
 * @returns {string} The best candidate found.
 */
function repairAutogenMojibake(value) {
const source = repairKnownReplacementDamagedAutogenText(String(value ?? ""));
// Fast path: nothing suspicious in the text.
if (!looksLikeMojibake(source) && !/[\u0080-\u009f\uFFFD]/.test(source)) {
return source;
}
let candidate = source;
// Several passes handle text that was mis-decoded more than once.
for (let pass = 0; pass < 3; pass += 1) {
let improved = false;
try {
const fromWin1251 = decodeUtf8FromWin1251Mojibake(candidate);
if (textMojibakeScore(fromWin1251) > textMojibakeScore(candidate)) {
candidate = fromWin1251;
improved = true;
}
}
catch {
// Best effort: a failed re-decode leaves the candidate unchanged.
}
try {
// The latin1 attempt runs on the current candidate, i.e. after a successful win1251 step.
const fromLatin1 = Buffer.from(candidate, "latin1").toString("utf8");
if (textMojibakeScore(fromLatin1) > textMojibakeScore(candidate)) {
candidate = fromLatin1;
improved = true;
}
}
catch {
// Best effort: a failed re-decode leaves the candidate unchanged.
}
const repairedKnownText = repairKnownReplacementDamagedAutogenText(candidate);
if (repairedKnownText !== candidate) {
candidate = repairedKnownText;
improved = true;
}
// Stop as soon as a full pass changes nothing.
if (!improved) {
break;
}
}
return repairKnownReplacementDamagedAutogenText(candidate);
}
/**
 * Cleans a generated question: repairs encoding damage, collapses every run of
 * whitespace (including carriage returns and tabs) into a single space, and trims.
 *
 * @param {unknown} value - Raw question; nullish values are treated as "".
 * @returns {string} Single-line, trimmed question text.
 */
function sanitizeGeneratedQuestion(value) {
    const repaired = repairAutogenMojibake(String(value ?? ""));
    const withoutCarriageReturns = repaired.replace(/\r/g, " ");
    const withoutTabs = withoutCarriageReturns.replace(/\t/g, " ");
    const singleSpaced = withoutTabs.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
/**
 * Extracts sanitised question strings from a raw list.
 *
 * @param {unknown} value - Raw list; non-string entries are treated as "".
 * @returns {string[]} At most the first 500 non-empty sanitised questions, in input order.
 */
function parseAssistantSessionQuestions(value) {
    const sanitized = toArray(value).map((entry) => {
        const rawText = typeof entry === "string" ? entry : "";
        return sanitizeGeneratedQuestion(rawText);
    });
    const nonEmpty = sanitized.filter((question) => question.length > 0);
    return nonEmpty.slice(0, 500);
}
// Matches text that is only a heading-like word ("question"/"questions" or the
// Russian equivalents for "question(s)" / "list of questions"), case-insensitive.
const AUTOGEN_QUESTION_PLACEHOLDER_PATTERN = /^(?:questions?|вопросы?|список\s+вопросов)$/iu;
// Matches short Russian filler phrases (roughly "no fluff", "just the facts",
// "briefly", "right now", "for the whole period", "to the point"), optionally
// followed by "?". Used to recognise fragments that belong to the previous question.
const AUTOGEN_QUESTION_TAIL_PATTERNS = [
/^(?:без\s+воды|по\s+факту|и\s+коротко|коротко|прям(?:\s+)?сейчас|за\s+весь\s+период|по\s+делу)\??$/iu
];
/**
 * Sanitises a question and removes trailing punctuation (`? ! . : , ;`).
 *
 * @param {unknown} value - Raw question text.
 * @returns {string} Question core without trailing punctuation or surrounding whitespace.
 */
function stripAutogenQuestionSuffix(value) {
    const sanitized = sanitizeGeneratedQuestion(value);
    const withoutTrailingPunctuation = sanitized.replace(/[?!.:,;]+$/u, "");
    return withoutTrailingPunctuation.trim();
}
/**
 * Tells whether a candidate is just a placeholder heading rather than a question.
 *
 * @param {unknown} value - Candidate text.
 * @returns {boolean} `true` when the lower-cased, punctuation-stripped text is
 *   non-empty and matches `AUTOGEN_QUESTION_PLACEHOLDER_PATTERN`.
 */
function isAutogenQuestionPlaceholder(value) {
    const core = stripAutogenQuestionSuffix(value).toLowerCase();
    if (core.length === 0) {
        return false;
    }
    return AUTOGEN_QUESTION_PLACEHOLDER_PATTERN.test(core);
}
/**
 * Tells whether a candidate looks like a trailing fragment of the previous
 * question (a placeholder word or one of the known filler phrases).
 *
 * @param {unknown} value - Candidate text.
 * @returns {boolean} `true` for placeholders and for text matching any of
 *   `AUTOGEN_QUESTION_TAIL_PATTERNS`; `false` for empty text.
 */
function isLikelyAutogenQuestionTail(value) {
    const core = stripAutogenQuestionSuffix(value).toLowerCase();
    if (!core) {
        return false;
    }
    const matchesTailPhrase = () => AUTOGEN_QUESTION_TAIL_PATTERNS.some((tailPattern) => tailPattern.test(core));
    return isAutogenQuestionPlaceholder(core) || matchesTailPhrase();
}
/**
 * Joins a question with a trailing fragment into a single question ending in "?".
 *
 * @param {unknown} baseQuestion - The main question.
 * @param {unknown} tail - The fragment to append.
 * @returns {string} `"<base> <tail>?"` with single spaces; only the non-empty
 *   part plus "?" when the other is empty; "" when both are empty.
 */
function mergeAutogenQuestionTail(baseQuestion, tail) {
    const head = stripAutogenQuestionSuffix(baseQuestion);
    const addition = stripAutogenQuestionSuffix(tail);
    if (head && addition) {
        const joined = `${head} ${addition}?`;
        return joined.replace(/\s+/g, " ").trim();
    }
    if (head) {
        return `${head}?`;
    }
    return addition ? `${addition}?` : "";
}
/**
 * Cleans a list of question candidates: sanitises each one, drops empty and
 * placeholder entries, and glues tail fragments onto the preceding question.
 *
 * @param {Iterable<unknown>} candidates - Raw candidates in order.
 * @returns {string[]} Normalised, non-empty questions.
 */
function normalizeAutogenQuestionCandidates(candidates) {
    const accepted = [];
    for (const rawCandidate of candidates) {
        const question = sanitizeGeneratedQuestion(rawCandidate);
        const isUsable = Boolean(question) && !isAutogenQuestionPlaceholder(question);
        if (!isUsable) {
            continue;
        }
        const lastPosition = accepted.length - 1;
        const isTailOfPrevious = isLikelyAutogenQuestionTail(question) && lastPosition >= 0;
        if (!isTailOfPrevious) {
            accepted.push(question);
            continue;
        }
        // A tail never becomes a question of its own; it is merged into the
        // previous one (or dropped when the merge yields nothing).
        const merged = mergeAutogenQuestionTail(accepted[lastPosition], question);
        if (merged) {
            accepted[lastPosition] = merged;
        }
    }
    return accepted.filter((entry) => entry.length > 0);
}
/**
 * Splits raw LLM text into individual question candidates.
 *
 * Strategies are tried in order and the first one that yields more than one
 * candidate wins: (1) one question per line, (2) split on question marks,
 * (3) double-quoted segments. If none applies, the whole text is treated as a
 * single candidate.
 *
 * @param {unknown} rawText - Raw text to split.
 * @returns {string[]} Normalised question candidates; empty for blank input.
 */
function splitQuestionCandidates(rawText) {
const normalized = repairAutogenMojibake(rawText).replace(/\r/g, "\n").trim();
if (!normalized)
return [];
// Undo JSON-style escaping that may survive in raw model output.
const unescaped = normalized.replace(/\\"/g, '"').replace(/\\n/g, "\n");
// Strategy 1: one candidate per line, with leading list markers
// (bullets, or 1-3 digits optionally followed by ")", "." or ":") removed.
const byLines = unescaped
.split(/\n+/g)
.map((line) => line.replace(/^\s*(?:[-*•]|\d{1,3}[).:]?)\s*/, ""))
.map((line) => sanitizeGeneratedQuestion(line))
.filter((line) => line.length > 0);
if (byLines.length > 1) {
return normalizeAutogenQuestionCandidates(byLines);
}
// Strategy 2: several question marks on a single line.
const questionMarkCount = (unescaped.match(/\?/g) ?? []).length;
if (questionMarkCount > 1) {
const questionChunks = Array.from(unescaped.matchAll(/[^?]+(?:\?|$)/g))
.map((match) => sanitizeGeneratedQuestion(match[0]))
.filter((chunk) => chunk.length > 0);
if (questionChunks.length > 1) {
// Split only when every chunk looks like a full question: not a placeholder,
// not a tail fragment, and at least 18 characters long.
const canSafelySplit = questionChunks.every((chunk) => !isAutogenQuestionPlaceholder(chunk) &&
!isLikelyAutogenQuestionTail(chunk) &&
sanitizeGeneratedQuestion(chunk).length >= 18);
if (canSafelySplit) {
return normalizeAutogenQuestionCandidates(questionChunks.map((chunk) => (chunk.endsWith("?") ? chunk : `${chunk}?`)));
}
}
}
// Strategy 3: double-quoted segments of at least six characters without line breaks.
const quoted = Array.from(unescaped.matchAll(/"([^"\n]{6,}?)"/g))
.map((match) => sanitizeGeneratedQuestion(match[1]))
.filter((line) => line.length > 0);
if (quoted.length > 1) {
return normalizeAutogenQuestionCandidates(quoted);
}
// Fallback: treat the whole text as one candidate.
const cleaned = sanitizeGeneratedQuestion(unescaped);
return cleaned ? normalizeAutogenQuestionCandidates([cleaned]) : [];
}
/**
 * Leniently parses JSON from raw LLM output.
 *
 * Markdown code fences are stripped, then parsing is attempted on the whole
 * text, on the span from the first "[" to the last "]", and on the span from
 * the first "{" to the last "}", in that order.
 *
 * @param {unknown} rawText - Raw model output.
 * @returns {*} The first successfully parsed value, or `null` when the text is
 *   empty or nothing parses.
 */
function parseAutogenOutputJson(rawText) {
    const stripped = repairAutogenMojibake(rawText)
        .trim()
        .replace(/^```json\s*/i, "")
        .replace(/^```\s*/i, "")
        .replace(/```$/i, "")
        .trim();
    if (!stripped) {
        return null;
    }
    // Outermost span delimited by the given brackets, or null when there is none.
    const outermostSpan = (opening, closing) => {
        const start = stripped.indexOf(opening);
        const end = stripped.lastIndexOf(closing);
        return start >= 0 && end > start ? stripped.slice(start, end + 1) : null;
    };
    const attempts = [stripped, outermostSpan("[", "]"), outermostSpan("{", "}")];
    for (const attempt of attempts) {
        if (attempt === null) {
            continue;
        }
        try {
            return JSON.parse(attempt);
        }
        catch {
            // Not valid JSON; move on to the next, narrower attempt.
        }
    }
    return null;
}
/**
 * Recursively extracts question strings from a parsed (or raw) LLM output value.
 *
 * Arrays are flattened; strings are first tried as embedded JSON and otherwise
 * split into candidates; objects are searched for `questions`, then for
 * `question` / `user_message` / `text`.
 *
 * @param {*} value - Value to inspect.
 * @param {number} [depth=0] - Current recursion depth; values nested deeper than 5 are ignored.
 * @returns {string[]} Extracted questions (possibly empty).
 */
function collectQuestionsFromCandidate(value, depth = 0) {
    const isTooDeep = depth > 5;
    if (isTooDeep || value === null || value === undefined) {
        return [];
    }
    const nextDepth = depth + 1;
    if (Array.isArray(value)) {
        const flattened = value.flatMap((entry) => collectQuestionsFromCandidate(entry, nextDepth));
        return normalizeAutogenQuestionCandidates(flattened);
    }
    if (typeof value === "string") {
        const text = value.trim();
        if (!text) {
            return [];
        }
        // First chance: the string itself contains JSON (possibly fenced or embedded).
        const lenientlyParsed = parseAutogenOutputJson(text);
        if (lenientlyParsed !== null) {
            const fromLenient = collectQuestionsFromCandidate(lenientlyParsed, nextDepth);
            if (fromLenient.length > 0) {
                return fromLenient;
            }
        }
        // Second chance: strict JSON, e.g. a JSON-encoded string literal.
        try {
            const strictlyParsed = JSON.parse(text);
            if (strictlyParsed !== text) {
                const fromStrict = collectQuestionsFromCandidate(strictlyParsed, nextDepth);
                if (fromStrict.length > 0) {
                    return fromStrict;
                }
            }
        }
        catch {
            // Plain text, not JSON: fall through to text splitting.
        }
        return splitQuestionCandidates(text);
    }
    const record = toRecord(value);
    if (!record) {
        return [];
    }
    const listedQuestions = collectQuestionsFromCandidate(record.questions, nextDepth);
    if (listedQuestions.length > 0) {
        return listedQuestions;
    }
    const singleQuestionText = toStringSafe(record.question ?? record.user_message ?? record.text);
    if (!singleQuestionText) {
        return [];
    }
    return splitQuestionCandidates(singleQuestionText);
}
/**
 * Extracts questions from raw LLM output: structured (JSON) extraction first,
 * then text-based extraction of the raw output when that yields nothing.
 *
 * @param {unknown} rawText - Raw model output.
 * @returns {string[]} Extracted questions (possibly empty).
 */
function extractQuestionsFromAutogenOutput(rawText) {
    const structured = collectQuestionsFromCandidate(parseAutogenOutputJson(rawText));
    return structured.length > 0 ? structured : collectQuestionsFromCandidate(rawText);
}
// Exposes selected internal question-parsing helpers under a dedicated export
// (named as test utilities) so they can be called from outside this module.
exports.__autoRunsQuestionTestUtils = {
splitQuestionCandidates,
extractQuestionsFromAutogenOutput,
repairAutogenMojibake
};
/**
 * Asks the configured LLM for realistic accountant questions and returns up to
 * `input.count` unique questions, topped up with locally generated seed
 * questions when the model returned fewer.
 *
 * @param {object} input
 * @param {object} input.client - Client exposing `chat(config, request)`; `outputText` is read from the awaited result.
 * @param {object} input.llmConfig - Runtime config (`llm_provider`, `api_key`,
 *   `model`, `base_url`, `temperature`, `max_output_tokens`).
 * @param {number} input.count - Number of questions to return.
 * @param {string|null} [input.domain] - Domain hint; "general" is used in the prompt when absent.
 * @param {string|null} [input.personalityPrompt] - Persona prompt; a default is used when absent.
 * @returns {Promise<string[]>} Unique questions, at most `input.count`.
 * @throws {ApiError} `AUTOGEN_LLM_EMPTY_OUTPUT` (502) when no usable question
 *   can be extracted from the model output.
 */
async function generateQwenSeedQuestionsLive(input) {
    const seedExamples = collectCanonicalQuestions(40);
    const fallbackExamples = fallbackDomainTemplates(input.domain);
    const examples = (seedExamples.length > 0 ? seedExamples : fallbackExamples).slice(0, 8);
    const personalityPrompt = input.personalityPrompt ??
        "Генерируй реалистичные вопросы бухгалтера по 1С. Разговорный стиль допустим, но смысл должен быть четким.";
    const repairedPersonalityPrompt = repairAutogenMojibake(personalityPrompt);
    const maxOutputTokens = clampInt(input.llmConfig.max_output_tokens, 300, 3000, 1200);
    // A missing or non-numeric temperature falls back to 0.5. Previously only
    // `null` did; `undefined` and other non-numeric values turned into NaN
    // inside Math.min/Math.max and were forwarded to the client.
    const requestedTemperature = input.llmConfig.temperature;
    const numericTemperature = requestedTemperature === null || requestedTemperature === undefined
        ? Number.NaN
        : Number(requestedTemperature);
    const temperature = Number.isNaN(numericTemperature)
        ? 0.5
        : Math.max(0, Math.min(1.5, numericTemperature));
    const systemPrompt = [
        "Ты генератор вопросов для автопрогонов бухгалтерского ассистента по 1С.",
        "Возвращай только JSON и никаких пояснений.",
        "Ассистент работает в read-only режиме: не проси действий изменения базы."
    ].join(" ");
    const repairedSystemPrompt = repairAutogenMojibake(systemPrompt);
    const developerPrompt = [
        `Нужно сгенерировать ровно ${input.count} вопросов.`,
        "Формат ответа строго:",
        '{"questions":["вопрос 1","вопрос 2"]}',
        "Требования:",
        "1) каждый вопрос отдельный, без дубликатов;",
        "2) живой пользовательский язык;",
        "3) допустимы легкие разговорные сокращения;",
        "4) не выдавай мета-комментарии и не описывай правила."
    ].join("\n");
    const repairedDeveloperPrompt = repairAutogenMojibake(developerPrompt);
    const userMessage = [
        `Домен: ${input.domain ?? "general"}.`,
        `Промпт личности: ${repairedPersonalityPrompt}`,
        "Примеры ориентиров по стилю и тематике:",
        ...examples.map((item, index) => `${index + 1}. ${item}`)
    ].join("\n");
    const repairedUserMessage = repairAutogenMojibake(userMessage);
    const response = await input.client.chat({
        llmProvider: input.llmConfig.llm_provider,
        apiKey: input.llmConfig.api_key,
        model: input.llmConfig.model,
        baseUrl: input.llmConfig.base_url ?? undefined,
        temperature,
        maxOutputTokens: maxOutputTokens
    }, {
        systemPrompt: repairedSystemPrompt,
        developerPrompt: repairedDeveloperPrompt,
        userMessage: repairedUserMessage,
        temperature,
        maxOutputTokens
    });
    const extracted = extractQuestionsFromAutogenOutput(response.outputText);
    const normalized = Array.from(new Set(extracted.map((item) => sanitizeGeneratedQuestion(item)).filter((item) => item.length > 0)));
    if (normalized.length === 0) {
        throw new http_1.ApiError("AUTOGEN_LLM_EMPTY_OUTPUT", "Qwen не вернул пригодные вопросы для автогенерации.", 502, {
            model: input.llmConfig.model
        });
    }
    // Model questions come first; locally generated ones only fill the remainder.
    const fallback = generateQwenSeedQuestions(input.count, input.domain);
    return Array.from(new Set([...normalized, ...fallback])).slice(0, input.count);
}
/**
 * Tells whether the query carries any run filter parameter
 * (`from`, `to`, `target`, `mode`, `use_mock`, `prompt_contains`).
 *
 * @param {object} query - Parsed query parameters.
 * @returns {boolean} Truthiness of the first parameter for which `toStringSafe`
 *   returns a non-nullish value; `false` when there is none.
 */
function hasAnyRunFilterQuery(query) {
    const filterKeys = ["from", "to", "target", "mode", "use_mock", "prompt_contains"];
    for (const key of filterKeys) {
        const text = toStringSafe(query[key]);
        if (text !== null && text !== undefined) {
            return Boolean(text);
        }
    }
    return false;
}
/**
 * Builds the file name of a generated case set:
 * `assistant_autogen_<mode>_<UTC timestamp>_<generationId>.json`, where the
 * timestamp is year, month, day, hours, minutes and seconds with no separators.
 *
 * @param {string} mode - Generation mode.
 * @param {string} generationId - Generation identifier.
 * @returns {string} File name.
 */
function buildAutogenCaseSetFileName(mode, generationId) {
    const now = new Date();
    const twoDigitParts = [
        now.getUTCMonth() + 1,
        now.getUTCDate(),
        now.getUTCHours(),
        now.getUTCMinutes(),
        now.getUTCSeconds()
    ].map((part) => String(part).padStart(2, "0"));
    // The year is used as is; every other component is zero-padded to two digits.
    const stamp = `${now.getUTCFullYear()}${twoDigitParts.join("")}`;
    return `assistant_autogen_${mode}_${stamp}_${generationId}.json`;
}
/**
 * Builds the file name of a saved assistant session snapshot:
 * `assistant_saved_session_<UTC timestamp>_<generationId>.json`, where the
 * timestamp is year, month, day, hours, minutes and seconds with no separators.
 *
 * @param {string} generationId - Generation identifier.
 * @returns {string} File name.
 */
function buildSavedAssistantSessionSnapshotFileName(generationId) {
    const now = new Date();
    const pad2 = (part) => String(part).padStart(2, "0");
    let stamp = String(now.getUTCFullYear());
    stamp += pad2(now.getUTCMonth() + 1);
    stamp += pad2(now.getUTCDate());
    stamp += pad2(now.getUTCHours());
    stamp += pad2(now.getUTCMinutes());
    stamp += pad2(now.getUTCSeconds());
    return `assistant_saved_session_${stamp}_${generationId}.json`;
}
/**
 * Builds the case-set document for auto-generated questions: one single-turn
 * case per unique sanitised question, with ids `AUTO-001`, `AUTO-002`, ...
 *
 * @param {object} input
 * @param {string} input.generationId - Generation identifier.
 * @param {string} input.mode - Generation mode.
 * @param {string|null} [input.domain] - Domain; "general" is used in the scenario tag when absent.
 * @param {Array<unknown>} input.questions - Raw questions.
 * @returns {object} Suite payload with metadata, `case_ids` and `cases`.
 */
function buildAutogenCaseSetPayload(input) {
    // A Set keeps the first occurrence of each question and preserves order.
    const uniqueQuestions = new Set();
    for (const rawQuestion of input.questions) {
        const question = sanitizeGeneratedQuestion(rawQuestion);
        if (question.length > 0) {
            uniqueQuestions.add(question);
        }
    }
    const scenarioTag = `${input.mode}_${input.domain ?? "general"}`;
    const cases = Array.from(uniqueQuestions, (question, position) => ({
        case_id: `AUTO-${String(position + 1).padStart(3, "0")}`,
        scenario_tag: scenarioTag,
        question_type: "direct",
        broadness_level: "medium",
        turns: [{ user_message: question }],
        expected_hints: {
            expected_reply_type: null,
            expected_degraded_to: null
        }
    }));
    const caseIds = cases.map((entry) => entry.case_id);
    return {
        suite_id: `assistant_autogen_${input.generationId}`,
        suite_version: "0.1.0",
        schema_version: "assistant_autogen_suite_v0_1",
        generated_at: new Date().toISOString(),
        generation_id: input.generationId,
        mode: input.mode,
        domain: input.domain,
        scenario_count: cases.length,
        case_ids: caseIds,
        cases
    };
}
/**
 * Builds the case-set document for a saved user session: all questions become
 * the turns of one case (`SAVED-001`). With no usable questions the suite is empty.
 *
 * @param {object} input
 * @param {string} input.generationId - Generation identifier.
 * @param {*} input.title - Title copied to the suite and to the case.
 * @param {unknown} input.questions - Raw questions of the session.
 * @param {unknown} [input.scenarioTag] - Scenario tag; "saved_user_sessions" when absent.
 * @returns {object} Suite payload with zero or one case.
 */
function buildSavedSessionCaseSetPayload(input) {
    const turns = parseAssistantSessionQuestions(input.questions).map((question) => ({ user_message: question }));
    const caseId = "SAVED-001";
    const hasTurns = turns.length > 0;
    const generatedAt = new Date().toISOString();
    const cases = [];
    if (hasTurns) {
        cases.push({
            case_id: caseId,
            scenario_tag: toStringSafe(input.scenarioTag) ?? "saved_user_sessions",
            title: input.title,
            // More than one turn makes the case a follow-up conversation.
            question_type: turns.length > 1 ? "followup" : "direct",
            broadness_level: "medium",
            turns
        });
    }
    return {
        suite_id: `assistant_saved_session_${input.generationId}`,
        suite_version: "0.1.0",
        schema_version: "assistant_saved_session_suite_v0_1",
        generated_at: generatedAt,
        generation_id: input.generationId,
        mode: "saved_user_sessions",
        title: input.title,
        scenario_count: hasTurns ? 1 : 0,
        case_ids: hasTurns ? [caseId] : [],
        cases
    };
}
/**
 * Makes sure a directory (and its missing parents) exists.
 *
 * `mkdirSync` with `recursive: true` already succeeds when the directory is
 * present, so no separate existence check is made; this also removes the
 * window between checking and creating.
 *
 * @param {string} targetDir - Directory path to create if missing.
 * @returns {void}
 * @throws {Error} When the directory cannot be created (for example, the path
 *   exists but is not a directory).
 */
function ensureDirSync(targetDir) {
    fs_1.default.mkdirSync(targetDir, { recursive: true });
}
/**
 * Writes a value as pretty-printed (2-space) JSON, creating the parent
 * directory first when needed.
 *
 * @param {string} targetPath - Destination file path.
 * @param {*} payload - Value to serialise.
 * @returns {void}
 */
function writeJsonFile(targetPath, payload) {
    const parentDir = path_1.default.dirname(targetPath);
    ensureDirSync(parentDir);
    const serialized = JSON.stringify(payload, null, 2);
    fs_1.default.writeFileSync(targetPath, serialized, "utf-8");
}
/**
 * Regenerates the case-set file of a generation record from its current
 * questions and writes it under `EVAL_CASES_DIR`.
 *
 * @param {object} record - Generation record; reads `saved_case_set_file`,
 *   `mode`, `generation_id`, `title`, `domain`, `questions` and `context`.
 * @returns {string|null} The case-set file name, or `null` when the record has none.
 */
function rewriteAutoGenCaseSetFile(record) {
    const caseSetFile = toStringSafe(record.saved_case_set_file);
    if (!caseSetFile) {
        return null;
    }
    const targetPath = path_1.default.resolve(config_1.EVAL_CASES_DIR, caseSetFile);
    let payload;
    if (record.mode === "saved_user_sessions") {
        // Sessions saved as agent semantic scenarios carry their own scenario tag.
        const isAgentScenario = record.context?.saved_case_set_kind === "agent_semantic_scenario";
        payload = buildSavedSessionCaseSetPayload({
            generationId: record.generation_id,
            title: record.title,
            questions: record.questions,
            scenarioTag: isAgentScenario ? "agent_saved_user_sessions" : "saved_user_sessions"
        });
    }
    else {
        payload = buildAutogenCaseSetPayload({
            generationId: record.generation_id,
            mode: record.mode,
            domain: record.domain,
            questions: record.questions
        });
    }
    writeJsonFile(targetPath, payload);
    return caseSetFile;
}
/**
 * Stores a snapshot of a saved assistant session in the `saved_sessions`
 * folder located next to the generator history file.
 *
 * @param {object} input - Reads `generationId`, `title`, `sessionId`, `questions`, `session`.
 * @returns {string} Name of the written snapshot file.
 */
function writeSavedAssistantSessionSnapshot(input) {
    const fileName = buildSavedAssistantSessionSnapshotFileName(input.generationId);
    const historyDir = path_1.default.dirname(config_1.AUTORUN_GENERATOR_HISTORY_FILE);
    const targetPath = path_1.default.resolve(historyDir, "saved_sessions", fileName);
    const snapshot = {
        saved_at: new Date().toISOString(),
        generation_id: input.generationId,
        mode: "saved_user_sessions",
        title: input.title,
        source_session_id: input.sessionId,
        questions: input.questions,
        session: input.session
    };
    writeJsonFile(targetPath, snapshot);
    return fileName;
}
/**
 * Resolves a file name against a base directory and guarantees that the result
 * is located strictly inside that directory.
 *
 * @param {string} baseDir - Directory the file must live in.
 * @param {unknown} fileName - File name or relative path.
 * @returns {string|null} Absolute path of the file, or `null` when the name is
 *   empty, resolves to the base directory itself, or escapes the base directory.
 */
function resolveFileInsideDir(baseDir, fileName) {
    const normalized = toStringSafe(fileName);
    if (!normalized) {
        return null;
    }
    const targetPath = path_1.default.resolve(baseDir, normalized);
    const relative = path_1.default.relative(baseDir, targetPath);
    // An empty relative path means the name resolved to the directory itself,
    // which is not a file inside it.
    if (relative === "") {
        return null;
    }
    // Only a real ".." path segment escapes the directory; a plain prefix check
    // would also reject legitimate names such as "..backup.json".
    const climbsOut = relative === ".." || relative.startsWith(`..${path_1.default.sep}`);
    if (climbsOut || path_1.default.isAbsolute(relative)) {
        return null;
    }
    return targetPath;
}
/**
 * Deletes a file when it exists.
 *
 * @param {string|null|undefined} targetPath - Path of the file to delete.
 * @returns {string|null} The deleted path, or `null` when no path was given or
 *   nothing exists at that path.
 */
function safeDeleteFile(targetPath) {
    const isPresent = Boolean(targetPath) && fs_1.default.existsSync(targetPath);
    if (!isPresent) {
        return null;
    }
    fs_1.default.unlinkSync(targetPath);
    return targetPath;
}
/**
 * Groups annotations into work queues and computes summary statistics for
 * post-run analysis.
 *
 * Each annotation is mapped to a queue through `DECISION_QUEUE_MAP`, enriched
 * with case data from its run (when the run is known) and with the nearest
 * capability group, and appended to its queue until `limitPerQueue` is reached.
 * Statistics always count every annotation, regardless of the queue limit.
 *
 * Case summaries are computed once per run and indexed by case id instead of
 * being rebuilt and scanned for every annotation.
 * NOTE(review): this relies on `buildCaseSummaries(report, runId, false)`
 * returning equivalent data on repeated calls — confirm.
 *
 * @param {Array<object>} annotations - Annotations to analyse.
 * @param {Map<string, object>} runMap - Indexed runs keyed by run id.
 * @param {number} limitPerQueue - Maximum number of items kept per queue.
 * @returns {object} `{ stats, domain_summary, queues, recommended_regression_candidates }`.
 */
function collectPostAnalysis(annotations, runMap, limitPerQueue) {
    const byDecision = {};
    const byQueue = {};
    const byDomain = new Map();
    const queues = {
        routing_extension: [],
        policy_fix: [],
        capability_registry: [],
        soft_boundary: [],
        safety_policy: [],
        testset_hygiene: [],
        covered_ok: []
    };
    const registry = (0, capabilitiesRegistry_1.loadCapabilitiesRegistry)();
    // run_id -> (case_id -> case summary), filled lazily.
    const caseIndexByRun = new Map();
    const findCaseSummary = (runId, caseId) => {
        const run = runMap.get(runId) ?? null;
        if (!run) {
            return null;
        }
        let caseIndex = caseIndexByRun.get(runId);
        if (!caseIndex) {
            caseIndex = new Map();
            for (const summary of buildCaseSummaries(run.report, run.run_id, false)) {
                // Keep the first summary per case id, like a linear search would.
                if (!caseIndex.has(summary.case_id)) {
                    caseIndex.set(summary.case_id, summary);
                }
            }
            caseIndexByRun.set(runId, caseIndex);
        }
        return caseIndex.get(caseId) ?? null;
    };
    for (const item of annotations) {
        byDecision[item.manual_case_decision] = (byDecision[item.manual_case_decision] ?? 0) + 1;
        const queueKey = DECISION_QUEUE_MAP[item.manual_case_decision];
        byQueue[queueKey] = (byQueue[queueKey] ?? 0) + 1;
        const caseSummary = findCaseSummary(item.run_id, item.case_id);
        // Data from the run report wins over the context stored with the annotation.
        const nearestGroup = (0, capabilitiesRegistry_1.resolveNearestCapabilityGroup)({
            domain: caseSummary?.domain ?? item.context.domain,
            queryClass: caseSummary?.query_class ?? item.context.query_class
        }) ??
            registry.groups[0] ??
            null;
        const domainKey = caseSummary?.domain ?? item.context.domain ?? "unknown";
        byDomain.set(domainKey, (byDomain.get(domainKey) ?? 0) + 1);
        const view = {
            annotation_id: item.annotation_id,
            run_id: item.run_id,
            case_id: item.case_id,
            message_index: item.message_index,
            rating: item.rating,
            comment: item.comment,
            manual_case_decision: item.manual_case_decision,
            annotation_author: item.annotation_author,
            updated_at: item.updated_at,
            domain: caseSummary?.domain ?? item.context.domain ?? null,
            query_class: caseSummary?.query_class ?? item.context.query_class ?? null,
            trace_id: item.context.trace_id ?? caseSummary?.trace_id ?? null,
            reply_type: item.context.reply_type ?? caseSummary?.reply_type ?? null,
            nearest_capability_group: nearestGroup
                ? {
                    group_code: nearestGroup.group_code,
                    group_title: nearestGroup.group_title,
                    maturity_status: nearestGroup.maturity_status
                }
                : null
        };
        // The "none" queue is reported under the name "covered_ok".
        if (queueKey === "none") {
            if (queues.covered_ok.length < limitPerQueue) {
                queues.covered_ok.push(view);
            }
            continue;
        }
        if (!queues[queueKey]) {
            queues[queueKey] = [];
        }
        if (queues[queueKey].length < limitPerQueue) {
            queues[queueKey].push(view);
        }
    }
    const domainSummary = Array.from(byDomain.entries())
        .map(([domain, total]) => ({ domain, total }))
        .sort((a, b) => b.total - a.total);
    return {
        stats: {
            annotations_total: annotations.length,
            by_decision: byDecision,
            by_queue: byQueue,
            domains_total: domainSummary.length
        },
        domain_summary: domainSummary,
        queues,
        // Up to 20 items from each of the three queues listed here.
        recommended_regression_candidates: [
            ...queues.routing_extension.slice(0, 20),
            ...queues.policy_fix.slice(0, 20),
            ...queues.safety_policy.slice(0, 20)
        ].slice(0, 60)
    };
}
function buildAutoRunsRouter(services, openaiClient = new openaiResponsesClient_1.OpenAIResponsesClient()) {
const router = (0, express_1.Router)();
router.get("/api/autoruns/history", (req, res) => {
const filters = parseFilters(req.query);
const indexed = indexRuns(filters.scan_limit);
const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit);
const summaries = filtered.map((run) => buildRunSummary(run));
const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort();
const availableModes = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item) => item !== null))).sort();
const availablePromptVersions = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item) => item !== null))).sort();
(0, http_1.ok)(res, {
ok: true,
generated_at: new Date().toISOString(),
filters_applied: {
from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(),
to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(),
target: filters.target,
use_mock: filters.use_mock,
prompt_contains: filters.prompt_contains,
mode: filters.mode,
limit: filters.limit,
scan_limit: filters.scan_limit
},
available: {
targets: availableTargets,
modes: availableModes,
prompt_versions: availablePromptVersions
},
items: summaries,
stats: buildHistoryStats(summaries)
});
});
router.get("/api/autoruns/history/:run_id", (req, res, next) => {
try {
const runId = String(req.params.run_id ?? "").trim();
if (!runId) {
throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400);
}
const run = findRunById(runId);
if (!run) {
throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
}
const annotations = readAnnotations();
const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations);
const cases = buildCaseSummaries(run.report, run.run_id, true, annotationStatsByCase);
const coverage = buildCoverageFromCases(cases);
(0, http_1.ok)(res, {
ok: true,
run: buildRunSummary(run),
coverage,
cases,
annotations_summary: {
total: annotations.filter((item) => item.run_id === runId).length
},
report: run.report
});
}
catch (error) {
next(error);
}
});
router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
try {
const runId = String(req.params.run_id ?? "").trim();
const caseId = String(req.params.case_id ?? "").trim();
if (!runId || !caseId) {
throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
}
const run = findRunById(runId);
if (!run) {
throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
}
const annotations = readAnnotations();
if (caseId === "__all__") {
const dialog = buildRunAggregateDialog(run, annotations);
(0, http_1.ok)(res, {
ok: true,
run_id: runId,
case_id: "__all__",
...dialog,
annotations: annotations
.filter((item) => item.run_id === runId)
.sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
});
return;
}
const sessionDialog = loadSessionDialog(runId, caseId);
const dialog = sessionDialog ?? buildFallbackDialog(run, caseId);
const messages = withMessageAnnotations(runId, caseId, dialog.messages, annotations);
(0, http_1.ok)(res, {
ok: true,
run_id: runId,
case_id: caseId,
...dialog,
messages,
annotations: annotations
.filter((item) => item.run_id === runId && item.case_id === caseId)
.sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
});
}
catch (error) {
next(error);
}
});
// GET /api/autoruns/annotations
// Lists stored annotations, optionally filtered by run_id, case_id, min_rating and
// manual_case_decision, newest first and truncated to `limit`. Each item is enriched
// with the summary of its run and case (when the run is present in the scanned run
// index) and a `technical_context` section. The response also carries aggregate
// stats (count, average rating, count per decision) and the decision schema.
// Query parameters: run_id, case_id, min_rating (clamped to 1..5),
// manual_case_decision, limit (1..2000, default 400), scan_limit (50..5000, default 2500).
// Any thrown error is forwarded to next().
router.get("/api/autoruns/annotations", (req, res, next) => {
    try {
        const runIdFilter = toStringSafe(req.query.run_id);
        const caseIdFilter = toStringSafe(req.query.case_id);
        const minRatingRaw = toNumberSafe(req.query.min_rating);
        const minRating = minRatingRaw === null ? null : clampInt(minRatingRaw, 1, 5, 1);
        const decisionFilter = parseDecisionFilter(req.query.manual_case_decision);
        const limit = clampInt(toNumberSafe(req.query.limit), 1, 2000, 400);
        const scanLimit = clampInt(toNumberSafe(req.query.scan_limit), 50, 5000, 2500);
        const annotations = readAnnotations()
            .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
            .filter((item) => (caseIdFilter ? item.case_id === caseIdFilter : true))
            .filter((item) => (minRating === null ? true : item.rating >= minRating))
            .filter((item) => (decisionFilter === "all" ? true : item.manual_case_decision === decisionFilter))
            .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
            .slice(0, limit);
        const runIndex = indexRuns(scanLimit);
        const runMap = new Map(runIndex.map((item) => [item.run_id, item]));
        // Many annotations usually point at the same run. Build the run summary and the
        // case lookup once per run for this request instead of once per annotation.
        const runViewCache = new Map();
        const resolveRunView = (run) => {
            const cached = runViewCache.get(run.run_id);
            if (cached) {
                return cached;
            }
            const summary = buildRunSummary(run);
            const casesById = new Map();
            for (const candidate of buildCaseSummaries(run.report, run.run_id, false)) {
                // First occurrence wins, matching Array.prototype.find on the case list.
                if (!casesById.has(candidate.case_id)) {
                    casesById.set(candidate.case_id, candidate);
                }
            }
            const view = { summary, casesById };
            runViewCache.set(run.run_id, view);
            return view;
        };
        const items = annotations.map((item) => {
            const run = runMap.get(item.run_id) ?? null;
            const runView = run ? resolveRunView(run) : null;
            const runSummary = runView ? runView.summary : null;
            const caseSummary = runView ? runView.casesById.get(item.case_id) ?? null : null;
            return {
                ...item,
                run: runSummary,
                case_summary: caseSummary,
                technical_context: {
                    report_path: run?.report_path ?? null,
                    trace_id: item.context.trace_id,
                    reply_type: item.context.reply_type,
                    domain: item.context.domain,
                    query_class: item.context.query_class,
                    checks: caseSummary?.checks ?? null,
                    metric_subscores: caseSummary?.metric_subscores ?? null
                }
            };
        });
        // Average is rounded to two decimals; null when there is nothing to average.
        const avgRating = items.length > 0 ? Number((items.reduce((acc, item) => acc + item.rating, 0) / items.length).toFixed(2)) : null;
        const byDecision = items.reduce((acc, item) => {
            acc[item.manual_case_decision] = (acc[item.manual_case_decision] ?? 0) + 1;
            return acc;
        }, {});
        (0, http_1.ok)(res, {
            ok: true,
            generated_at: new Date().toISOString(),
            filters_applied: {
                run_id: runIdFilter ?? null,
                case_id: caseIdFilter ?? null,
                min_rating: minRating,
                manual_case_decision: decisionFilter,
                limit
            },
            stats: {
                total: items.length,
                avg_rating: avgRating,
                by_decision: byDecision
            },
            available_manual_case_decisions: MANUAL_CASE_DECISIONS,
            manual_case_decision_schema: readManualDecisionSchema(),
            items
        });
    }
    catch (error) {
        next(error);
    }
});
// POST /api/autoruns/annotations
// Creates the annotation for (run_id, case_id, message_index), or replaces the one
// already stored under the same key. On replacement the annotation id, creation
// time and resolution fields of the existing record are kept.
// Required body fields: run_id, case_id, message_index, rating, comment. The parsed
// manual_case_decision and annotation_author are stored as well.
// Only messages whose role is "assistant" may be annotated.
// Errors (forwarded to next()): 400 INVALID_ANNOTATION_PAYLOAD, 404 AUTORUN_NOT_FOUND,
// 404 AUTORUN_CASE_NOT_FOUND, 400 AUTORUN_MESSAGE_NOT_FOUND, 400 AUTORUN_MESSAGE_NOT_ASSISTANT.
router.post("/api/autoruns/annotations", (req, res, next) => {
    try {
        const payload = toRecord(req.body);
        if (!payload) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "JSON body is required", 400);
        }
        // All fields are parsed up front, in this order, before any validation runs.
        const runId = toStringSafe(payload.run_id);
        const caseId = toStringSafe(payload.case_id);
        const rawMessageIndex = toNumberSafe(payload.message_index);
        const rawRating = toNumberSafe(payload.rating);
        const comment = parseComment(payload.comment);
        const manualCaseDecision = parseManualCaseDecision(payload.manual_case_decision);
        const annotationAuthor = parseAnnotationAuthor(payload.annotation_author);
        if (!(runId && caseId)) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "run_id and case_id are required", 400);
        }
        if (rawMessageIndex === null) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "message_index is required", 400);
        }
        const messageIndex = clampInt(rawMessageIndex, 0, 100_000, 0);
        if (rawRating === null) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "rating is required", 400);
        }
        const rating = clampInt(rawRating, 1, 5, 1);
        if (comment.length === 0) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "comment is required", 400);
        }
        const run = findRunById(runId);
        if (!run) {
            throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
        }
        const caseSummary = buildCaseSummaries(run.report, run.run_id, false)
            .find((entry) => entry.case_id === caseId) ?? null;
        if (!caseSummary) {
            throw new http_1.ApiError("AUTORUN_CASE_NOT_FOUND", `Case not found: ${caseId} in run ${runId}`, 404);
        }
        // Prefer the stored session dialog; fall back to one rebuilt from the run.
        const dialog = loadSessionDialog(runId, caseId) ?? buildFallbackDialog(run, caseId);
        if (messageIndex >= dialog.messages.length) {
            throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_FOUND", `Message index ${messageIndex} out of range`, 400);
        }
        const targetMessage = dialog.messages[messageIndex];
        const targetRole = toStringSafe(targetMessage.role) ?? "unknown";
        if (targetRole !== "assistant") {
            throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_ASSISTANT", "Only assistant answers can be annotated", 400);
        }
        // Walk backwards from the annotated answer to the closest preceding user message.
        let pairedUserQuestion;
        for (let cursor = messageIndex - 1; cursor >= 0; cursor -= 1) {
            const candidate = dialog.messages[cursor];
            if ((toStringSafe(candidate.role) ?? "") === "user") {
                pairedUserQuestion = candidate;
                break;
            }
        }
        const nowIso = new Date().toISOString();
        const annotations = readAnnotations();
        const wantedKey = annotationKey(runId, caseId, messageIndex);
        const existingIndex = annotations.findIndex((entry) => annotationKey(entry.run_id, entry.case_id, entry.message_index) === wantedKey);
        const previous = existingIndex < 0 ? null : annotations[existingIndex];
        const context = {
            message_id: toStringSafe(targetMessage.message_id),
            trace_id: toStringSafe(targetMessage.trace_id) ?? caseSummary.trace_id,
            reply_type: toStringSafe(targetMessage.reply_type) ?? caseSummary.reply_type,
            eval_target: run.eval_target,
            prompt_version: toStringSafe(run.report.prompt_version),
            domain: caseSummary.domain,
            query_class: caseSummary.query_class,
            question_text: toStringSafe(pairedUserQuestion?.text),
            answer_text: toStringSafe(targetMessage.text)
        };
        const annotation = {
            annotation_id: previous?.annotation_id ?? generateAnnotationId(),
            run_id: runId,
            case_id: caseId,
            session_id: caseSummary.session_id,
            message_index: messageIndex,
            rating,
            comment,
            manual_case_decision: manualCaseDecision,
            annotation_author: annotationAuthor,
            resolved: previous?.resolved ?? false,
            resolved_at: previous?.resolved_at ?? null,
            resolved_by: previous?.resolved_by ?? null,
            created_at: previous?.created_at ?? nowIso,
            updated_at: nowIso,
            context
        };
        if (existingIndex < 0) {
            annotations.push(annotation);
        }
        else {
            annotations[existingIndex] = annotation;
        }
        writeAnnotations(annotations);
        // Recompute the per-case stats from the list that was just written.
        const caseStats = buildAnnotationStatsMap(runId, annotations).get(caseId) ?? null;
        (0, http_1.ok)(res, {
            ok: true,
            annotation,
            case_annotation_stats: caseStats
        });
    }
    catch (failure) {
        next(failure);
    }
});
// PATCH /api/autoruns/annotations/:annotation_id
// Sets or clears the `resolved` flag of a stored annotation.
// Body: { resolved: boolean, resolved_by?: author }.
// When resolving, resolved_at becomes the current time and resolved_by is the supplied
// author, falling back to the previously stored one. When un-resolving, both are null.
// Errors (forwarded to next()): 400 INVALID_ANNOTATION_ID, 400 INVALID_ANNOTATION_PATCH,
// 404 ANNOTATION_NOT_FOUND.
router.patch("/api/autoruns/annotations/:annotation_id", (req, res, next) => {
    try {
        const annotationId = toStringSafe(req.params.annotation_id);
        if (!annotationId) {
            throw new http_1.ApiError("INVALID_ANNOTATION_ID", "annotation_id is required", 400);
        }
        const payload = toRecord(req.body);
        if (!payload) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PATCH", "JSON body is required", 400);
        }
        const resolved = toBooleanSafe(payload.resolved);
        if (resolved === null) {
            throw new http_1.ApiError("INVALID_ANNOTATION_PATCH", "resolved flag is required", 400);
        }
        const resolvedBy = parseAnnotationAuthor(payload.resolved_by);
        const annotations = readAnnotations();
        const position = annotations.findIndex((entry) => entry.annotation_id === annotationId);
        if (position < 0) {
            throw new http_1.ApiError("ANNOTATION_NOT_FOUND", `Annotation not found: ${annotationId}`, 404);
        }
        const nowIso = new Date().toISOString();
        const current = annotations[position];
        let resolvedAtValue = null;
        let resolvedByValue = null;
        if (resolved) {
            resolvedAtValue = nowIso;
            resolvedByValue = resolvedBy ?? current.resolved_by ?? null;
        }
        const updated = {
            ...current,
            resolved,
            resolved_at: resolvedAtValue,
            resolved_by: resolvedByValue,
            updated_at: nowIso
        };
        annotations[position] = updated;
        writeAnnotations(annotations);
        // Stats are rebuilt for the run that owns the patched annotation.
        const caseStats = buildAnnotationStatsMap(updated.run_id, annotations).get(updated.case_id) ?? null;
        (0, http_1.ok)(res, {
            ok: true,
            annotation: updated,
            case_annotation_stats: caseStats
        });
    }
    catch (failure) {
        next(failure);
    }
});
// GET /api/autoruns/manual-decision-schema
// Returns the manual-decision schema together with the list of allowed decision values.
router.get("/api/autoruns/manual-decision-schema", (_req, res) => {
    const payload = {
        ok: true,
        schema: readManualDecisionSchema(),
        enum: MANUAL_CASE_DECISIONS
    };
    (0, http_1.ok)(res, payload);
});
// GET /api/autoruns/post-analysis
// Runs collectPostAnalysis over the stored annotations, newest first and capped at
// `annotation_limit`, scoped to the indexed runs.
// Query parameters: run_id, limit_per_queue (5..250, default 40),
// annotation_limit (20..5000, default 1500), scan_limit (50..5000, default 2500),
// plus whatever run filters parseFilters / hasAnyRunFilterQuery recognise.
// Any thrown error is forwarded to next().
router.get("/api/autoruns/post-analysis", (req, res, next) => {
    try {
        const query = req.query;
        const runIdFilter = toStringSafe(query.run_id);
        const limitPerQueue = clampInt(toNumberSafe(query.limit_per_queue), 5, 250, 40);
        const annotationLimit = clampInt(toNumberSafe(query.annotation_limit), 20, 5000, 1500);
        const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500);
        const runFilters = parseFilters(query);
        const applyRunFilters = hasAnyRunFilterQuery(query);
        // Scan with the larger of the two limits so the run filters see enough runs.
        const runIndex = indexRuns(Math.max(scanLimit, runFilters.scan_limit));
        const filteredRuns = applyRunFilters ? runIndex.filter((run) => matchesFilters(run, runFilters)) : runIndex;
        const runMap = new Map(filteredRuns.map((run) => [run.run_id, run]));
        // Fix: when the caller asked for run filters and no run matched, the run map is
        // empty. Previously an empty map disabled the scope check, so annotations of all
        // runs were analysed while `runs_considered` was empty. Explicit run filters now
        // always restrict the annotations. Without run filters an empty run index still
        // leaves the annotations unrestricted, as before.
        const restrictToKnownRuns = applyRunFilters || runMap.size > 0;
        const scopedAnnotations = readAnnotations()
            .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
            .filter((item) => (restrictToKnownRuns ? runMap.has(item.run_id) : true))
            .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
            .slice(0, annotationLimit);
        const analysis = collectPostAnalysis(scopedAnnotations, runMap, limitPerQueue);
        (0, http_1.ok)(res, {
            ok: true,
            generated_at: new Date().toISOString(),
            filters_applied: {
                run_id: runIdFilter ?? null,
                run_filters_applied: applyRunFilters,
                limit_per_queue: limitPerQueue,
                annotation_limit: annotationLimit,
                scan_limit: scanLimit
            },
            // At most 500 run summaries are returned.
            runs_considered: filteredRuns.slice(0, 500).map((item) => buildRunSummary(item)),
            manual_case_decision_schema: readManualDecisionSchema(),
            post_analysis: analysis
        });
    }
    catch (error) {
        next(error);
    }
});
// GET /api/autoruns/autogen/history
// Returns up to `limit` (1..500, default 120) autogen history records, hydrated for
// the API. When `mode` is present and accepted by isAutoGenMode, only records of that
// mode are returned; a missing or unrecognised mode returns records of every mode.
// Any thrown error is forwarded to next().
router.get("/api/autoruns/autogen/history", (req, res, next) => {
    try {
        const limit = clampInt(toNumberSafe(req.query.limit), 1, 500, 120);
        const requestedMode = toStringSafe(req.query.mode);
        // null means "do not filter by mode".
        const modeFilter = requestedMode && isAutoGenMode(requestedMode) ? requestedMode : null;
        const history = readAutoGenHistory();
        const selected = modeFilter === null
            ? history
            : history.filter((entry) => entry.mode === modeFilter);
        const items = hydrateAutoGenHistoryForApi(selected.slice(0, limit));
        (0, http_1.ok)(res, {
            ok: true,
            generated_at: new Date().toISOString(),
            items
        });
    }
    catch (failure) {
        next(failure);
    }
});
// GET /api/autoruns/autogen/personality-catalog
// Returns the autogen personality catalog with a generation timestamp.
// Any thrown error is forwarded to next().
router.get("/api/autoruns/autogen/personality-catalog", (_req, res, next) => {
    try {
        const generatedAt = new Date().toISOString();
        const catalog = buildAutogenPersonalityCatalog();
        (0, http_1.ok)(res, {
            ok: true,
            generated_at: generatedAt,
            items: catalog
        });
    }
    catch (failure) {
        next(failure);
    }
});
// POST /api/autoruns/autogen/save-assistant-session
// Saves the user questions of an existing assistant session as an eval case set.
// It writes the case-set file and a session snapshot, then prepends a
// "saved_user_sessions" record to the autogen history (history is capped at 500).
// Body: { session_id, title, generated_by?, context? }.
// Errors (forwarded to next()): 400 INVALID_AUTOGEN_SAVE_SESSION_PAYLOAD,
// 404 ASSISTANT_SESSION_NOT_FOUND, 400 ASSISTANT_SESSION_EMPTY.
router.post("/api/autoruns/autogen/save-assistant-session", (req, res, next) => {
    try {
        const payload = toRecord(req.body);
        if (!payload) {
            throw new http_1.ApiError("INVALID_AUTOGEN_SAVE_SESSION_PAYLOAD", "JSON body is required", 400);
        }
        const sessionId = toStringSafe(payload.session_id);
        const title = parseAutoGenTitle(payload.title);
        const generatedBy = parseAnnotationAuthor(payload.generated_by);
        const context = toRecord(payload.context);
        if (!sessionId) {
            throw new http_1.ApiError("INVALID_AUTOGEN_SAVE_SESSION_PAYLOAD", "session_id is required", 400);
        }
        if (!title) {
            throw new http_1.ApiError("INVALID_AUTOGEN_SAVE_SESSION_PAYLOAD", "title is required", 400);
        }
        const session = services.assistantService.getSession(sessionId);
        if (!session) {
            throw new http_1.ApiError("ASSISTANT_SESSION_NOT_FOUND", `Session not found: ${sessionId}`, 404);
        }
        // Keep only user turns, sanitised; turns that sanitise to an empty string are dropped.
        const questions = [];
        for (const turn of session.items) {
            if (turn.role !== "user") {
                continue;
            }
            const cleaned = sanitizeGeneratedQuestion(turn.text);
            if (cleaned.length > 0) {
                questions.push(cleaned);
            }
        }
        if (questions.length === 0) {
            throw new http_1.ApiError("ASSISTANT_SESSION_EMPTY", "Assistant session has no user questions to save.", 400);
        }
        const generationId = generateAutogenId();
        const caseSetFile = buildAutogenCaseSetFileName("saved_user_sessions", generationId);
        const caseSetPayload = buildSavedSessionCaseSetPayload({
            generationId,
            title,
            questions,
            scenarioTag: "saved_user_sessions"
        });
        writeJsonFile(path_1.default.resolve(config_1.EVAL_CASES_DIR, caseSetFile), caseSetPayload);
        const snapshotFile = writeSavedAssistantSessionSnapshot({
            generationId,
            sessionId,
            title,
            session: session,
            questions
        });
        const createdAt = new Date().toISOString();
        // The fingerprint is repaired from the raw string form whenever toStringSafe accepts it.
        const promptFingerprint = toStringSafe(context?.prompt_fingerprint)
            ? repairAutogenMojibake(String(context?.prompt_fingerprint))
            : null;
        const record = {
            generation_id: generationId,
            created_at: createdAt,
            mode: "saved_user_sessions",
            title,
            count: questions.length,
            domain: null,
            questions,
            generated_by: generatedBy,
            saved_case_set_file: caseSetFile,
            context: {
                llm_provider: toStringSafe(context?.llm_provider),
                model: toStringSafe(context?.model),
                assistant_prompt_version: toStringSafe(context?.assistant_prompt_version),
                decomposition_prompt_version: toStringSafe(context?.decomposition_prompt_version),
                prompt_fingerprint: promptFingerprint,
                autogen_personality_id: null,
                autogen_personality_prompt: null,
                source_session_id: sessionId,
                saved_session_file: snapshotFile,
                saved_case_set_kind: "assistant_session_scenario"
            }
        };
        // Newest record first; only the 500 most recent records are persisted.
        const history = readAutoGenHistory();
        history.unshift(record);
        writeAutoGenHistory(history.slice(0, 500));
        (0, http_1.ok)(res, {
            ok: true,
            generation: record
        });
    }
    catch (failure) {
        next(failure);
    }
});
// PATCH /api/autoruns/autogen/history/:generation_id/questions
// Replaces the question list of an existing autogen history record, updates its
// count, rewrites the associated case-set file and persists the history.
// Body: { questions: [...] } — at least one question must remain after parsing.
// Errors (forwarded to next()): 400 INVALID_AUTOGEN_GENERATION_ID,
// 400 INVALID_AUTOGEN_QUESTIONS_PAYLOAD, 404 AUTOGEN_GENERATION_NOT_FOUND.
router.patch("/api/autoruns/autogen/history/:generation_id/questions", (req, res, next) => {
    try {
        const generationId = toStringSafe(req.params.generation_id);
        const payload = toRecord(req.body);
        if (!generationId) {
            throw new http_1.ApiError("INVALID_AUTOGEN_GENERATION_ID", "generation_id is required", 400);
        }
        if (!payload) {
            throw new http_1.ApiError("INVALID_AUTOGEN_QUESTIONS_PAYLOAD", "JSON body is required", 400);
        }
        const questions = parseAssistantSessionQuestions(payload.questions);
        if (questions.length === 0) {
            throw new http_1.ApiError("INVALID_AUTOGEN_QUESTIONS_PAYLOAD", "questions must contain at least one item", 400);
        }
        const history = readAutoGenHistory();
        const position = history.findIndex((entry) => entry.generation_id === generationId);
        if (position < 0) {
            throw new http_1.ApiError("AUTOGEN_GENERATION_NOT_FOUND", `Generation not found: ${generationId}`, 404);
        }
        const updated = {
            ...history[position],
            count: questions.length,
            questions
        };
        // The case-set file is rewritten before the history entry is replaced and saved.
        rewriteAutoGenCaseSetFile(updated);
        history[position] = updated;
        writeAutoGenHistory(history);
        (0, http_1.ok)(res, {
            ok: true,
            generation: updated
        });
    }
    catch (failure) {
        next(failure);
    }
});
// DELETE /api/autoruns/autogen/history/:generation_id
// Removes an autogen history record and attempts to delete its case-set file and
// saved-session snapshot. The response lists the files safeDeleteFile reported as deleted.
// Errors (forwarded to next()): 400 INVALID_AUTOGEN_GENERATION_ID,
// 404 AUTOGEN_GENERATION_NOT_FOUND.
router.delete("/api/autoruns/autogen/history/:generation_id", (req, res, next) => {
    try {
        const generationId = toStringSafe(req.params.generation_id);
        if (!generationId) {
            throw new http_1.ApiError("INVALID_AUTOGEN_GENERATION_ID", "generation_id is required", 400);
        }
        const history = readAutoGenHistory();
        const position = history.findIndex((entry) => entry.generation_id === generationId);
        if (position < 0) {
            throw new http_1.ApiError("AUTOGEN_GENERATION_NOT_FOUND", `Generation not found: ${generationId}`, 404);
        }
        const target = history[position];
        // Snapshots live in a "saved_sessions" directory next to the generator history file.
        const savedSessionsDir = path_1.default.resolve(path_1.default.dirname(config_1.AUTORUN_GENERATOR_HISTORY_FILE), "saved_sessions");
        // Both paths are resolved before anything is deleted.
        const caseSetPath = resolveFileInsideDir(config_1.EVAL_CASES_DIR, target.saved_case_set_file);
        const savedSessionPath = resolveFileInsideDir(savedSessionsDir, target.context?.saved_session_file ?? null);
        const deletedFiles = [];
        for (const candidatePath of [caseSetPath, savedSessionPath]) {
            const removed = safeDeleteFile(candidatePath);
            if (removed) {
                deletedFiles.push(removed);
            }
        }
        history.splice(position, 1);
        writeAutoGenHistory(history);
        (0, http_1.ok)(res, {
            ok: true,
            generation_id: generationId,
            deleted_files: deletedFiles
        });
    }
    catch (failure) {
        next(failure);
    }
});
// POST /api/autoruns/autogen/generate
// Generates a list of questions: via the live LLM call for mode "qwen_seed",
// otherwise via generateCodexCreativeQuestions. The list is sanitised, de-duplicated
// and truncated to `count`. It is written as a case-set file unless
// persist_to_eval_cases is false, and a record is prepended to the autogen history
// (history is capped at 500).
// Body: { mode, count, domain, persist_to_eval_cases?, generated_by?, context? } plus
// whatever parseAutogenLlmRuntimeConfig reads from the body and context.
// Errors (forwarded to next()): 400 INVALID_AUTOGEN_PAYLOAD,
// 400 AUTOGEN_MODE_NOT_SUPPORTED (mode "saved_user_sessions"),
// 400 AUTOGEN_LLM_CONFIG_REQUIRED (qwen_seed without an LLM config).
router.post("/api/autoruns/autogen/generate", async (req, res, next) => {
    try {
        const payload = toRecord(req.body);
        if (!payload) {
            throw new http_1.ApiError("INVALID_AUTOGEN_PAYLOAD", "JSON body is required", 400);
        }
        const mode = parseAutoGenMode(payload.mode);
        const count = parseAutogenCount(payload.count);
        const domain = parseAutogenDomain(payload.domain);
        const persistCaseSet = toBooleanSafe(payload.persist_to_eval_cases) ?? true;
        const generatedBy = parseAnnotationAuthor(payload.generated_by);
        const context = toRecord(payload.context);
        const llmConfig = parseAutogenLlmRuntimeConfig(payload, context);
        const personalityPrompt = toStringSafe(context?.autogen_personality_prompt);
        if (mode === "saved_user_sessions") {
            throw new http_1.ApiError("AUTOGEN_MODE_NOT_SUPPORTED", "Use `/api/autoruns/autogen/save-assistant-session` to save user sessions.", 400);
        }
        let generated;
        if (mode !== "qwen_seed") {
            generated = generateCodexCreativeQuestions(count, domain);
        }
        else {
            if (!llmConfig) {
                throw new http_1.ApiError("AUTOGEN_LLM_CONFIG_REQUIRED", "Для режима qwen_seed нужен активный LLM-контур (provider/model/baseUrl) из настроек подключения.", 400);
            }
            generated = await generateQwenSeedQuestionsLive({
                count,
                domain,
                personalityPrompt,
                llmConfig,
                client: openaiClient
            });
        }
        // Sanitise every candidate, drop empties, de-duplicate preserving first-seen
        // order, then cap the list at the requested count.
        const uniqueQuestions = new Set();
        for (const candidate of generated) {
            const cleaned = sanitizeGeneratedQuestion(candidate);
            if (cleaned.length > 0) {
                uniqueQuestions.add(cleaned);
            }
        }
        const questions = [...uniqueQuestions].slice(0, count);
        const generationId = generateAutogenId();
        let savedCaseSetFile = null;
        if (persistCaseSet) {
            const casesDir = config_1.EVAL_CASES_DIR;
            if (!fs_1.default.existsSync(casesDir)) {
                fs_1.default.mkdirSync(casesDir, { recursive: true });
            }
            const fileName = buildAutogenCaseSetFileName(mode, generationId);
            const caseSetPayload = buildAutogenCaseSetPayload({
                generationId,
                mode,
                domain,
                questions
            });
            fs_1.default.writeFileSync(path_1.default.resolve(casesDir, fileName), JSON.stringify(caseSetPayload, null, 2), "utf-8");
            savedCaseSetFile = fileName;
        }
        const createdAt = new Date().toISOString();
        // Context is recorded only when the request supplied one; free-text fields are
        // repaired from their raw string form whenever toStringSafe accepts them.
        let contextRecord = null;
        if (context) {
            const promptFingerprint = toStringSafe(context.prompt_fingerprint)
                ? repairAutogenMojibake(String(context.prompt_fingerprint))
                : null;
            const recordedPersonalityPrompt = toStringSafe(context.autogen_personality_prompt)
                ? repairAutogenMojibake(String(context.autogen_personality_prompt))
                : null;
            contextRecord = {
                llm_provider: toStringSafe(context.llm_provider),
                model: toStringSafe(context.model),
                assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
                decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
                prompt_fingerprint: promptFingerprint,
                autogen_personality_id: toStringSafe(context.autogen_personality_id),
                autogen_personality_prompt: recordedPersonalityPrompt,
                source_session_id: null,
                saved_session_file: null,
                saved_case_set_kind: "single_turn_list"
            };
        }
        const record = {
            generation_id: generationId,
            created_at: createdAt,
            mode,
            title: null,
            count: questions.length,
            domain,
            questions,
            generated_by: generatedBy,
            saved_case_set_file: savedCaseSetFile,
            context: contextRecord
        };
        // Newest record first; only the 500 most recent records are persisted.
        const history = readAutoGenHistory();
        history.unshift(record);
        writeAutoGenHistory(history.slice(0, 500));
        (0, http_1.ok)(res, {
            ok: true,
            generation: record
        });
    }
    catch (failure) {
        next(failure);
    }
});
return router;
}