"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.__autoRunsQuestionTestUtils = void 0; exports.buildAutoRunsRouter = buildAutoRunsRouter; const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const express_1 = require("express"); const iconv_lite_1 = __importDefault(require("iconv-lite")); const config_1 = require("../config"); const http_1 = require("../utils/http"); const capabilitiesRegistry_1 = require("../services/capabilitiesRegistry"); const openaiResponsesClient_1 = require("../services/openaiResponsesClient"); const MANUAL_CASE_DECISIONS = [ "covered_ok", "covered_but_bad_answer", "candidate_for_implementation", "needs_routing_extension", "out_of_scope_but_answer_softly", "unsafe_question_limit_strictly", "needs_dialog_policy_fix", "needs_capability_registry_update", "bad_test_case" ]; const DECISION_QUEUE_MAP = { covered_ok: "none", covered_but_bad_answer: "policy_fix", candidate_for_implementation: "routing_extension", needs_routing_extension: "routing_extension", out_of_scope_but_answer_softly: "soft_boundary", unsafe_question_limit_strictly: "safety_policy", needs_dialog_policy_fix: "policy_fix", needs_capability_registry_update: "capability_registry", bad_test_case: "testset_hygiene" }; function toRecord(value) { if (!value || typeof value !== "object" || Array.isArray(value)) { return null; } return value; } function toArray(value) { return Array.isArray(value) ? value : []; } function toStringSafe(value) { if (typeof value !== "string") { return null; } const trimmed = value.trim(); return trimmed.length > 0 ? trimmed : null; } function toNumberSafe(value) { if (typeof value === "number" && Number.isFinite(value)) { return value; } if (typeof value === "string" && value.trim().length > 0) { const parsed = Number(value); return Number.isFinite(parsed) ? parsed : null; } return null; } function toBooleanSafe(value) { if (typeof value === "boolean") { return value; } if (typeof value === "string") { const lowered = value.trim().toLowerCase(); if (["1", "true", "yes", "on"].includes(lowered)) return true; if (["0", "false", "no", "off"].includes(lowered)) return false; } return null; } function parseDateMs(value) { const asString = toStringSafe(value); if (!asString) { return null; } const ms = Date.parse(asString); return Number.isFinite(ms) ? ms : null; } function clampInt(value, min, max, fallback) { if (value === null || !Number.isFinite(value)) { return fallback; } const rounded = Math.trunc(value); if (rounded < min) return min; if (rounded > max) return max; return rounded; } function parseManualCaseDecision(value, fallback = "needs_dialog_policy_fix") { const normalized = toStringSafe(value); if (!normalized) return fallback; return (MANUAL_CASE_DECISIONS.includes(normalized) ? normalized : fallback); } function parseAnnotationAuthor(value) { const author = toStringSafe(value); if (!author) return null; return author.slice(0, 80); } function parseAnnotationResolved(value, fallback = false) { const parsed = toBooleanSafe(value); return parsed === null ? fallback : parsed; } function readManualDecisionSchema() { const fallback = { schema_version: "manual_case_decision_schema_v1_fallback", enum: MANUAL_CASE_DECISIONS, labels: { covered_ok: "Покрыто и ок", covered_but_bad_answer: "Покрыто, но ответ плохой", candidate_for_implementation: "Кандидат на внедрение", needs_routing_extension: "Нужно расширение маршрутизации", out_of_scope_but_answer_softly: "Вне скоупа, но нужен мягкий ответ", unsafe_question_limit_strictly: "Высокий риск, строгие ограничения", needs_dialog_policy_fix: "Нужен фикс диалоговой политики", needs_capability_registry_update: "Нужно обновить реестр возможностей", bad_test_case: "Плохой тест-кейс" }, queue_mapping: DECISION_QUEUE_MAP }; if (!fs_1.default.existsSync(config_1.MANUAL_CASE_DECISION_SCHEMA_FILE)) { return fallback; } try { const parsed = JSON.parse(fs_1.default.readFileSync(config_1.MANUAL_CASE_DECISION_SCHEMA_FILE, "utf-8")); const record = toRecord(parsed); return record ?? fallback; } catch { return fallback; } } function readAutoGenHistory() { if (!fs_1.default.existsSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE)) return []; try { const parsed = JSON.parse(fs_1.default.readFileSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE, "utf-8")); if (!Array.isArray(parsed)) return []; return parsed .map((item) => toRecord(item)) .filter((item) => item !== null) .map((item) => ({ generation_id: toStringSafe(item.generation_id) ?? "", created_at: toStringSafe(item.created_at) ?? new Date().toISOString(), mode: toStringSafe(item.mode) ?? "codex_creative", count: clampInt(toNumberSafe(item.count), 1, 300, 20), domain: toStringSafe(item.domain), questions: toArray(item.questions) .map((q) => toStringSafe(q)) .filter((q) => q !== null) .map((q) => sanitizeGeneratedQuestion(q)) .filter((q) => q.length > 0) .slice(0, 500), generated_by: toStringSafe(item.generated_by), saved_case_set_file: toStringSafe(item.saved_case_set_file), context: toRecord(item.context) ? { llm_provider: toStringSafe(toRecord(item.context)?.llm_provider), model: toStringSafe(toRecord(item.context)?.model), assistant_prompt_version: toStringSafe(toRecord(item.context)?.assistant_prompt_version), decomposition_prompt_version: toStringSafe(toRecord(item.context)?.decomposition_prompt_version), prompt_fingerprint: toStringSafe(toRecord(item.context)?.prompt_fingerprint) ? repairAutogenMojibake(String(toRecord(item.context)?.prompt_fingerprint)) : null, autogen_personality_id: toStringSafe(toRecord(item.context)?.autogen_personality_id), autogen_personality_prompt: toStringSafe(toRecord(item.context)?.autogen_personality_prompt) ? repairAutogenMojibake(String(toRecord(item.context)?.autogen_personality_prompt)) : null } : null })) .filter((item) => item.generation_id.length > 0) .sort((a, b) => Date.parse(b.created_at) - Date.parse(a.created_at)); } catch { return []; } } function writeAutoGenHistory(records) { const dir = path_1.default.dirname(config_1.AUTORUN_GENERATOR_HISTORY_FILE); if (!fs_1.default.existsSync(dir)) { fs_1.default.mkdirSync(dir, { recursive: true }); } fs_1.default.writeFileSync(config_1.AUTORUN_GENERATOR_HISTORY_FILE, JSON.stringify(records, null, 2), "utf-8"); } function readEvalDatasetCases(filePath) { try { const parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")); if (Array.isArray(parsed)) { return parsed.map((item) => toRecord(item)).filter((item) => item !== null); } const record = toRecord(parsed); if (!record) return []; const cases = toArray(record.cases).map((item) => toRecord(item)).filter((item) => item !== null); return cases; } catch { return []; } } function collectCanonicalQuestions(limit = 300) { if (!fs_1.default.existsSync(config_1.EVAL_DATASETS_DIR)) { return []; } const entries = fs_1.default.readdirSync(config_1.EVAL_DATASETS_DIR, { withFileTypes: true }); const questions = []; for (const entry of entries) { if (!entry.isFile() || !entry.name.endsWith(".json")) continue; const fullPath = path_1.default.resolve(config_1.EVAL_DATASETS_DIR, entry.name); const cases = readEvalDatasetCases(fullPath); for (const testCase of cases) { const rawQuestion = toStringSafe(testCase.raw_question) ?? toStringSafe(testCase.user_message) ?? toStringSafe(testCase.query); if (rawQuestion) { questions.push(sanitizeGeneratedQuestion(rawQuestion)); } } } return Array.from(new Set(questions.filter((item) => item.length > 0))).slice(0, limit); } function normalizeDomainHint(value) { const domain = toStringSafe(value); if (!domain) return null; return domain.toLowerCase(); } function buildAutogenPromptFromCapabilityGroup(group) { const supported = group.supported_operations.slice(0, 3).join(", "); const examples = group.typical_queries.slice(0, 2).join(" | "); const hints = group.one_c_hints.slice(0, 2).join(", "); const operationsPart = supported ? ` Опирайся на операции: ${supported}.` : ""; const examplesPart = examples ? ` Ближайшие формулировки: ${examples}.` : ""; const hintsPart = hints ? ` Можно мягко упоминать контекст 1С: ${hints}.` : ""; return (`Генерируй реалистичные вопросы бухгалтера по группе "${group.group_title}".` + ` Добавляй живую разговорную форму и опечатки, но сохраняй бизнес-смысл.${operationsPart}${examplesPart}${hintsPart}` + " Не выдумывай операции вне read-only режима."); } function buildAutogenPersonalityCatalog() { const builtIn = [ { id: "general", label: "Общий контур", domain: null, default_prompt: "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл.", source: "built_in" } ]; const registry = (0, capabilitiesRegistry_1.loadCapabilitiesRegistry)(); const registryBased = registry.groups.map((group) => ({ id: `registry_${group.group_code}`, label: `${group.group_title} (реестр)`, domain: group.group_code, default_prompt: buildAutogenPromptFromCapabilityGroup(group), source: "capabilities_registry" })); const dedup = new Map(); for (const item of [...builtIn, ...registryBased]) { if (!item.id.trim()) continue; if (!dedup.has(item.id)) { dedup.set(item.id, item); } } return [...dedup.values()].map((item) => ({ ...item, label: repairAutogenMojibake(item.label), default_prompt: repairAutogenMojibake(item.default_prompt) })); } function fallbackDomainTemplates(domain) { if (domain?.includes("vat") || domain?.includes("ндс")) { return [ "Сколько НДС к уплате на дату по организации?", "Покажи прогноз НДС за период по организации.", "Почему по НДС сейчас ноль и из чего сложился расчет?" ]; } if (domain?.includes("counter") || domain?.includes("контраг")) { return [ "Покажи топ контрагентов по сумме платежей за период.", "Какой самый крупный договор у выбранной организации?", "Какие документы были по контрагенту за весь период?" ]; } if (domain?.includes("settlement") || domain?.includes("задолж") || domain?.includes("расчет")) { return [ "Какие незакрытые расчеты висят на конец периода?", "Есть ли незакрытые авансы по поставщикам?", "Покажи цепочки закрытия по счетам 60/62." ]; } return [ "С какой организацией сейчас можно работать в активном контуре?", "Покажи ключевые операции за выбранный период.", "Какие вопросы по этому домену ассистент поддерживает прямо сейчас?" ]; } function mutateIntoQwenStyle(base, index) { const wrappers = ["йо ", "слушай ", "подскажи плиз ", "короче ", "мож ", "а ну-ка "]; const tails = ["", " без воды", " по факту", " и коротко", " прям сейчас", " за весь период"]; const typoMap = [ [/\bкомпания\b/gi, "компиния"], [/\bсейчас\b/gi, "щас"], [/\bпожалуйста\b/gi, "плиз"], [/\bкакая\b/gi, "кака"], [/\bчто\b/gi, "че"] ]; const prefix = wrappers[index % wrappers.length]; const tail = tails[index % tails.length]; let text = `${prefix}${base}${tail}`.trim(); if (index % 2 === 0) { const [pattern, replacement] = typoMap[index % typoMap.length]; text = text.replace(pattern, replacement); } return text; } function generateQwenSeedQuestions(count, domain) { const seed = collectCanonicalQuestions(450); const source = seed.length > 0 ? seed : fallbackDomainTemplates(domain); const filtered = domain ? source.filter((item) => item.toLowerCase().includes(domain) || fallbackDomainTemplates(domain).includes(item)) : source; const bag = filtered.length > 0 ? filtered : source; const out = []; for (let index = 0; index < count; index += 1) { const base = bag[index % bag.length]; out.push(sanitizeGeneratedQuestion(mutateIntoQwenStyle(base, index))); } return Array.from(new Set(out.filter((item) => item.length > 0))).slice(0, count); } function generateCodexCreativeQuestions(count, domain) { const domainTemplates = fallbackDomainTemplates(domain); const patterns = [ "Дай бизнес-срез по состоянию на дату: {q}", "Нужен аккуратный ответ как бухгалтеру: {q}", "Если данных не хватает, скажи что уточнить, но сначала попробуй: {q}", "Сформулируй результат без технички и с шагом дальше: {q}", "Проверь в read-only и скажи что видно: {q}" ]; const out = []; for (let index = 0; index < count; index += 1) { const base = domainTemplates[index % domainTemplates.length]; const pattern = patterns[index % patterns.length]; out.push(sanitizeGeneratedQuestion(pattern.replace("{q}", base))); } return Array.from(new Set(out.filter((item) => item.length > 0))).slice(0, count); } function generateAutogenId() { return `gen-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 9)}`; } function readAnnotations() { if (!fs_1.default.existsSync(config_1.AUTORUN_ANNOTATIONS_FILE)) { return []; } try { const raw = fs_1.default.readFileSync(config_1.AUTORUN_ANNOTATIONS_FILE, "utf-8"); const parsed = JSON.parse(raw); if (!Array.isArray(parsed)) { return []; } return parsed .map((item) => toRecord(item)) .filter((item) => item !== null) .map((item) => { const context = toRecord(item.context); return { annotation_id: toStringSafe(item.annotation_id) ?? "", run_id: toStringSafe(item.run_id) ?? "", case_id: toStringSafe(item.case_id) ?? "", session_id: toStringSafe(item.session_id) ?? "", message_index: clampInt(toNumberSafe(item.message_index), 0, 100_000, 0), rating: clampInt(toNumberSafe(item.rating), 1, 5, 1), comment: toStringSafe(item.comment) ?? "", manual_case_decision: parseManualCaseDecision(item.manual_case_decision), annotation_author: parseAnnotationAuthor(item.annotation_author), resolved: parseAnnotationResolved(item.resolved), resolved_at: toStringSafe(item.resolved_at), resolved_by: parseAnnotationAuthor(item.resolved_by), created_at: toStringSafe(item.created_at) ?? new Date().toISOString(), updated_at: toStringSafe(item.updated_at) ?? new Date().toISOString(), context: { message_id: toStringSafe(context?.message_id), trace_id: toStringSafe(context?.trace_id), reply_type: toStringSafe(context?.reply_type), eval_target: toStringSafe(context?.eval_target) ?? "unknown", prompt_version: toStringSafe(context?.prompt_version), domain: toStringSafe(context?.domain), query_class: toStringSafe(context?.query_class), question_text: toStringSafe(context?.question_text), answer_text: toStringSafe(context?.answer_text) } }; }) .filter((item) => item.annotation_id && item.run_id && item.case_id); } catch { return []; } } function writeAnnotations(items) { fs_1.default.writeFileSync(config_1.AUTORUN_ANNOTATIONS_FILE, JSON.stringify(items, null, 2), "utf-8"); } function annotationKey(runId, caseId, messageIndex) { return `${runId}::${caseId}::${messageIndex}`; } function buildAnnotationStatsMap(runId, annotations) { const scoped = annotations.filter((item) => item.run_id === runId); const buckets = new Map(); for (const item of scoped) { const bucket = buckets.get(item.case_id) ?? { count: 0, ratings: [], latestMs: null }; bucket.count += 1; bucket.ratings.push(item.rating); const ms = Date.parse(item.updated_at); if (Number.isFinite(ms) && (bucket.latestMs === null || ms > bucket.latestMs)) { bucket.latestMs = ms; } buckets.set(item.case_id, bucket); } const result = new Map(); for (const [caseId, bucket] of buckets.entries()) { const avg = bucket.ratings.length > 0 ? Number((bucket.ratings.reduce((a, b) => a + b, 0) / bucket.ratings.length).toFixed(2)) : null; result.set(caseId, { count: bucket.count, latest_at: bucket.latestMs === null ? null : new Date(bucket.latestMs).toISOString(), avg_rating: avg }); } return result; } function buildAnnotationsByMessageIndex(runId, caseId, annotations) { const map = new Map(); for (const item of annotations) { if (item.run_id !== runId || item.case_id !== caseId) continue; const current = map.get(item.message_index); const currentMs = current ? Date.parse(current.updated_at) : null; const nextMs = Date.parse(item.updated_at); if (!current || (!Number.isNaN(nextMs) && (currentMs === null || nextMs >= currentMs))) { map.set(item.message_index, item); } } return map; } function resolveRunTarget(input) { const explicit = toStringSafe(input.report.eval_target); if (explicit === "assistant_stage1" || explicit === "assistant_stage2" || explicit === "assistant_p0" || explicit === "normalizer") { return explicit; } if (input.runId.startsWith("assistant-stage1-")) return "assistant_stage1"; if (input.runId.startsWith("assistant-stage2-")) return "assistant_stage2"; if (input.runId.startsWith("assistant-p0-")) return "assistant_p0"; if (input.runId.startsWith("eval-")) return "normalizer"; if (input.reportPath.endsWith(".report.json")) return "normalizer"; return "unknown"; } function normalizeTimestamp(report, fileMtimeMs) { const first = parseDateMs(report.run_timestamp); if (first !== null) { return { iso: new Date(first).toISOString(), ms: first }; } const second = parseDateMs(report.timestamp); if (second !== null) { return { iso: new Date(second).toISOString(), ms: second }; } return { iso: new Date(fileMtimeMs).toISOString(), ms: fileMtimeMs }; } function rateToPercent(value) { if (value === null) return null; if (value <= 1.2) return Math.max(0, Math.min(100, value * 100)); return Math.max(0, Math.min(100, value)); } function scoreToPercent(value) { if (value === null) return null; if (value <= 5.2) return Math.max(0, Math.min(100, (value / 5) * 100)); return Math.max(0, Math.min(100, value)); } function average(values) { const filtered = values.filter((item) => typeof item === "number" && Number.isFinite(item)); if (filtered.length === 0) { return null; } const sum = filtered.reduce((acc, item) => acc + item, 0); return Number((sum / filtered.length).toFixed(2)); } function getMetricRecord(report) { const metrics = toRecord(report.metrics); if (!metrics) return null; const raw = toRecord(metrics.raw); return raw ?? metrics; } function computeScoreIndex(report, target) { const metrics = getMetricRecord(report); if (!metrics) { return null; } if (target === "assistant_p0") { return average([ rateToPercent(toNumberSafe(metrics.problem_first_answer_rate)), scoreToPercent(toNumberSafe(metrics.mechanism_coherence_score)), rateToPercent(1 - (toNumberSafe(metrics.entity_leakage_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.accountant_actionability_score)), rateToPercent(toNumberSafe(metrics.route_correctness_rate)), rateToPercent(toNumberSafe(metrics.domain_purity_rate)), rateToPercent(toNumberSafe(metrics.limitation_honesty_rate)), rateToPercent(toNumberSafe(metrics.top_problem_unit_match_rate)) ]); } if (target === "assistant_stage1") { return average([ rateToPercent(toNumberSafe(metrics.retrieval_differentiation_rate)), rateToPercent(1 - (toNumberSafe(metrics.generic_explanation_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.accountant_actionability_score)), rateToPercent(1 - (toNumberSafe(metrics.false_confidence_rate) ?? 1)), rateToPercent(1 - (toNumberSafe(metrics.broad_answer_rate) ?? 1)), scoreToPercent(toNumberSafe(metrics.mechanism_specificity_score)), scoreToPercent(toNumberSafe(metrics.followup_context_retention_score)), rateToPercent(toNumberSafe(metrics.stage4_contract_compliance_rate)) ]); } if (target === "assistant_stage2") { return average([ rateToPercent(toNumberSafe(metrics.problem_unit_precision)), rateToPercent(toNumberSafe(metrics.problem_unit_recall_proxy)), rateToPercent(toNumberSafe(metrics.duplicate_collapse_rate)), scoreToPercent(toNumberSafe(metrics.mechanism_coherence_score)), scoreToPercent(toNumberSafe(metrics.problem_clarity_score)), rateToPercent(toNumberSafe(metrics.problem_first_answer_rate)), rateToPercent(1 - (toNumberSafe(metrics.entity_leakage_rate) ?? 1)) ]); } return average([ rateToPercent(toNumberSafe(metrics.schema_validation_pass_rate)), rateToPercent(toNumberSafe(metrics.route_resolution_accuracy) ?? toNumberSafe(metrics.route_hint_accuracy)), rateToPercent(toNumberSafe(metrics.execution_state_consistency_rate) ?? toNumberSafe(metrics.intent_class_accuracy)), rateToPercent(100 - (toNumberSafe(metrics.high_confidence_error_rate) ?? 0)) ]); } function countFailures(report) { const acceptanceGate = toRecord(report.acceptance_gate); const baselineGate = toRecord(report.baseline_stability_gate); const blocking = toArray(acceptanceGate?.blocking_failures).length + toArray(baselineGate?.blocking_regressions).length; const quality = toArray(acceptanceGate?.quality_failures).length + toArray(baselineGate?.legacy_quality_failures).length + toArray(baselineGate?.quality_gap_failures).length; return { blocking, quality }; } function caseScoreFromMetricSubscores(metricSubscores) { if (!metricSubscores) return null; const directProduct = scoreToPercent(toNumberSafe(metricSubscores.case_product_score)); if (directProduct !== null) { return Number(directProduct.toFixed(2)); } const candidates = [ scoreToPercent(toNumberSafe(metricSubscores.problem_clarity_score)), scoreToPercent(toNumberSafe(metricSubscores.mechanism_coherence_score)), rateToPercent(toNumberSafe(metricSubscores.problem_first_answer_rate)), rateToPercent(1 - (toNumberSafe(metricSubscores.entity_leakage_rate) ?? 1)), scoreToPercent(toNumberSafe(metricSubscores.accountant_usefulness_score)) ]; return average(candidates); } function isCaseClosed(input) { const checks = input.checks; if (checks) { const routeCorrect = toBooleanSafe(checks.route_correct); const domainPure = toBooleanSafe(checks.domain_pure); const problemFirst = toBooleanSafe(checks.problem_first_answer); if (routeCorrect !== null || domainPure !== null || problemFirst !== null) { if (routeCorrect === false) return false; if (domainPure === false) return false; if (problemFirst === false) return false; return true; } } if (typeof input.scoreIndex === "number") { return input.scoreIndex >= 65; } return null; } function getResultCases(report) { return toArray(report.results) .map((item) => toRecord(item)) .filter((item) => item !== null); } function buildCaseSummaries(report, runId, checkDialogAvailability, annotationStatsByCase) { const results = getResultCases(report); return results.map((item, index) => { const caseId = toStringSafe(item.case_id) ?? `case-${index + 1}`; const checks = toRecord(item.checks); const metricSubscores = toRecord(item.metric_subscores); const scoreIndex = caseScoreFromMetricSubscores(metricSubscores) ?? scoreToPercent(toNumberSafe(item.accountant_usefulness_score)) ?? null; const closedState = isCaseClosed({ checks, scoreIndex }); const sessionId = `${runId}-${caseId}`; const dialogAvailable = checkDialogAvailability ? fs_1.default.existsSync(path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`)) : false; const annotationStats = annotationStatsByCase?.get(caseId); return { case_id: caseId, domain: toStringSafe(item.domain), query_class: toStringSafe(item.query_class), status: closedState === null ? "unknown" : closedState ? "closed" : "open", score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)), trace_id: toStringSafe(item.trace_id), reply_type: toStringSafe(item.reply_type), session_id: sessionId, dialog_available: dialogAvailable, commented_count: annotationStats?.count ?? 0, latest_annotation_at: annotationStats?.latest_at ?? null, avg_rating: annotationStats?.avg_rating ?? null, checks, metric_subscores: metricSubscores }; }); } function buildCoverageFromCases(cases) { const coverageByDomain = new Map(); let closedCases = 0; let openCases = 0; for (const item of cases) { if (item.status === "closed") closedCases += 1; if (item.status === "open") openCases += 1; const domainKey = item.domain ?? "unknown"; const current = coverageByDomain.get(domainKey) ?? { total: 0, closed: 0 }; current.total += 1; if (item.status === "closed") current.closed += 1; coverageByDomain.set(domainKey, current); } const domainCoverage = Array.from(coverageByDomain.entries()) .map(([domain, value]) => ({ domain, total_cases: value.total, closed_cases: value.closed })) .sort((a, b) => b.total_cases - a.total_cases); return { closed_cases: closedCases, open_cases: openCases, domain_coverage: domainCoverage }; } function collectJsonCandidates(scanLimit) { const candidates = []; const sources = [ { dir: config_1.REPORTS_DIR, suffix: ".json" }, { dir: config_1.EVAL_CASES_DIR, suffix: ".report.json" } ]; for (const source of sources) { if (!fs_1.default.existsSync(source.dir)) continue; const entries = fs_1.default.readdirSync(source.dir, { withFileTypes: true }); for (const entry of entries) { if (!entry.isFile()) continue; if (!entry.name.endsWith(source.suffix)) continue; const fullPath = path_1.default.resolve(source.dir, entry.name); try { const stat = fs_1.default.statSync(fullPath); candidates.push({ path: fullPath, mtimeMs: stat.mtimeMs }); } catch { // skip broken file stat } } } return candidates.sort((a, b) => b.mtimeMs - a.mtimeMs).slice(0, scanLimit); } function indexRuns(scanLimit) { const files = collectJsonCandidates(scanLimit); const dedup = new Map(); for (const item of files) { let parsed; try { const raw = fs_1.default.readFileSync(item.path, "utf-8"); parsed = JSON.parse(raw); } catch { continue; } const report = toRecord(parsed); if (!report) continue; const runId = toStringSafe(report.run_id); if (!runId) continue; const evalTarget = resolveRunTarget({ report, runId, reportPath: item.path }); const normalizedTime = normalizeTimestamp(report, item.mtimeMs); const indexed = { run_id: runId, eval_target: evalTarget, report_path: item.path, report, timestamp_iso: normalizedTime.iso, timestamp_ms: normalizedTime.ms }; const current = dedup.get(runId); if (!current || indexed.timestamp_ms > current.timestamp_ms) { dedup.set(runId, indexed); } } return Array.from(dedup.values()).sort((a, b) => b.timestamp_ms - a.timestamp_ms); } function parseFilters(query) { const fromMs = parseDateMs(query.from); const toMs = parseDateMs(query.to); const targetRaw = toStringSafe(query.target)?.toLowerCase() ?? "all"; const target = targetRaw === "normalizer" || targetRaw === "assistant_stage1" || targetRaw === "assistant_stage2" || targetRaw === "assistant_p0" ? targetRaw : "all"; const useMock = toStringSafe(query.use_mock); const useMockFilter = useMock === null || useMock.toLowerCase() === "any" ? null : toBooleanSafe(useMock); const mode = toStringSafe(query.mode)?.toLowerCase() ?? "all"; const promptContains = (toStringSafe(query.prompt_contains) ?? "").toLowerCase(); const limit = clampInt(toNumberSafe(query.limit), 1, 500, 120); const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900); return { from_ms: fromMs, to_ms: toMs, target, use_mock: useMockFilter, prompt_contains: promptContains, mode, limit, scan_limit: scanLimit }; } function matchesFilters(run, filters) { if (filters.from_ms !== null && run.timestamp_ms < filters.from_ms) return false; if (filters.to_ms !== null && run.timestamp_ms > filters.to_ms) return false; if (filters.target !== "all" && run.eval_target !== filters.target) return false; const modeValue = (toStringSafe(run.report.mode) ?? "").toLowerCase(); if (filters.mode !== "all" && modeValue !== filters.mode) return false; if (filters.use_mock !== null) { const useMockValue = toBooleanSafe(run.report.use_mock); if (useMockValue !== filters.use_mock) return false; } if (filters.prompt_contains.length > 0) { const promptVersion = (toStringSafe(run.report.prompt_version) ?? "").toLowerCase(); if (!promptVersion.includes(filters.prompt_contains)) return false; } return true; } function buildRunSummary(run) { const connection = toRecord(run.report.connection); const normalizeConfig = toRecord(run.report.normalize_config) ?? toRecord(run.report.normalizeConfig); const llmProvider = toStringSafe(run.report.llm_provider) ?? toStringSafe(run.report.llmProvider) ?? toStringSafe(connection?.llm_provider) ?? toStringSafe(connection?.llmProvider) ?? toStringSafe(normalizeConfig?.llm_provider) ?? toStringSafe(normalizeConfig?.llmProvider); const model = toStringSafe(run.report.model) ?? toStringSafe(connection?.model) ?? toStringSafe(normalizeConfig?.model); const cases = buildCaseSummaries(run.report, run.run_id, false); const coverage = buildCoverageFromCases(cases); const failures = countFailures(run.report); return { run_id: run.run_id, eval_target: run.eval_target, run_timestamp: run.timestamp_iso, mode: toStringSafe(run.report.mode), llm_provider: llmProvider, model, use_mock: toBooleanSafe(run.report.use_mock), analysis_date: toStringSafe(run.report.analysis_date), prompt_version: toStringSafe(run.report.prompt_version), schema_version: toStringSafe(run.report.schema_version), suite_id: toStringSafe(run.report.suite_id), cases_total: toNumberSafe(run.report.cases_total) ?? cases.length, requests_total: toNumberSafe(toRecord(run.report.budget)?.requests_total), report_path: run.report_path, score_index: computeScoreIndex(run.report, run.eval_target), blocking_failures: failures.blocking, quality_failures: failures.quality, closed_cases: coverage.closed_cases, open_cases: coverage.open_cases, domain_coverage: coverage.domain_coverage }; } function mergeDomainCoverage(summaries) { const merged = new Map(); for (const summary of summaries) { for (const item of summary.domain_coverage) { const current = merged.get(item.domain) ?? { total: 0, closed: 0 }; current.total += item.total_cases; current.closed += item.closed_cases; merged.set(item.domain, current); } } return Array.from(merged.entries()) .map(([domain, value]) => ({ domain, total_cases: value.total, closed_cases: value.closed })) .sort((a, b) => b.total_cases - a.total_cases); } function buildHistoryStats(summaries) { const byTarget = {}; let blockingRuns = 0; let qualityRuns = 0; const scoreValues = []; for (const item of summaries) { byTarget[item.eval_target] = (byTarget[item.eval_target] ?? 0) + 1; if (item.blocking_failures > 0) blockingRuns += 1; if (item.quality_failures > 0) qualityRuns += 1; if (typeof item.score_index === "number") scoreValues.push(item.score_index); } const latestScore = typeof summaries[0]?.score_index === "number" ? summaries[0].score_index : null; const previousScore = typeof summaries[1]?.score_index === "number" ? summaries[1].score_index : null; const trend = latestScore === null || previousScore === null ? "flat" : latestScore > previousScore + 0.5 ? "up" : latestScore < previousScore - 0.5 ? "down" : "flat"; return { runs_total: summaries.length, by_target: byTarget, blocking_runs: blockingRuns, quality_gap_runs: qualityRuns, avg_score_index: scoreValues.length > 0 ? Number((scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length).toFixed(2)) : null, latest_score_index: latestScore, previous_score_index: previousScore, trend, domain_coverage: mergeDomainCoverage(summaries) }; } function findRunById(runId, scanLimit = 3000) { const indexed = indexRuns(scanLimit); return indexed.find((item) => item.run_id === runId) ?? null; } function buildAssistantModeSummary(dialogRecord) { if (!dialogRecord) return null; const conversation = toArray(dialogRecord.conversation) .map((item) => toRecord(item)) .filter((item) => item !== null); const lastAssistant = [...conversation] .reverse() .find((item) => toStringSafe(item.role) === "assistant"); const debug = toRecord(lastAssistant?.debug); return { reply_type: toStringSafe(lastAssistant?.reply_type), trace_id: toStringSafe(lastAssistant?.trace_id), detected_mode: toStringSafe(debug?.detected_mode), execution_lane: toStringSafe(debug?.execution_lane), tool_gate_decision: toStringSafe(debug?.tool_gate_decision), living_router_mode: toStringSafe(debug?.living_router_mode), fallback_type: toStringSafe(debug?.fallback_type) }; } function loadSessionDialog(runId, caseId) { const sessionId = `${runId}-${caseId}`; const filePath = path_1.default.resolve(config_1.ASSISTANT_SESSIONS_DIR, `${sessionId}.json`); if (!fs_1.default.existsSync(filePath)) { return null; } let parsed; try { parsed = JSON.parse(fs_1.default.readFileSync(filePath, "utf-8")); } catch { return null; } const record = toRecord(parsed); if (!record) return null; const conversation = toArray(record.conversation) .map((item) => toRecord(item)) .filter((item) => item !== null); const messages = conversation.map((item) => ({ message_id: toStringSafe(item.message_id), role: toStringSafe(item.role) ?? "unknown", text: toStringSafe(item.text) ?? "", created_at: toStringSafe(item.created_at), trace_id: toStringSafe(item.trace_id), reply_type: toStringSafe(item.reply_type) })); const turns = toArray(record.turns) .map((item) => toRecord(item)) .filter((item) => item !== null); const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null; const humanReadable = toRecord(lastTurn?.human_readable); const decomposition = toArray(humanReadable?.decomposition) .map((item) => toStringSafe(item)) .filter((item) => item !== null); return { source: "assistant_session", session_id: sessionId, messages, decomposition, assistant_mode: buildAssistantModeSummary(record) }; } function buildFallbackDialog(run, caseId) { const sessionId = `${run.run_id}-${caseId}`; const results = getResultCases(run.report); const targetCase = results.find((item) => (toStringSafe(item.case_id) ?? "") === caseId) ?? null; if (!targetCase) { return { source: "none", session_id: sessionId, messages: [], decomposition: [], assistant_mode: null }; } const userText = toStringSafe(targetCase.raw_question) ?? toStringSafe(targetCase.user_query_raw) ?? `Case ${caseId}`; const assistantSummaryParts = []; const validationPassed = toBooleanSafe(targetCase.validation_passed); if (validationPassed !== null) assistantSummaryParts.push(`validation_passed=${validationPassed}`); const routeMatch = toBooleanSafe(targetCase.route_match); if (routeMatch !== null) assistantSummaryParts.push(`route_match=${routeMatch}`); const intentMatch = toBooleanSafe(targetCase.intent_match); if (intentMatch !== null) assistantSummaryParts.push(`intent_match=${intentMatch}`); const confidence = toStringSafe(targetCase.confidence_overall); if (confidence) assistantSummaryParts.push(`confidence=${confidence}`); const metricSubscores = toRecord(targetCase.metric_subscores); if (metricSubscores) { for (const [key, value] of Object.entries(metricSubscores)) { if (toNumberSafe(value) !== null) { assistantSummaryParts.push(`${key}=${value}`); } } } if (assistantSummaryParts.length === 0) { assistantSummaryParts.push("No structured assistant dialog is available for this case in report artifacts."); } return { source: "report_fallback", session_id: sessionId, messages: [ { message_id: null, role: "user", text: userText, created_at: null, trace_id: null, reply_type: null }, { message_id: null, role: "assistant", text: assistantSummaryParts.join("\n"), created_at: null, trace_id: toStringSafe(targetCase.trace_id), reply_type: toStringSafe(targetCase.reply_type) } ], decomposition: [], assistant_mode: null }; } function withMessageAnnotations(runId, caseId, messages, annotations) { const byIndex = buildAnnotationsByMessageIndex(runId, caseId, annotations); return messages.map((message, index) => { const annotation = byIndex.get(index) ?? null; return { ...message, message_index: index, commented: annotation !== null, annotation }; }); } function buildRunAggregateDialog(run, annotations) { const cases = buildCaseSummaries(run.report, run.run_id, false); const messages = []; const decomposition = []; let globalMessageIndex = 0; for (const item of cases) { const caseId = item.case_id; const caseDialog = loadSessionDialog(run.run_id, caseId) ?? buildFallbackDialog(run, caseId); const annotatedCaseMessages = withMessageAnnotations(run.run_id, caseId, caseDialog.messages, annotations); for (const caseMessage of annotatedCaseMessages) { const localMessageIndex = toNumberSafe(caseMessage.message_index) ?? 0; messages.push({ ...caseMessage, case_id: caseId, case_message_index: localMessageIndex, message_index: globalMessageIndex }); globalMessageIndex += 1; } if (caseDialog.decomposition.length > 0) { decomposition.push(...caseDialog.decomposition.map((step) => `[${caseId}] ${step}`)); } } return { source: "run_aggregate", session_id: `${run.run_id}::__all__`, messages, decomposition, assistant_mode: null }; } function generateAnnotationId() { return `ann-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 9)}`; } function parseComment(value) { const text = toStringSafe(value) ?? ""; return text.trim(); } function parseDecisionFilter(value) { const normalized = toStringSafe(value); if (!normalized || normalized === "all") return "all"; return parseManualCaseDecision(normalized); } function parseAutoGenMode(value) { const normalized = toStringSafe(value)?.toLowerCase() ?? ""; if (normalized === "qwen_seed" || normalized === "codex_creative") { return normalized; } return "codex_creative"; } function parseAutogenCount(value) { return clampInt(toNumberSafe(value), 1, 200, 24); } function parseAutogenDomain(value) { const domain = normalizeDomainHint(value); if (!domain) return null; return domain.slice(0, 80); } function parseAutogenLlmRuntimeConfig(body, context) { const llm = toRecord(body.llm); const providerRaw = toStringSafe(llm?.llm_provider ?? context?.llm_provider)?.toLowerCase() ?? ""; const model = toStringSafe(llm?.model ?? context?.model); if (!model || (providerRaw !== "openai" && providerRaw !== "local")) { return null; } return { llm_provider: providerRaw === "local" ? "local" : "openai", api_key: toStringSafe(llm?.api_key) ?? "", model, base_url: toStringSafe(llm?.base_url), temperature: toNumberSafe(llm?.temperature), max_output_tokens: toNumberSafe(llm?.max_output_tokens) }; } function textMojibakeScore(value) { const source = String(value ?? ""); const cyrillic = (source.match(/[А-Яа-яЁё]/g) ?? []).length; const latin = (source.match(/[A-Za-z]/g) ?? []).length; const hardMarkers = (source.match(/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/g) ?? []).length; const pairMarkers = (source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length; const doubleEncodedMarkers = (source.match(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) ?? []).length; return cyrillic + latin - hardMarkers * 3 - pairMarkers * 2 - doubleEncodedMarkers * 2; } function looksLikeMojibake(value) { const source = String(value ?? ""); if (!source.trim()) { return false; } if (/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/.test(source)) { return true; } if ((source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length >= 2) { return true; } return (source.match(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) ?? []).length >= 2; } function repairAutogenMojibake(value) { const source = String(value ?? ""); if (!looksLikeMojibake(source)) { return source; } let candidate = source; for (let pass = 0; pass < 3; pass += 1) { let improved = false; try { const fromWin1251 = iconv_lite_1.default.encode(candidate, "win1251").toString("utf8"); if (textMojibakeScore(fromWin1251) > textMojibakeScore(candidate)) { candidate = fromWin1251; improved = true; } } catch { // ignore } try { const fromLatin1 = Buffer.from(candidate, "latin1").toString("utf8"); if (textMojibakeScore(fromLatin1) > textMojibakeScore(candidate)) { candidate = fromLatin1; improved = true; } } catch { // ignore } if (!improved) { break; } } return candidate; } function sanitizeGeneratedQuestion(value) { return repairAutogenMojibake(String(value ?? "")) .replace(/\r/g, " ") .replace(/\t/g, " ") .replace(/\s+/g, " ") .trim(); } const AUTOGEN_QUESTION_PLACEHOLDER_PATTERN = /^(?:questions?|вопросы?|список\s+вопросов)$/iu; const AUTOGEN_QUESTION_TAIL_PATTERNS = [ /^(?:без\s+воды|по\s+факту|и\s+коротко|коротко|прям(?:\s+)?сейчас|за\s+весь\s+период|по\s+делу)\??$/iu ]; function stripAutogenQuestionSuffix(value) { return sanitizeGeneratedQuestion(value).replace(/[?!.:,;]+$/u, "").trim(); } function isAutogenQuestionPlaceholder(value) { const core = stripAutogenQuestionSuffix(value).toLowerCase(); return core.length > 0 && AUTOGEN_QUESTION_PLACEHOLDER_PATTERN.test(core); } function isLikelyAutogenQuestionTail(value) { const core = stripAutogenQuestionSuffix(value).toLowerCase(); if (!core) { return false; } if (isAutogenQuestionPlaceholder(core)) { return true; } return AUTOGEN_QUESTION_TAIL_PATTERNS.some((pattern) => pattern.test(core)); } function mergeAutogenQuestionTail(baseQuestion, tail) { const base = stripAutogenQuestionSuffix(baseQuestion); const suffix = stripAutogenQuestionSuffix(tail); if (!base) { return suffix ? `${suffix}?` : ""; } if (!suffix) { return `${base}?`; } return `${base} ${suffix}?` .replace(/\s+/g, " ") .trim(); } function normalizeAutogenQuestionCandidates(candidates) { const normalized = []; for (const candidate of candidates) { const question = sanitizeGeneratedQuestion(candidate); if (!question) { continue; } if (isAutogenQuestionPlaceholder(question)) { continue; } if (isLikelyAutogenQuestionTail(question) && normalized.length > 0) { const merged = mergeAutogenQuestionTail(normalized[normalized.length - 1], question); if (merged) { normalized[normalized.length - 1] = merged; } continue; } normalized.push(question); } return normalized.filter((item) => item.length > 0); } function splitQuestionCandidates(rawText) { const normalized = repairAutogenMojibake(rawText).replace(/\r/g, "\n").trim(); if (!normalized) return []; const unescaped = normalized.replace(/\\"/g, '"').replace(/\\n/g, "\n"); const byLines = unescaped .split(/\n+/g) .map((line) => line.replace(/^\s*(?:[-*•]|\d{1,3}[).:]?)\s*/, "")) .map((line) => sanitizeGeneratedQuestion(line)) .filter((line) => line.length > 0); if (byLines.length > 1) { return normalizeAutogenQuestionCandidates(byLines); } const questionMarkCount = (unescaped.match(/\?/g) ?? []).length; if (questionMarkCount > 1) { const questionChunks = Array.from(unescaped.matchAll(/[^?]+(?:\?|$)/g)) .map((match) => sanitizeGeneratedQuestion(match[0])) .filter((chunk) => chunk.length > 0); if (questionChunks.length > 1) { const canSafelySplit = questionChunks.every((chunk) => !isAutogenQuestionPlaceholder(chunk) && !isLikelyAutogenQuestionTail(chunk) && sanitizeGeneratedQuestion(chunk).length >= 18); if (canSafelySplit) { return normalizeAutogenQuestionCandidates(questionChunks.map((chunk) => (chunk.endsWith("?") ? chunk : `${chunk}?`))); } } } const quoted = Array.from(unescaped.matchAll(/"([^"\n]{6,}?)"/g)) .map((match) => sanitizeGeneratedQuestion(match[1])) .filter((line) => line.length > 0); if (quoted.length > 1) { return normalizeAutogenQuestionCandidates(quoted); } const cleaned = sanitizeGeneratedQuestion(unescaped); return cleaned ? normalizeAutogenQuestionCandidates([cleaned]) : []; } function parseAutogenOutputJson(rawText) { const cleaned = repairAutogenMojibake(rawText) .trim() .replace(/^```json\s*/i, "") .replace(/^```\s*/i, "") .replace(/```$/i, "") .trim(); if (!cleaned) return null; try { return JSON.parse(cleaned); } catch { // continue } const arrayStart = cleaned.indexOf("["); const arrayEnd = cleaned.lastIndexOf("]"); if (arrayStart >= 0 && arrayEnd > arrayStart) { const fragment = cleaned.slice(arrayStart, arrayEnd + 1); try { return JSON.parse(fragment); } catch { // continue } } const objStart = cleaned.indexOf("{"); const objEnd = cleaned.lastIndexOf("}"); if (objStart >= 0 && objEnd > objStart) { const fragment = cleaned.slice(objStart, objEnd + 1); try { return JSON.parse(fragment); } catch { return null; } } return null; } function collectQuestionsFromCandidate(value, depth = 0) { if (depth > 5 || value === null || value === undefined) { return []; } if (Array.isArray(value)) { const expanded = value.flatMap((item) => collectQuestionsFromCandidate(item, depth + 1)); return normalizeAutogenQuestionCandidates(expanded); } if (typeof value === "string") { const text = value.trim(); if (!text) return []; const nestedParsed = parseAutogenOutputJson(text); if (nestedParsed !== null) { const nestedQuestions = collectQuestionsFromCandidate(nestedParsed, depth + 1); if (nestedQuestions.length > 0) { return nestedQuestions; } } try { const decoded = JSON.parse(text); if (decoded !== text) { const decodedQuestions = collectQuestionsFromCandidate(decoded, depth + 1); if (decodedQuestions.length > 0) { return decodedQuestions; } } } catch { // ignore non-JSON strings } return splitQuestionCandidates(text); } const record = toRecord(value); if (!record) { return []; } const fromQuestions = collectQuestionsFromCandidate(record.questions, depth + 1); if (fromQuestions.length > 0) { return fromQuestions; } const fallbackText = toStringSafe(record.question ?? record.user_message ?? record.text); return fallbackText ? splitQuestionCandidates(fallbackText) : []; } function extractQuestionsFromAutogenOutput(rawText) { const parsed = parseAutogenOutputJson(rawText); const fromParsed = collectQuestionsFromCandidate(parsed); if (fromParsed.length > 0) { return fromParsed; } return collectQuestionsFromCandidate(rawText); } exports.__autoRunsQuestionTestUtils = { splitQuestionCandidates, extractQuestionsFromAutogenOutput }; async function generateQwenSeedQuestionsLive(input) { const seedExamples = collectCanonicalQuestions(40); const fallbackExamples = fallbackDomainTemplates(input.domain); const examples = (seedExamples.length > 0 ? seedExamples : fallbackExamples).slice(0, 8); const personalityPrompt = input.personalityPrompt ?? "Генерируй реалистичные вопросы бухгалтера по 1С. Разговорный стиль допустим, но смысл должен быть четким."; const repairedPersonalityPrompt = repairAutogenMojibake(personalityPrompt); const maxOutputTokens = clampInt(input.llmConfig.max_output_tokens, 300, 3000, 1200); const temperature = input.llmConfig.temperature === null ? 0.5 : Math.max(0, Math.min(1.5, input.llmConfig.temperature)); const systemPrompt = [ "Ты генератор вопросов для автопрогонов бухгалтерского ассистента по 1С.", "Возвращай только JSON и никаких пояснений.", "Ассистент работает в read-only режиме: не проси действий изменения базы." ].join(" "); const repairedSystemPrompt = repairAutogenMojibake(systemPrompt); const developerPrompt = [ `Нужно сгенерировать ровно ${input.count} вопросов.`, "Формат ответа строго:", '{"questions":["вопрос 1","вопрос 2"]}', "Требования:", "1) каждый вопрос отдельный, без дубликатов;", "2) живой пользовательский язык;", "3) допустимы легкие разговорные сокращения;", "4) не выдавай мета-комментарии и не описывай правила." ].join("\n"); const repairedDeveloperPrompt = repairAutogenMojibake(developerPrompt); const userMessage = [ `Домен: ${input.domain ?? "general"}.`, `Промпт личности: ${repairedPersonalityPrompt}`, "Примеры ориентиров по стилю и тематике:", ...examples.map((item, index) => `${index + 1}. ${item}`) ].join("\n"); const repairedUserMessage = repairAutogenMojibake(userMessage); const response = await input.client.chat({ llmProvider: input.llmConfig.llm_provider, apiKey: input.llmConfig.api_key, model: input.llmConfig.model, baseUrl: input.llmConfig.base_url ?? undefined, temperature, maxOutputTokens: maxOutputTokens }, { systemPrompt: repairedSystemPrompt, developerPrompt: repairedDeveloperPrompt, userMessage: repairedUserMessage, temperature, maxOutputTokens }); const extracted = extractQuestionsFromAutogenOutput(response.outputText); const normalized = Array.from(new Set(extracted.map((item) => sanitizeGeneratedQuestion(item)).filter((item) => item.length > 0))); if (normalized.length === 0) { throw new http_1.ApiError("AUTOGEN_LLM_EMPTY_OUTPUT", "Qwen не вернул пригодные вопросы для автогенерации.", 502, { model: input.llmConfig.model }); } const fallback = generateQwenSeedQuestions(input.count, input.domain); return Array.from(new Set([...normalized, ...fallback])).slice(0, input.count); } function hasAnyRunFilterQuery(query) { return Boolean(toStringSafe(query.from) ?? toStringSafe(query.to) ?? toStringSafe(query.target) ?? toStringSafe(query.mode) ?? toStringSafe(query.use_mock) ?? toStringSafe(query.prompt_contains)); } function buildAutogenCaseSetFileName(mode, generationId) { const now = new Date(); const stamp = [ now.getUTCFullYear(), String(now.getUTCMonth() + 1).padStart(2, "0"), String(now.getUTCDate()).padStart(2, "0"), String(now.getUTCHours()).padStart(2, "0"), String(now.getUTCMinutes()).padStart(2, "0"), String(now.getUTCSeconds()).padStart(2, "0") ].join(""); return `assistant_autogen_${mode}_${stamp}_${generationId}.json`; } function buildAutogenCaseSetPayload(input) { const normalizedQuestions = Array.from(new Set(input.questions.map((item) => sanitizeGeneratedQuestion(item)).filter((item) => item.length > 0))); const cases = normalizedQuestions.map((question, index) => ({ case_id: `AUTO-${String(index + 1).padStart(3, "0")}`, scenario_tag: `${input.mode}_${input.domain ?? "general"}`, question_type: "direct", broadness_level: "medium", turns: [{ user_message: question }], expected_hints: { expected_reply_type: null, expected_degraded_to: null } })); return { suite_id: `assistant_autogen_${input.generationId}`, suite_version: "0.1.0", schema_version: "assistant_autogen_suite_v0_1", generated_at: new Date().toISOString(), generation_id: input.generationId, mode: input.mode, domain: input.domain, scenario_count: cases.length, case_ids: cases.map((item) => item.case_id), cases }; } function collectPostAnalysis(annotations, runMap, limitPerQueue) { const byDecision = {}; const byQueue = {}; const byDomain = new Map(); const queues = { routing_extension: [], policy_fix: [], capability_registry: [], soft_boundary: [], safety_policy: [], testset_hygiene: [], covered_ok: [] }; const registry = (0, capabilitiesRegistry_1.loadCapabilitiesRegistry)(); for (const item of annotations) { byDecision[item.manual_case_decision] = (byDecision[item.manual_case_decision] ?? 0) + 1; const queueKey = DECISION_QUEUE_MAP[item.manual_case_decision]; byQueue[queueKey] = (byQueue[queueKey] ?? 0) + 1; const run = runMap.get(item.run_id) ?? null; const caseSummary = run ? buildCaseSummaries(run.report, run.run_id, false).find((candidate) => candidate.case_id === item.case_id) ?? null : null; const nearestGroup = (0, capabilitiesRegistry_1.resolveNearestCapabilityGroup)({ domain: caseSummary?.domain ?? item.context.domain, queryClass: caseSummary?.query_class ?? item.context.query_class }) ?? registry.groups[0] ?? null; const domainKey = caseSummary?.domain ?? item.context.domain ?? "unknown"; byDomain.set(domainKey, (byDomain.get(domainKey) ?? 0) + 1); const view = { annotation_id: item.annotation_id, run_id: item.run_id, case_id: item.case_id, message_index: item.message_index, rating: item.rating, comment: item.comment, manual_case_decision: item.manual_case_decision, annotation_author: item.annotation_author, updated_at: item.updated_at, domain: caseSummary?.domain ?? item.context.domain ?? null, query_class: caseSummary?.query_class ?? item.context.query_class ?? null, trace_id: item.context.trace_id ?? caseSummary?.trace_id ?? null, reply_type: item.context.reply_type ?? caseSummary?.reply_type ?? null, nearest_capability_group: nearestGroup ? { group_code: nearestGroup.group_code, group_title: nearestGroup.group_title, maturity_status: nearestGroup.maturity_status } : null }; if (queueKey === "none") { if (queues.covered_ok.length < limitPerQueue) queues.covered_ok.push(view); continue; } if (!queues[queueKey]) { queues[queueKey] = []; } if (queues[queueKey].length < limitPerQueue) { queues[queueKey].push(view); } } const domainSummary = Array.from(byDomain.entries()) .map(([domain, total]) => ({ domain, total })) .sort((a, b) => b.total - a.total); return { stats: { annotations_total: annotations.length, by_decision: byDecision, by_queue: byQueue, domains_total: domainSummary.length }, domain_summary: domainSummary, queues, recommended_regression_candidates: [ ...queues.routing_extension.slice(0, 20), ...queues.policy_fix.slice(0, 20), ...queues.safety_policy.slice(0, 20) ].slice(0, 60) }; } function buildAutoRunsRouter(openaiClient = new openaiResponsesClient_1.OpenAIResponsesClient()) { const router = (0, express_1.Router)(); router.get("/api/autoruns/history", (req, res) => { const filters = parseFilters(req.query); const indexed = indexRuns(filters.scan_limit); const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit); const summaries = filtered.map((run) => buildRunSummary(run)); const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort(); const availableModes = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item) => item !== null))).sort(); const availablePromptVersions = Array.from(new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item) => item !== null))).sort(); (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), filters_applied: { from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(), to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(), target: filters.target, use_mock: filters.use_mock, prompt_contains: filters.prompt_contains, mode: filters.mode, limit: filters.limit, scan_limit: filters.scan_limit }, available: { targets: availableTargets, modes: availableModes, prompt_versions: availablePromptVersions }, items: summaries, stats: buildHistoryStats(summaries) }); }); router.get("/api/autoruns/history/:run_id", (req, res, next) => { try { const runId = String(req.params.run_id ?? "").trim(); if (!runId) { throw new http_1.ApiError("INVALID_RUN_ID", "run_id is required", 400); } const run = findRunById(runId); if (!run) { throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404); } const annotations = readAnnotations(); const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations); const cases = buildCaseSummaries(run.report, run.run_id, true, annotationStatsByCase); const coverage = buildCoverageFromCases(cases); (0, http_1.ok)(res, { ok: true, run: buildRunSummary(run), coverage, cases, annotations_summary: { total: annotations.filter((item) => item.run_id === runId).length }, report: run.report }); } catch (error) { next(error); } }); router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => { try { const runId = String(req.params.run_id ?? "").trim(); const caseId = String(req.params.case_id ?? "").trim(); if (!runId || !caseId) { throw new http_1.ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400); } const run = findRunById(runId); if (!run) { throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404); } const annotations = readAnnotations(); if (caseId === "__all__") { const dialog = buildRunAggregateDialog(run, annotations); (0, http_1.ok)(res, { ok: true, run_id: runId, case_id: "__all__", ...dialog, annotations: annotations .filter((item) => item.run_id === runId) .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at)) }); return; } const sessionDialog = loadSessionDialog(runId, caseId); const dialog = sessionDialog ?? buildFallbackDialog(run, caseId); const messages = withMessageAnnotations(runId, caseId, dialog.messages, annotations); (0, http_1.ok)(res, { ok: true, run_id: runId, case_id: caseId, ...dialog, messages, annotations: annotations .filter((item) => item.run_id === runId && item.case_id === caseId) .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at)) }); } catch (error) { next(error); } }); router.get("/api/autoruns/annotations", (req, res, next) => { try { const runIdFilter = toStringSafe(req.query.run_id); const caseIdFilter = toStringSafe(req.query.case_id); const minRatingRaw = toNumberSafe(req.query.min_rating); const minRating = minRatingRaw === null ? null : clampInt(minRatingRaw, 1, 5, 1); const decisionFilter = parseDecisionFilter(req.query.manual_case_decision); const limit = clampInt(toNumberSafe(req.query.limit), 1, 2000, 400); const scanLimit = clampInt(toNumberSafe(req.query.scan_limit), 50, 5000, 2500); const annotations = readAnnotations() .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true)) .filter((item) => (caseIdFilter ? item.case_id === caseIdFilter : true)) .filter((item) => (minRating === null ? true : item.rating >= minRating)) .filter((item) => (decisionFilter === "all" ? true : item.manual_case_decision === decisionFilter)) .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at)) .slice(0, limit); const runIndex = indexRuns(scanLimit); const runMap = new Map(runIndex.map((item) => [item.run_id, item])); const items = annotations.map((item) => { const run = runMap.get(item.run_id) ?? null; const runSummary = run ? buildRunSummary(run) : null; const cases = run ? buildCaseSummaries(run.report, run.run_id, false) : []; const caseSummary = cases.find((candidate) => candidate.case_id === item.case_id) ?? null; return { ...item, run: runSummary, case_summary: caseSummary, technical_context: { report_path: run?.report_path ?? null, trace_id: item.context.trace_id, reply_type: item.context.reply_type, domain: item.context.domain, query_class: item.context.query_class, checks: caseSummary?.checks ?? null, metric_subscores: caseSummary?.metric_subscores ?? null } }; }); const avgRating = items.length > 0 ? Number((items.reduce((acc, item) => acc + item.rating, 0) / items.length).toFixed(2)) : null; const byDecision = items.reduce((acc, item) => { acc[item.manual_case_decision] = (acc[item.manual_case_decision] ?? 0) + 1; return acc; }, {}); (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), filters_applied: { run_id: runIdFilter ?? null, case_id: caseIdFilter ?? null, min_rating: minRating, manual_case_decision: decisionFilter, limit }, stats: { total: items.length, avg_rating: avgRating, by_decision: byDecision }, available_manual_case_decisions: MANUAL_CASE_DECISIONS, manual_case_decision_schema: readManualDecisionSchema(), items }); } catch (error) { next(error); } }); router.post("/api/autoruns/annotations", (req, res, next) => { try { const body = toRecord(req.body); if (!body) { throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "JSON body is required", 400); } const runId = toStringSafe(body.run_id); const caseId = toStringSafe(body.case_id); const messageIndexRaw = toNumberSafe(body.message_index); const ratingRaw = toNumberSafe(body.rating); const comment = parseComment(body.comment); const manualCaseDecision = parseManualCaseDecision(body.manual_case_decision); const annotationAuthor = parseAnnotationAuthor(body.annotation_author); if (!runId || !caseId) { throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "run_id and case_id are required", 400); } if (messageIndexRaw === null) { throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "message_index is required", 400); } const messageIndex = clampInt(messageIndexRaw, 0, 100_000, 0); if (ratingRaw === null) { throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "rating is required", 400); } const rating = clampInt(ratingRaw, 1, 5, 1); if (comment.length === 0) { throw new http_1.ApiError("INVALID_ANNOTATION_PAYLOAD", "comment is required", 400); } const run = findRunById(runId); if (!run) { throw new http_1.ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404); } const cases = buildCaseSummaries(run.report, run.run_id, false); const caseSummary = cases.find((item) => item.case_id === caseId) ?? null; if (!caseSummary) { throw new http_1.ApiError("AUTORUN_CASE_NOT_FOUND", `Case not found: ${caseId} in run ${runId}`, 404); } const sessionDialog = loadSessionDialog(runId, caseId); const dialog = sessionDialog ?? buildFallbackDialog(run, caseId); if (messageIndex >= dialog.messages.length) { throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_FOUND", `Message index ${messageIndex} out of range`, 400); } const targetMessage = dialog.messages[messageIndex]; const targetRole = toStringSafe(targetMessage.role) ?? "unknown"; if (targetRole !== "assistant") { throw new http_1.ApiError("AUTORUN_MESSAGE_NOT_ASSISTANT", "Only assistant answers can be annotated", 400); } const pairedUserQuestion = [...dialog.messages.slice(0, messageIndex)] .reverse() .find((item) => (toStringSafe(item.role) ?? "") === "user"); const nowIso = new Date().toISOString(); const annotations = readAnnotations(); const key = annotationKey(runId, caseId, messageIndex); const existingIndex = annotations.findIndex((item) => annotationKey(item.run_id, item.case_id, item.message_index) === key); const existing = existingIndex >= 0 ? annotations[existingIndex] : null; const annotation = { annotation_id: existing?.annotation_id ?? generateAnnotationId(), run_id: runId, case_id: caseId, session_id: caseSummary.session_id, message_index: messageIndex, rating, comment, manual_case_decision: manualCaseDecision, annotation_author: annotationAuthor, resolved: existing?.resolved ?? false, resolved_at: existing?.resolved_at ?? null, resolved_by: existing?.resolved_by ?? null, created_at: existing?.created_at ?? nowIso, updated_at: nowIso, context: { message_id: toStringSafe(targetMessage.message_id), trace_id: toStringSafe(targetMessage.trace_id) ?? caseSummary.trace_id, reply_type: toStringSafe(targetMessage.reply_type) ?? caseSummary.reply_type, eval_target: run.eval_target, prompt_version: toStringSafe(run.report.prompt_version), domain: caseSummary.domain, query_class: caseSummary.query_class, question_text: toStringSafe(pairedUserQuestion?.text), answer_text: toStringSafe(targetMessage.text) } }; if (existingIndex >= 0) { annotations[existingIndex] = annotation; } else { annotations.push(annotation); } writeAnnotations(annotations); const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations); const caseStats = annotationStatsByCase.get(caseId) ?? null; (0, http_1.ok)(res, { ok: true, annotation, case_annotation_stats: caseStats }); } catch (error) { next(error); } }); router.patch("/api/autoruns/annotations/:annotation_id", (req, res, next) => { try { const annotationId = toStringSafe(req.params.annotation_id); if (!annotationId) { throw new http_1.ApiError("INVALID_ANNOTATION_ID", "annotation_id is required", 400); } const body = toRecord(req.body); if (!body) { throw new http_1.ApiError("INVALID_ANNOTATION_PATCH", "JSON body is required", 400); } const resolved = toBooleanSafe(body.resolved); if (resolved === null) { throw new http_1.ApiError("INVALID_ANNOTATION_PATCH", "resolved flag is required", 400); } const resolvedBy = parseAnnotationAuthor(body.resolved_by); const annotations = readAnnotations(); const index = annotations.findIndex((item) => item.annotation_id === annotationId); if (index < 0) { throw new http_1.ApiError("ANNOTATION_NOT_FOUND", `Annotation not found: ${annotationId}`, 404); } const nowIso = new Date().toISOString(); const current = annotations[index]; const updated = { ...current, resolved, resolved_at: resolved ? nowIso : null, resolved_by: resolved ? resolvedBy ?? current.resolved_by ?? null : null, updated_at: nowIso }; annotations[index] = updated; writeAnnotations(annotations); const statsByCase = buildAnnotationStatsMap(updated.run_id, annotations); const caseStats = statsByCase.get(updated.case_id) ?? null; (0, http_1.ok)(res, { ok: true, annotation: updated, case_annotation_stats: caseStats }); } catch (error) { next(error); } }); router.get("/api/autoruns/manual-decision-schema", (_req, res) => { (0, http_1.ok)(res, { ok: true, schema: readManualDecisionSchema(), enum: MANUAL_CASE_DECISIONS }); }); router.get("/api/autoruns/post-analysis", (req, res, next) => { try { const query = req.query; const runIdFilter = toStringSafe(query.run_id); const limitPerQueue = clampInt(toNumberSafe(query.limit_per_queue), 5, 250, 40); const annotationLimit = clampInt(toNumberSafe(query.annotation_limit), 20, 5000, 1500); const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500); const runFilters = parseFilters(query); const applyRunFilters = hasAnyRunFilterQuery(query); const runIndex = indexRuns(Math.max(scanLimit, runFilters.scan_limit)); const filteredRuns = applyRunFilters ? runIndex.filter((run) => matchesFilters(run, runFilters)) : runIndex; const runMap = new Map(filteredRuns.map((run) => [run.run_id, run])); const scopedAnnotations = readAnnotations() .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true)) .filter((item) => (runMap.size > 0 ? runMap.has(item.run_id) : true)) .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at)) .slice(0, annotationLimit); const analysis = collectPostAnalysis(scopedAnnotations, runMap, limitPerQueue); (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), filters_applied: { run_id: runIdFilter ?? null, run_filters_applied: applyRunFilters, limit_per_queue: limitPerQueue, annotation_limit: annotationLimit, scan_limit: scanLimit }, runs_considered: filteredRuns.slice(0, 500).map((item) => buildRunSummary(item)), manual_case_decision_schema: readManualDecisionSchema(), post_analysis: analysis }); } catch (error) { next(error); } }); router.get("/api/autoruns/autogen/history", (req, res, next) => { try { const limit = clampInt(toNumberSafe(req.query.limit), 1, 500, 120); const rawMode = toStringSafe(req.query.mode); const includeAllModes = !rawMode || !["qwen_seed", "codex_creative"].includes(rawMode); const modeFilter = rawMode ?? "codex_creative"; const items = readAutoGenHistory() .filter((item) => (includeAllModes ? true : item.mode === modeFilter)) .slice(0, limit); (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), items }); } catch (error) { next(error); } }); router.get("/api/autoruns/autogen/personality-catalog", (_req, res, next) => { try { (0, http_1.ok)(res, { ok: true, generated_at: new Date().toISOString(), items: buildAutogenPersonalityCatalog() }); } catch (error) { next(error); } }); router.post("/api/autoruns/autogen/generate", async (req, res, next) => { try { const body = toRecord(req.body); if (!body) { throw new http_1.ApiError("INVALID_AUTOGEN_PAYLOAD", "JSON body is required", 400); } const mode = parseAutoGenMode(body.mode); const count = parseAutogenCount(body.count); const domain = parseAutogenDomain(body.domain); const persistCaseSet = toBooleanSafe(body.persist_to_eval_cases) ?? true; const generatedBy = parseAnnotationAuthor(body.generated_by); const context = toRecord(body.context); const llmConfig = parseAutogenLlmRuntimeConfig(body, context); const personalityPrompt = toStringSafe(context?.autogen_personality_prompt); let questions = []; if (mode === "qwen_seed") { if (!llmConfig) { throw new http_1.ApiError("AUTOGEN_LLM_CONFIG_REQUIRED", "Для режима qwen_seed нужен активный LLM-контур (provider/model/baseUrl) из настроек подключения.", 400); } questions = await generateQwenSeedQuestionsLive({ count, domain, personalityPrompt, llmConfig, client: openaiClient }); } else { questions = generateCodexCreativeQuestions(count, domain); } questions = Array.from(new Set(questions.map((item) => sanitizeGeneratedQuestion(item)).filter((item) => item.length > 0))).slice(0, count); const generationId = generateAutogenId(); let savedCaseSetFile = null; if (persistCaseSet) { if (!fs_1.default.existsSync(config_1.EVAL_CASES_DIR)) { fs_1.default.mkdirSync(config_1.EVAL_CASES_DIR, { recursive: true }); } const fileName = buildAutogenCaseSetFileName(mode, generationId); const filePath = path_1.default.resolve(config_1.EVAL_CASES_DIR, fileName); const payload = buildAutogenCaseSetPayload({ generationId, mode, domain, questions }); fs_1.default.writeFileSync(filePath, JSON.stringify(payload, null, 2), "utf-8"); savedCaseSetFile = fileName; } const record = { generation_id: generationId, created_at: new Date().toISOString(), mode, count: questions.length, domain, questions, generated_by: generatedBy, saved_case_set_file: savedCaseSetFile, context: context ? { llm_provider: toStringSafe(context.llm_provider), model: toStringSafe(context.model), assistant_prompt_version: toStringSafe(context.assistant_prompt_version), decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version), prompt_fingerprint: toStringSafe(context.prompt_fingerprint) ? repairAutogenMojibake(String(context.prompt_fingerprint)) : null, autogen_personality_id: toStringSafe(context.autogen_personality_id), autogen_personality_prompt: toStringSafe(context.autogen_personality_prompt) ? repairAutogenMojibake(String(context.autogen_personality_prompt)) : null } : null }; const history = readAutoGenHistory(); history.unshift(record); writeAutoGenHistory(history.slice(0, 500)); (0, http_1.ok)(res, { ok: true, generation: record }); } catch (error) { next(error); } }); return router; }