NODEDC_1C/llm_normalizer/backend/src/routes/autoRuns.ts

2303 lines
83 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import fs from "fs";
import path from "path";
import { Router } from "express";
import iconv from "iconv-lite";
import {
ASSISTANT_SESSIONS_DIR,
AUTORUN_ANNOTATIONS_FILE,
AUTORUN_GENERATOR_HISTORY_FILE,
EVAL_CASES_DIR,
EVAL_DATASETS_DIR,
MANUAL_CASE_DECISION_SCHEMA_FILE,
REPORTS_DIR
} from "../config";
import { ApiError, ok } from "../utils/http";
import { loadCapabilitiesRegistry, resolveNearestCapabilityGroup, type CapabilityGroup } from "../services/capabilitiesRegistry";
import { OpenAIResponsesClient } from "../services/openaiResponsesClient";
// Evaluation target a run report is attributed to; "unknown" is the fallback
// used when no target can be resolved (see resolveRunTarget).
type AutoRunTarget = "normalizer" | "assistant_stage1" | "assistant_stage2" | "assistant_p0" | "unknown";
// Direction of a score trend (used by HistoryStats.trend).
type AutoRunTrend = "up" | "down" | "flat";
// Identifier of a question auto-generation mode stored in the generator history.
type AutoGenMode = "qwen_seed" | "codex_creative";
// Closed set of verdicts a reviewer can attach to an annotated case.
// Keep in sync with MANUAL_CASE_DECISIONS and DECISION_QUEUE_MAP.
type ManualCaseDecision =
| "covered_ok"
| "covered_but_bad_answer"
| "candidate_for_implementation"
| "needs_routing_extension"
| "out_of_scope_but_answer_softly"
| "unsafe_question_limit_strictly"
| "needs_dialog_policy_fix"
| "needs_capability_registry_update"
| "bad_test_case";
// Runtime list of every ManualCaseDecision value; parseManualCaseDecision validates
// incoming strings against it and the fallback decision schema exposes it as `enum`.
const MANUAL_CASE_DECISIONS: ManualCaseDecision[] = [
"covered_ok",
"covered_but_bad_answer",
"candidate_for_implementation",
"needs_routing_extension",
"out_of_scope_but_answer_softly",
"unsafe_question_limit_strictly",
"needs_dialog_policy_fix",
"needs_capability_registry_update",
"bad_test_case"
];
// Maps each manual decision to a queue name; published as `queue_mapping`
// in the fallback schema built by readManualDecisionSchema.
// Several decisions intentionally share one queue ("policy_fix", "routing_extension").
const DECISION_QUEUE_MAP: Record<ManualCaseDecision, string> = {
covered_ok: "none",
covered_but_bad_answer: "policy_fix",
candidate_for_implementation: "routing_extension",
needs_routing_extension: "routing_extension",
out_of_scope_but_answer_softly: "soft_boundary",
unsafe_question_limit_strictly: "safety_policy",
needs_dialog_policy_fix: "policy_fix",
needs_capability_registry_update: "capability_registry",
bad_test_case: "testset_hygiene"
};
// One parsed report file as produced by indexRuns.
interface IndexedRun {
run_id: string;
eval_target: AutoRunTarget;
// Absolute path of the report file the run was read from.
report_path: string;
// The parsed JSON report object, kept untyped.
report: Record<string, unknown>;
// Run time taken from the report (run_timestamp, then timestamp) or, failing that, the file mtime.
timestamp_iso: string;
timestamp_ms: number;
}
// Normalized query filters built by parseFilters and applied by matchesFilters.
// A null bound / null use_mock / "all" / empty string each mean "do not filter on this field".
interface RunFilters {
from_ms: number | null;
to_ms: number | null;
target: AutoRunTarget | "all";
use_mock: boolean | null;
// Lower-cased substring matched against the report's prompt_version.
prompt_contains: string;
// Lower-cased mode name, or "all".
mode: string;
limit: number;
// Maximum number of candidate report files to scan.
scan_limit: number;
}
// Per-domain case counts; cases without a domain are grouped under "unknown"
// by buildCoverageFromCases.
interface DomainCoverage {
domain: string;
total_cases: number;
closed_cases: number;
}
// Closed/open totals for a run plus the per-domain breakdown
// (result of buildCoverageFromCases). Cases with status "unknown" count in neither total.
interface RunCoverage {
closed_cases: number;
open_cases: number;
domain_coverage: DomainCoverage[];
}
// Flattened description of a single run, as returned by buildRunSummary.
interface RunSummary {
run_id: string;
eval_target: AutoRunTarget;
run_timestamp: string;
mode: string | null;
llm_provider: string | null;
model: string | null;
use_mock: boolean | null;
prompt_version: string | null;
schema_version: string | null;
suite_id: string | null;
cases_total: number;
requests_total: number | null;
report_path: string;
// Aggregate quality score; null when it cannot be computed.
score_index: number | null;
blocking_failures: number;
quality_failures: number;
closed_cases: number;
open_cases: number;
domain_coverage: DomainCoverage[];
}
// Per-case view of a run result, built by buildCaseSummaries.
interface CaseSummary {
case_id: string;
domain: string | null;
query_class: string | null;
// "unknown" means neither the checks nor a score were available to decide.
status: "closed" | "open" | "unknown";
score_index: number | null;
trace_id: string | null;
reply_type: string | null;
// Built as `${runId}-${caseId}`.
session_id: string;
// True when a session file for session_id exists (only checked when requested).
dialog_available: boolean;
// Annotation aggregates for this case; zero / null when there are none.
commented_count: number;
latest_annotation_at: string | null;
avg_rating: number | null;
checks: Record<string, unknown> | null;
metric_subscores: Record<string, unknown> | null;
}
// Aggregated statistics over a set of runs.
// NOTE(review): the producer of this shape is not visible in this part of the file;
// field meanings below are inferred from names only — confirm against the builder.
interface HistoryStats {
runs_total: number;
by_target: Record<string, number>;
blocking_runs: number;
quality_gap_runs: number;
avg_score_index: number | null;
latest_score_index: number | null;
previous_score_index: number | null;
trend: AutoRunTrend;
domain_coverage: DomainCoverage[];
}
// A reviewer annotation attached to one message of one case in one run,
// as persisted in AUTORUN_ANNOTATIONS_FILE and normalized by readAnnotations.
interface AutoRunAnnotationRecord {
annotation_id: string;
run_id: string;
case_id: string;
session_id: string;
message_index: number;
// Clamped to 1..5 when read from disk.
rating: number;
comment: string;
manual_case_decision: ManualCaseDecision;
// Truncated to 80 characters when parsed.
annotation_author: string | null;
resolved: boolean;
resolved_at: string | null;
resolved_by: string | null;
created_at: string;
updated_at: string;
// Snapshot of the annotated message and its run metadata.
context: {
message_id: string | null;
trace_id: string | null;
reply_type: string | null;
eval_target: AutoRunTarget | "unknown";
prompt_version: string | null;
domain: string | null;
query_class: string | null;
question_text: string | null;
answer_text: string | null;
};
}
// Annotation aggregates for a single case (see buildAnnotationStatsMap).
interface AnnotationStatsByCase {
count: number;
// ISO time of the most recent parseable updated_at, or null when none parses.
latest_at: string | null;
// Mean rating rounded to two decimals.
avg_rating: number | null;
}
// One entry of the question-generator history file (see readAutoGenHistory).
interface AutoGenHistoryRecord {
generation_id: string;
created_at: string;
mode: AutoGenMode;
// Clamped to 1..300 when read from disk.
count: number;
domain: string | null;
// At most 500 sanitized questions are kept when read from disk.
questions: string[];
generated_by: string | null;
saved_case_set_file: string | null;
// Generation-time settings; null when the stored entry has no context object.
context: {
llm_provider: string | null;
model: string | null;
assistant_prompt_version: string | null;
decomposition_prompt_version: string | null;
prompt_fingerprint: string | null;
autogen_personality_id: string | null;
autogen_personality_prompt: string | null;
} | null;
}
// A selectable generator "personality" (see buildAutogenPersonalityCatalog).
interface AutoGenPersonalityCatalogItem {
id: string;
label: string;
// Capability group code for registry-based items; null for the built-in general item.
domain: string | null;
default_prompt: string;
// Whether the item is hard-coded here or derived from the capabilities registry.
source: "built_in" | "capabilities_registry";
}
// Connection and sampling settings for an LLM call made by the generator.
// NOTE(review): consumers are outside the visible part of the file — confirm how
// null values for base_url / temperature / max_output_tokens are interpreted.
interface AutoGenLlmRuntimeConfig {
llm_provider: "openai" | "local";
api_key: string;
model: string;
base_url: string | null;
temperature: number | null;
max_output_tokens: number | null;
}
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @returns the same object reference, or null for null/undefined, primitives and arrays.
 */
function toRecord(value: unknown): Record<string, unknown> | null {
  const isObjectLike = typeof value === "object" && value !== null;
  return isObjectLike && !Array.isArray(value) ? (value as Record<string, unknown>) : null;
}
/**
 * Returns the value itself when it is an array, otherwise a fresh empty array.
 */
function toArray(value: unknown): unknown[] {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
/**
 * Returns the trimmed string, or null when the value is not a string
 * or is empty after trimming.
 */
function toStringSafe(value: unknown): string | null {
  const text = typeof value === "string" ? value.trim() : "";
  return text === "" ? null : text;
}
/**
 * Converts a number or a non-blank numeric string to a finite number.
 *
 * @returns null for any other type, blank strings, NaN and ±Infinity.
 */
function toNumberSafe(value: unknown): number | null {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    candidate = Number(value);
  } else {
    return null;
  }
  return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Interprets booleans and common textual flags.
 * Accepts "1"/"true"/"yes"/"on" and "0"/"false"/"no"/"off" (case-insensitive, trimmed).
 *
 * @returns null when the value is neither a boolean nor a recognised flag string.
 */
function toBooleanSafe(value: unknown): boolean | null {
  if (typeof value === "boolean") return value;
  if (typeof value !== "string") return null;
  switch (value.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      return null;
  }
}
/**
 * Parses a date string into epoch milliseconds via Date.parse.
 *
 * @returns null when the value is not a non-blank string or cannot be parsed.
 */
function parseDateMs(value: unknown): number | null {
  const text = toStringSafe(value);
  const parsedMs = text === null ? Number.NaN : Date.parse(text);
  return Number.isFinite(parsedMs) ? parsedMs : null;
}
/**
 * Truncates a number toward zero and clamps it into [min, max].
 *
 * @param fallback returned when value is null or not finite.
 */
function clampInt(value: number | null, min: number, max: number, fallback: number): number {
  if (value === null || !Number.isFinite(value)) return fallback;
  const truncated = Math.trunc(value);
  return truncated < min ? min : truncated > max ? max : truncated;
}
/**
 * Validates a raw value against the known manual case decisions.
 *
 * @param fallback decision returned for blank, non-string or unrecognised input.
 */
function parseManualCaseDecision(value: unknown, fallback: ManualCaseDecision = "needs_dialog_policy_fix"): ManualCaseDecision {
  const candidate = toStringSafe(value);
  if (candidate === null) return fallback;
  const known = MANUAL_CASE_DECISIONS.find((decision) => decision === candidate);
  return known ?? fallback;
}
/**
 * Normalizes an author name: trimmed, capped at 80 characters, null when blank or not a string.
 */
function parseAnnotationAuthor(value: unknown): string | null {
  return toStringSafe(value)?.slice(0, 80) ?? null;
}
/**
 * Reads a "resolved" flag, using the fallback when the value is not a recognisable boolean.
 */
function parseAnnotationResolved(value: unknown, fallback = false): boolean {
  return toBooleanSafe(value) ?? fallback;
}
/**
 * Loads the manual-decision schema from MANUAL_CASE_DECISION_SCHEMA_FILE.
 *
 * Falls back to a built-in schema (enum, Russian labels, queue mapping) when the
 * file is missing, unreadable, not valid JSON, or not a JSON object.
 */
function readManualDecisionSchema(): Record<string, unknown> {
  const builtInSchema: Record<string, unknown> = {
    schema_version: "manual_case_decision_schema_v1_fallback",
    enum: MANUAL_CASE_DECISIONS,
    labels: {
      covered_ok: "Покрыто и ок",
      covered_but_bad_answer: "Покрыто, но ответ плохой",
      candidate_for_implementation: "Кандидат на внедрение",
      needs_routing_extension: "Нужно расширение маршрутизации",
      out_of_scope_but_answer_softly: "Вне скоупа, но нужен мягкий ответ",
      unsafe_question_limit_strictly: "Высокий риск, строгие ограничения",
      needs_dialog_policy_fix: "Нужен фикс диалоговой политики",
      needs_capability_registry_update: "Нужно обновить реестр возможностей",
      bad_test_case: "Плохой тест-кейс"
    },
    queue_mapping: DECISION_QUEUE_MAP
  };
  try {
    if (!fs.existsSync(MANUAL_CASE_DECISION_SCHEMA_FILE)) {
      return builtInSchema;
    }
    const rawText = fs.readFileSync(MANUAL_CASE_DECISION_SCHEMA_FILE, "utf-8");
    const fromDisk: unknown = JSON.parse(rawText);
    return toRecord(fromDisk) ?? builtInSchema;
  } catch {
    // Read or parse failure: the built-in schema is the deliberate best-effort answer.
    return builtInSchema;
  }
}
/**
 * Reads the generator history file and returns normalized records, newest first.
 *
 * Entries that are not objects or have no generation_id are dropped. Any read or
 * parse failure yields an empty list (best-effort read).
 *
 * Normalization applied per record:
 * - `mode` is validated against the known modes; anything else becomes "codex_creative"
 *   (previously an arbitrary stored string was cast through unchecked);
 * - `count` is clamped to 1..300 (default 20);
 * - `questions` are sanitized, blanks removed, at most 500 kept;
 * - `prompt_fingerprint` and `autogen_personality_prompt` go through mojibake repair.
 */
function readAutoGenHistory(): AutoGenHistoryRecord[] {
  if (!fs.existsSync(AUTORUN_GENERATOR_HISTORY_FILE)) return [];
  // Unparseable timestamps sort as the oldest entries instead of producing NaN
  // comparisons, which would make the ordering implementation-defined.
  const createdMs = (record: AutoGenHistoryRecord): number => {
    const ms = Date.parse(record.created_at);
    return Number.isFinite(ms) ? ms : 0;
  };
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(AUTORUN_GENERATOR_HISTORY_FILE, "utf-8"));
    if (!Array.isArray(parsed)) return [];
    return parsed
      .map((item) => toRecord(item))
      .filter((item): item is Record<string, unknown> => item !== null)
      .map((item): AutoGenHistoryRecord => {
        const context = toRecord(item.context);
        const rawMode = toStringSafe(item.mode);
        const mode: AutoGenMode = rawMode === "qwen_seed" || rawMode === "codex_creative" ? rawMode : "codex_creative";
        return {
          generation_id: toStringSafe(item.generation_id) ?? "",
          created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
          mode,
          count: clampInt(toNumberSafe(item.count), 1, 300, 20),
          domain: toStringSafe(item.domain),
          questions: toArray(item.questions)
            .map((q) => toStringSafe(q))
            .filter((q): q is string => q !== null)
            .map((q) => sanitizeGeneratedQuestion(q))
            .filter((q) => q.length > 0)
            .slice(0, 500),
          generated_by: toStringSafe(item.generated_by),
          saved_case_set_file: toStringSafe(item.saved_case_set_file),
          context: context
            ? {
                llm_provider: toStringSafe(context.llm_provider),
                model: toStringSafe(context.model),
                assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
                decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
                // The repair helper receives the stored (untrimmed) text, as before.
                prompt_fingerprint: toStringSafe(context.prompt_fingerprint)
                  ? repairAutogenMojibake(String(context.prompt_fingerprint))
                  : null,
                autogen_personality_id: toStringSafe(context.autogen_personality_id),
                autogen_personality_prompt: toStringSafe(context.autogen_personality_prompt)
                  ? repairAutogenMojibake(String(context.autogen_personality_prompt))
                  : null
              }
            : null
        };
      })
      .filter((item) => item.generation_id.length > 0)
      .sort((a, b) => createdMs(b) - createdMs(a));
  } catch {
    return [];
  }
}
/**
 * Persists the generator history as pretty-printed JSON (2-space indent, UTF-8),
 * creating the parent directory first when it does not exist.
 */
function writeAutoGenHistory(records: AutoGenHistoryRecord[]): void {
  const targetFile = AUTORUN_GENERATOR_HISTORY_FILE;
  const parentDir = path.dirname(targetFile);
  const parentExists = fs.existsSync(parentDir);
  if (!parentExists) {
    fs.mkdirSync(parentDir, { recursive: true });
  }
  const serialized = JSON.stringify(records, null, 2);
  fs.writeFileSync(targetFile, serialized, "utf-8");
}
/**
 * Reads a dataset JSON file and returns its case objects.
 *
 * Accepts either a top-level array of cases or an object with a `cases` array.
 * Non-object entries are skipped; any read/parse failure yields an empty list.
 */
function readEvalDatasetCases(filePath: string): Array<Record<string, unknown>> {
  const keepRecords = (items: unknown[]): Array<Record<string, unknown>> => {
    const records: Array<Record<string, unknown>> = [];
    for (const entry of items) {
      const asRecord = toRecord(entry);
      if (asRecord !== null) records.push(asRecord);
    }
    return records;
  };
  try {
    const content: unknown = JSON.parse(fs.readFileSync(filePath, "utf-8"));
    if (Array.isArray(content)) {
      return keepRecords(content);
    }
    const root = toRecord(content);
    return root === null ? [] : keepRecords(toArray(root.cases));
  } catch {
    return [];
  }
}
/**
 * Gathers unique, sanitized questions from every `.json` dataset file directly
 * under EVAL_DATASETS_DIR.
 *
 * For each case the first available of `raw_question`, `user_message`, `query` is used.
 *
 * @param limit maximum number of questions returned (first-seen order).
 */
function collectCanonicalQuestions(limit = 300): string[] {
  if (!fs.existsSync(EVAL_DATASETS_DIR)) {
    return [];
  }
  // A Set keeps first-seen insertion order, so it deduplicates without reordering.
  const unique = new Set<string>();
  const datasetFiles = fs
    .readdirSync(EVAL_DATASETS_DIR, { withFileTypes: true })
    .filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
  for (const file of datasetFiles) {
    const datasetPath = path.resolve(EVAL_DATASETS_DIR, file.name);
    for (const datasetCase of readEvalDatasetCases(datasetPath)) {
      const question =
        toStringSafe(datasetCase.raw_question) ??
        toStringSafe(datasetCase.user_message) ??
        toStringSafe(datasetCase.query);
      if (question === null) continue;
      const cleaned = sanitizeGeneratedQuestion(question);
      if (cleaned.length > 0) unique.add(cleaned);
    }
  }
  return Array.from(unique).slice(0, limit);
}
/**
 * Returns the trimmed, lower-cased domain hint, or null when it is blank or not a string.
 */
function normalizeDomainHint(value: unknown): string | null {
  return toStringSafe(value)?.toLowerCase() ?? null;
}
/**
 * Builds the default (Russian) generation prompt for a capability group.
 *
 * The prompt always names the group and ends with the read-only reminder; up to
 * three supported operations, two typical queries and two 1C hints are appended
 * as extra sentences, each only when the corresponding list is non-empty.
 */
function buildAutogenPromptFromCapabilityGroup(group: CapabilityGroup): string {
  const operations = group.supported_operations.slice(0, 3).join(", ");
  const sampleQueries = group.typical_queries.slice(0, 2).join(" | ");
  const oneCHints = group.one_c_hints.slice(0, 2).join(", ");

  let prompt = `Генерируй реалистичные вопросы бухгалтера по группе "${group.group_title}".`;
  prompt += " Добавляй живую разговорную форму и опечатки, но сохраняй бизнес-смысл.";
  if (operations) {
    prompt += ` Опирайся на операции: ${operations}.`;
  }
  if (sampleQueries) {
    prompt += ` Ближайшие формулировки: ${sampleQueries}.`;
  }
  if (oneCHints) {
    prompt += ` Можно мягко упоминать контекст 1С: ${oneCHints}.`;
  }
  prompt += " Не выдумывай операции вне read-only режима.";
  return prompt;
}
/**
 * Builds the list of selectable generator personalities: the built-in "general"
 * item followed by one item per capabilities-registry group.
 *
 * Items with a blank id are skipped and the first item wins on duplicate ids.
 * Labels and default prompts are passed through mojibake repair before returning.
 */
function buildAutogenPersonalityCatalog(): AutoGenPersonalityCatalogItem[] {
  const catalogById = new Map<string, AutoGenPersonalityCatalogItem>();
  const register = (candidate: AutoGenPersonalityCatalogItem): void => {
    const hasId = candidate.id.trim().length > 0;
    if (hasId && !catalogById.has(candidate.id)) {
      catalogById.set(candidate.id, candidate);
    }
  };

  register({
    id: "general",
    label: "Общий контур",
    domain: null,
    default_prompt:
      "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл.",
    source: "built_in"
  });

  const registry = loadCapabilitiesRegistry();
  for (const group of registry.groups) {
    register({
      id: `registry_${group.group_code}`,
      label: `${group.group_title} (реестр)`,
      domain: group.group_code,
      default_prompt: buildAutogenPromptFromCapabilityGroup(group),
      source: "capabilities_registry"
    });
  }

  const repaired: AutoGenPersonalityCatalogItem[] = [];
  for (const entry of catalogById.values()) {
    repaired.push({
      ...entry,
      label: repairAutogenMojibake(entry.label),
      default_prompt: repairAutogenMojibake(entry.default_prompt)
    });
  }
  return repaired;
}
/**
 * Returns three canned (Russian) questions for a domain hint.
 *
 * Matching is a case-sensitive substring test, checked in this order:
 * VAT ("vat"/"ндс"), counterparties ("counter"/"контраг"),
 * settlements ("settlement"/"задолж"/"расчет"); anything else — including
 * null — gets the general templates.
 */
function fallbackDomainTemplates(domain: string | null): string[] {
  const mentions = (...needles: string[]): boolean =>
    domain !== null && needles.some((needle) => domain.includes(needle));

  if (mentions("vat", "ндс")) {
    return [
      "Сколько НДС к уплате на дату по организации?",
      "Покажи прогноз НДС за период по организации.",
      "Почему по НДС сейчас ноль и из чего сложился расчет?"
    ];
  }
  if (mentions("counter", "контраг")) {
    return [
      "Покажи топ контрагентов по сумме платежей за период.",
      "Какой самый крупный договор у выбранной организации?",
      "Какие документы были по контрагенту за весь период?"
    ];
  }
  if (mentions("settlement", "задолж", "расчет")) {
    return [
      "Какие незакрытые расчеты висят на конец периода?",
      "Есть ли незакрытые авансы по поставщикам?",
      "Покажи цепочки закрытия по счетам 60/62."
    ];
  }
  return [
    "С какой организацией сейчас можно работать в активном контуре?",
    "Покажи ключевые операции за выбранный период.",
    "Какие вопросы по этому домену ассистент поддерживает прямо сейчас?"
  ];
}
/**
 * Rewrites a question in a casual, typo-prone style.
 *
 * A colloquial prefix and a tail are picked by `index` (cycling through the
 * tables). For even indices one whole-word typo substitution, also picked by
 * `index`, is applied to the combined text.
 *
 * @param base  source question
 * @param index position of the question in the generated batch; drives all picks
 */
function mutateIntoQwenStyle(base: string, index: number): string {
  const wrappers = ["йо ", "слушай ", "подскажи плиз ", "короче ", "мож ", "а ну-ка "];
  const tails = ["", " без воды", " по факту", " и коротко", " прям сейчас", " за весь период"];
  // JavaScript's `\b` is defined over ASCII word characters only, so `/\bсейчас\b/`
  // never matches a Cyrillic word surrounded by spaces. Unicode-aware lookarounds
  // give real whole-word matching (and still leave e.g. "чтобы" untouched).
  const wholeWord = (word: string): RegExp =>
    new RegExp(`(?<![\\p{L}\\p{N}_])${word}(?![\\p{L}\\p{N}_])`, "giu");
  const typoMap: Array<[RegExp, string]> = [
    [wholeWord("компания"), "компиния"],
    [wholeWord("сейчас"), "щас"],
    [wholeWord("пожалуйста"), "плиз"],
    [wholeWord("какая"), "кака"],
    [wholeWord("что"), "че"]
  ];
  // Guard the table lookups so a negative or fractional index cannot leak "undefined" into the text.
  const prefix = wrappers[index % wrappers.length] ?? "";
  const tail = tails[index % tails.length] ?? "";
  let text = `${prefix}${base}${tail}`.trim();
  if (index % 2 === 0) {
    const typo = typoMap[index % typoMap.length];
    if (typo) {
      const [pattern, replacement] = typo;
      text = text.replace(pattern, replacement);
    }
  }
  return text;
}
/**
 * Produces up to `count` casual-style questions seeded from the canonical datasets.
 *
 * Seed questions (up to 450) come from collectCanonicalQuestions; when there are
 * none, the canned domain templates are used instead. With a domain, only seeds
 * that contain the domain string (lower-cased comparison of the seed) or that are
 * canned templates for that domain are kept — falling back to the full seed list
 * when that filter leaves nothing. Results are sanitized and deduplicated, so
 * fewer than `count` items may be returned.
 */
function generateQwenSeedQuestions(count: number, domain: string | null): string[] {
  const canonical = collectCanonicalQuestions(450);
  const pool = canonical.length > 0 ? canonical : fallbackDomainTemplates(domain);

  let candidates = pool;
  if (domain) {
    const domainTemplates = fallbackDomainTemplates(domain);
    const domainMatches = pool.filter(
      (question) => question.toLowerCase().includes(domain) || domainTemplates.includes(question)
    );
    if (domainMatches.length > 0) {
      candidates = domainMatches;
    }
  }

  const unique = new Set<string>();
  for (let position = 0; position < count; position += 1) {
    const seedQuestion = candidates[position % candidates.length];
    const styled = sanitizeGeneratedQuestion(mutateIntoQwenStyle(seedQuestion, position));
    if (styled.length > 0) {
      unique.add(styled);
    }
  }
  return Array.from(unique).slice(0, count);
}
/**
 * Produces up to `count` questions by wrapping the canned domain templates in
 * a rotating set of framing sentences.
 *
 * Results are sanitized and deduplicated; because templates and framings both
 * cycle, large counts repeat combinations and return fewer than `count` items.
 */
function generateCodexCreativeQuestions(count: number, domain: string | null): string[] {
  const framings = [
    "Дай бизнес-срез по состоянию на дату: {q}",
    "Нужен аккуратный ответ как бухгалтеру: {q}",
    "Если данных не хватает, скажи что уточнить, но сначала попробуй: {q}",
    "Сформулируй результат без технички и с шагом дальше: {q}",
    "Проверь в read-only и скажи что видно: {q}"
  ];
  const templates = fallbackDomainTemplates(domain);
  const unique = new Set<string>();
  let position = 0;
  while (position < count) {
    const framing = framings[position % framings.length];
    const question = templates[position % templates.length];
    const composed = sanitizeGeneratedQuestion(framing.replace("{q}", question));
    if (composed.length > 0) {
      unique.add(composed);
    }
    position += 1;
  }
  return Array.from(unique).slice(0, count);
}
/**
 * Creates a generation id of the form `gen-<base36 time>-<base36 random>`.
 * The random part comes from Math.random and is at most 7 characters long.
 */
function generateAutogenId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 9);
  return ["gen", timePart, randomPart].join("-");
}
/**
 * Reads and normalizes all annotations from AUTORUN_ANNOTATIONS_FILE.
 *
 * Returns an empty list when the file is missing, unreadable, not valid JSON or
 * not an array. Entries that are not objects, or that lack annotation_id, run_id
 * or case_id, are dropped. Ratings are clamped to 1..5, message indexes to
 * 0..100000, and missing timestamps default to "now".
 *
 * `context.eval_target` is validated against the known targets; any other stored
 * value becomes "unknown" (previously an arbitrary string was cast through).
 */
function readAnnotations(): AutoRunAnnotationRecord[] {
  if (!fs.existsSync(AUTORUN_ANNOTATIONS_FILE)) {
    return [];
  }
  const parseEvalTarget = (value: unknown): AutoRunTarget => {
    const text = toStringSafe(value);
    switch (text) {
      case "normalizer":
      case "assistant_stage1":
      case "assistant_stage2":
      case "assistant_p0":
      case "unknown":
        return text;
      default:
        return "unknown";
    }
  };
  try {
    const raw = fs.readFileSync(AUTORUN_ANNOTATIONS_FILE, "utf-8");
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      return [];
    }
    return parsed
      .map((item) => toRecord(item))
      .filter((item): item is Record<string, unknown> => item !== null)
      .map((item) => {
        const context = toRecord(item.context);
        return {
          annotation_id: toStringSafe(item.annotation_id) ?? "",
          run_id: toStringSafe(item.run_id) ?? "",
          case_id: toStringSafe(item.case_id) ?? "",
          session_id: toStringSafe(item.session_id) ?? "",
          message_index: clampInt(toNumberSafe(item.message_index), 0, 100_000, 0),
          rating: clampInt(toNumberSafe(item.rating), 1, 5, 1),
          comment: toStringSafe(item.comment) ?? "",
          manual_case_decision: parseManualCaseDecision(item.manual_case_decision),
          annotation_author: parseAnnotationAuthor(item.annotation_author),
          resolved: parseAnnotationResolved(item.resolved),
          resolved_at: toStringSafe(item.resolved_at),
          resolved_by: parseAnnotationAuthor(item.resolved_by),
          created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
          updated_at: toStringSafe(item.updated_at) ?? new Date().toISOString(),
          context: {
            message_id: toStringSafe(context?.message_id),
            trace_id: toStringSafe(context?.trace_id),
            reply_type: toStringSafe(context?.reply_type),
            eval_target: parseEvalTarget(context?.eval_target),
            prompt_version: toStringSafe(context?.prompt_version),
            domain: toStringSafe(context?.domain),
            query_class: toStringSafe(context?.query_class),
            question_text: toStringSafe(context?.question_text),
            answer_text: toStringSafe(context?.answer_text)
          }
        } satisfies AutoRunAnnotationRecord;
      })
      // Records without all three identifiers cannot be addressed and are discarded.
      .filter((item) => item.annotation_id && item.run_id && item.case_id);
  } catch {
    return [];
  }
}
/**
 * Persists all annotations as pretty-printed JSON (2-space indent, UTF-8).
 *
 * The parent directory is created first when missing — the same behaviour as
 * writeAutoGenHistory — so the very first write does not fail with ENOENT.
 */
function writeAnnotations(items: AutoRunAnnotationRecord[]): void {
  const dir = path.dirname(AUTORUN_ANNOTATIONS_FILE);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(AUTORUN_ANNOTATIONS_FILE, JSON.stringify(items, null, 2), "utf-8");
}
/**
 * Builds the composite key `runId::caseId::messageIndex` for an annotation slot.
 */
function annotationKey(runId: string, caseId: string, messageIndex: number): string {
  return [runId, caseId, messageIndex].join("::");
}
/**
 * Aggregates the annotations of one run per case.
 *
 * For each case id: number of annotations, the latest parseable `updated_at`
 * (as ISO string, null when none parses) and the mean rating rounded to two decimals.
 */
function buildAnnotationStatsMap(runId: string, annotations: AutoRunAnnotationRecord[]): Map<string, AnnotationStatsByCase> {
  const tallies = new Map<string, { count: number; ratingSum: number; latestMs: number | null }>();
  for (const annotation of annotations) {
    if (annotation.run_id !== runId) continue;
    let tally = tallies.get(annotation.case_id);
    if (tally === undefined) {
      tally = { count: 0, ratingSum: 0, latestMs: null };
      tallies.set(annotation.case_id, tally);
    }
    tally.count += 1;
    tally.ratingSum += annotation.rating;
    const updatedMs = Date.parse(annotation.updated_at);
    const isNewer = tally.latestMs === null || updatedMs > tally.latestMs;
    if (Number.isFinite(updatedMs) && isNewer) {
      tally.latestMs = updatedMs;
    }
  }

  const stats = new Map<string, AnnotationStatsByCase>();
  tallies.forEach((tally, caseId) => {
    stats.set(caseId, {
      count: tally.count,
      latest_at: tally.latestMs === null ? null : new Date(tally.latestMs).toISOString(),
      avg_rating: tally.count > 0 ? Number((tally.ratingSum / tally.count).toFixed(2)) : null
    });
  });
  return stats;
}
/**
 * Picks, for one case of one run, the most recently updated annotation per message index.
 *
 * Rules when several annotations target the same message index:
 * - the first one seen is stored unconditionally;
 * - a later one replaces it when its `updated_at` parses and is >= the stored one
 *   (ties go to the later list entry);
 * - a later one with an unparseable `updated_at` never replaces a stored record;
 * - a stored record with an unparseable `updated_at` is replaced by any record whose
 *   timestamp parses. Previously the NaN comparison made such a record impossible to replace.
 */
function buildAnnotationsByMessageIndex(runId: string, caseId: string, annotations: AutoRunAnnotationRecord[]): Map<number, AutoRunAnnotationRecord> {
  const parseUpdatedMs = (record: AutoRunAnnotationRecord): number | null => {
    const ms = Date.parse(record.updated_at);
    return Number.isFinite(ms) ? ms : null;
  };
  const map = new Map<number, AutoRunAnnotationRecord>();
  for (const item of annotations) {
    if (item.run_id !== runId || item.case_id !== caseId) continue;
    const current = map.get(item.message_index);
    if (!current) {
      map.set(item.message_index, item);
      continue;
    }
    const nextMs = parseUpdatedMs(item);
    if (nextMs === null) continue;
    const currentMs = parseUpdatedMs(current);
    if (currentMs === null || nextMs >= currentMs) {
      map.set(item.message_index, item);
    }
  }
  return map;
}
/**
 * Determines which evaluation target a report belongs to.
 *
 * Precedence: an explicit, recognised `eval_target` in the report; then the
 * run id prefix; then a `.report.json` file suffix (treated as normalizer);
 * otherwise "unknown".
 */
function resolveRunTarget(input: { report: Record<string, unknown>; runId: string; reportPath: string }): AutoRunTarget {
  const declared = toStringSafe(input.report.eval_target);
  switch (declared) {
    case "assistant_stage1":
    case "assistant_stage2":
    case "assistant_p0":
    case "normalizer":
      return declared;
    default:
      break;
  }
  const prefixTargets: Array<[string, AutoRunTarget]> = [
    ["assistant-stage1-", "assistant_stage1"],
    ["assistant-stage2-", "assistant_stage2"],
    ["assistant-p0-", "assistant_p0"],
    ["eval-", "normalizer"]
  ];
  for (const [prefix, target] of prefixTargets) {
    if (input.runId.startsWith(prefix)) {
      return target;
    }
  }
  return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
/**
 * Resolves the run time of a report as both ISO string and epoch milliseconds.
 *
 * Uses `run_timestamp`, then `timestamp`, and finally the file modification time
 * when neither field holds a parseable date.
 */
function normalizeTimestamp(report: Record<string, unknown>, fileMtimeMs: number): { iso: string; ms: number } {
  const reportedMs = parseDateMs(report.run_timestamp) ?? parseDateMs(report.timestamp);
  const ms = reportedMs ?? fileMtimeMs;
  return { iso: new Date(ms).toISOString(), ms };
}
/**
 * Converts a rate to a percentage clamped to 0..100.
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100;
 * larger values are assumed to be percentages already. Null passes through.
 */
function rateToPercent(value: number | null): number | null {
  if (value === null) return null;
  const percent = value <= 1.2 ? value * 100 : value;
  return Math.max(0, Math.min(100, percent));
}
/**
 * Converts a score to a percentage clamped to 0..100.
 *
 * Values up to 5.2 are treated as points on a 5-point scale;
 * larger values are assumed to be percentages already. Null passes through.
 */
function scoreToPercent(value: number | null): number | null {
  if (value === null) return null;
  const percent = value <= 5.2 ? (value / 5) * 100 : value;
  return Math.max(0, Math.min(100, percent));
}
/**
 * Mean of the finite numbers in the list, rounded to two decimals.
 * Null and non-finite entries are ignored; returns null when nothing remains.
 */
function average(values: Array<number | null>): number | null {
  let total = 0;
  let counted = 0;
  for (const entry of values) {
    if (typeof entry === "number" && Number.isFinite(entry)) {
      total += entry;
      counted += 1;
    }
  }
  return counted === 0 ? null : Number((total / counted).toFixed(2));
}
/**
 * Returns the metric container of a report: `metrics.raw` when it is an object,
 * otherwise `metrics` itself, or null when the report has no metrics object.
 */
function getMetricRecord(report: Record<string, unknown>): Record<string, unknown> | null {
  const metrics = toRecord(report.metrics);
  if (metrics === null) {
    return null;
  }
  return toRecord(metrics.raw) ?? metrics;
}
/**
 * Computes the 0..100 score index of a run as the average of target-specific metrics.
 *
 * Each metric is normalized with rateToPercent / scoreToPercent; metrics that are
 * missing are left out of the average, except "lower is better" rates, which are
 * inverted with a default: a missing rate counts as 1 (worst) for the assistant
 * targets and as 0 (best) for the normalizer branch.
 *
 * NOTE(review): the normalizer branch inverts with `100 - x` while the assistant
 * branches use `1 - x`; this presumably reflects a percent-scaled
 * `high_confidence_error_rate` — confirm against the report producer.
 *
 * @returns null when the report has no metrics or none of the metrics is usable.
 */
function computeScoreIndex(report: Record<string, unknown>, target: AutoRunTarget): number | null {
  const metrics = getMetricRecord(report);
  if (metrics === null) {
    return null;
  }
  const read = (key: string): number | null => toNumberSafe(metrics[key]);
  const rate = (key: string): number | null => rateToPercent(read(key));
  const score = (key: string): number | null => scoreToPercent(read(key));
  // "Lower is better" fraction turned into "higher is better"; missing counts as worst.
  const invertedRate = (key: string): number | null => rateToPercent(1 - (read(key) ?? 1));

  switch (target) {
    case "assistant_p0":
      return average([
        rate("problem_first_answer_rate"),
        score("mechanism_coherence_score"),
        invertedRate("entity_leakage_rate"),
        score("accountant_actionability_score"),
        rate("route_correctness_rate"),
        rate("domain_purity_rate"),
        rate("limitation_honesty_rate"),
        rate("top_problem_unit_match_rate")
      ]);
    case "assistant_stage1":
      return average([
        rate("retrieval_differentiation_rate"),
        invertedRate("generic_explanation_rate"),
        score("accountant_actionability_score"),
        invertedRate("false_confidence_rate"),
        invertedRate("broad_answer_rate"),
        score("mechanism_specificity_score"),
        score("followup_context_retention_score")
      ]);
    case "assistant_stage2":
      return average([
        rate("problem_unit_precision"),
        rate("problem_unit_recall_proxy"),
        rate("duplicate_collapse_rate"),
        score("mechanism_coherence_score"),
        score("problem_clarity_score"),
        rate("problem_first_answer_rate"),
        invertedRate("entity_leakage_rate")
      ]);
    default:
      // "normalizer" and "unknown" share the normalizer metric set.
      return average([
        rate("schema_validation_pass_rate"),
        rateToPercent(read("route_resolution_accuracy") ?? read("route_hint_accuracy")),
        rateToPercent(read("execution_state_consistency_rate") ?? read("intent_class_accuracy")),
        rateToPercent(100 - (read("high_confidence_error_rate") ?? 0))
      ]);
  }
}
/**
 * Counts gate failures listed in a report.
 *
 * blocking = acceptance_gate.blocking_failures + baseline_stability_gate.blocking_regressions
 * quality  = acceptance_gate.quality_failures + baseline_stability_gate.legacy_quality_failures
 *            + baseline_stability_gate.quality_gap_failures
 * Missing gates or non-array fields count as zero.
 */
function countFailures(report: Record<string, unknown>): { blocking: number; quality: number } {
  const acceptance = toRecord(report.acceptance_gate);
  const baseline = toRecord(report.baseline_stability_gate);
  const sizeOf = (list: unknown): number => toArray(list).length;
  return {
    blocking: sizeOf(acceptance?.blocking_failures) + sizeOf(baseline?.blocking_regressions),
    quality:
      sizeOf(acceptance?.quality_failures) +
      sizeOf(baseline?.legacy_quality_failures) +
      sizeOf(baseline?.quality_gap_failures)
  };
}
/**
 * Derives a 0..100 case score from per-case metric subscores.
 *
 * `case_product_score`, when present, wins outright (rounded to two decimals).
 * Otherwise the available clarity / coherence / problem-first / leakage /
 * usefulness subscores are averaged; a missing leakage rate counts as 1 (worst).
 *
 * @returns null when there are no subscores at all.
 */
function caseScoreFromMetricSubscores(metricSubscores: Record<string, unknown> | null): number | null {
  if (metricSubscores === null) return null;
  const subscores = metricSubscores;
  const read = (key: string): number | null => toNumberSafe(subscores[key]);

  const productScore = scoreToPercent(read("case_product_score"));
  if (productScore !== null) {
    return Number(productScore.toFixed(2));
  }
  const leakage = read("entity_leakage_rate") ?? 1;
  return average([
    scoreToPercent(read("problem_clarity_score")),
    scoreToPercent(read("mechanism_coherence_score")),
    rateToPercent(read("problem_first_answer_rate")),
    rateToPercent(1 - leakage),
    scoreToPercent(read("accountant_usefulness_score"))
  ]);
}
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the checks `route_correct`, `domain_pure`,
 * `problem_first_answer` is a recognisable boolean, the case is closed unless
 * any of them is false. Otherwise a numeric score of 65 or more closes the case.
 *
 * @returns null when neither checks nor a score allow a decision.
 */
function isCaseClosed(input: {
  checks: Record<string, unknown> | null;
  scoreIndex: number | null;
}): boolean | null {
  const { checks, scoreIndex } = input;
  if (checks) {
    const verdicts = ["route_correct", "domain_pure", "problem_first_answer"].map((key) =>
      toBooleanSafe(checks[key])
    );
    const anyKnown = verdicts.some((verdict) => verdict !== null);
    if (anyKnown) {
      return !verdicts.includes(false);
    }
  }
  return typeof scoreIndex === "number" ? scoreIndex >= 65 : null;
}
/**
 * Returns the object entries of `report.results`; non-object entries are skipped
 * and a missing or non-array `results` yields an empty list.
 */
function getResultCases(report: Record<string, unknown>): Array<Record<string, unknown>> {
  const cases: Array<Record<string, unknown>> = [];
  for (const entry of toArray(report.results)) {
    const asRecord = toRecord(entry);
    if (asRecord !== null) {
      cases.push(asRecord);
    }
  }
  return cases;
}
/**
 * Builds one CaseSummary per result entry of a report.
 *
 * @param report                 parsed run report
 * @param runId                  run id, used to derive each case's session id (`${runId}-${caseId}`)
 * @param checkDialogAvailability when true, checks on disk whether the session file exists;
 *                               when false, dialog_available is always false
 * @param annotationStatsByCase  optional per-case annotation aggregates to merge in
 */
function buildCaseSummaries(
  report: Record<string, unknown>,
  runId: string,
  checkDialogAvailability: boolean,
  annotationStatsByCase?: Map<string, AnnotationStatsByCase>
): CaseSummary[] {
  const summaries: CaseSummary[] = [];
  getResultCases(report).forEach((result, position) => {
    // Results without a case id get a positional placeholder id.
    const caseId = toStringSafe(result.case_id) ?? `case-${position + 1}`;
    const checks = toRecord(result.checks);
    const metricSubscores = toRecord(result.metric_subscores);

    let scoreIndex = caseScoreFromMetricSubscores(metricSubscores);
    if (scoreIndex === null) {
      scoreIndex = scoreToPercent(toNumberSafe(result.accountant_usefulness_score));
    }

    const closed = isCaseClosed({ checks, scoreIndex });
    let status: CaseSummary["status"] = "unknown";
    if (closed !== null) {
      status = closed ? "closed" : "open";
    }

    const sessionId = `${runId}-${caseId}`;
    let dialogAvailable = false;
    if (checkDialogAvailability) {
      dialogAvailable = fs.existsSync(path.resolve(ASSISTANT_SESSIONS_DIR, `${sessionId}.json`));
    }

    const stats = annotationStatsByCase?.get(caseId);
    summaries.push({
      case_id: caseId,
      domain: toStringSafe(result.domain),
      query_class: toStringSafe(result.query_class),
      status,
      score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
      trace_id: toStringSafe(result.trace_id),
      reply_type: toStringSafe(result.reply_type),
      session_id: sessionId,
      dialog_available: dialogAvailable,
      commented_count: stats?.count ?? 0,
      latest_annotation_at: stats?.latest_at ?? null,
      avg_rating: stats?.avg_rating ?? null,
      checks,
      metric_subscores: metricSubscores
    });
  });
  return summaries;
}
/**
 * Summarizes case statuses overall and per domain.
 *
 * Cases without a domain are grouped under "unknown". Cases with status
 * "unknown" count toward a domain's total but toward neither closed nor open.
 * Domains are ordered by total case count, largest first.
 */
function buildCoverageFromCases(cases: CaseSummary[]): RunCoverage {
  const perDomain = new Map<string, { total: number; closed: number }>();
  const totals = { closed: 0, open: 0 };

  for (const { status, domain } of cases) {
    const isClosed = status === "closed";
    if (isClosed) {
      totals.closed += 1;
    } else if (status === "open") {
      totals.open += 1;
    }
    const key = domain ?? "unknown";
    let tally = perDomain.get(key);
    if (tally === undefined) {
      tally = { total: 0, closed: 0 };
      perDomain.set(key, tally);
    }
    tally.total += 1;
    if (isClosed) {
      tally.closed += 1;
    }
  }

  const domainRows: Array<{ domain: string; total_cases: number; closed_cases: number }> = [];
  perDomain.forEach((tally, domain) => {
    domainRows.push({ domain, total_cases: tally.total, closed_cases: tally.closed });
  });
  domainRows.sort((left, right) => right.total_cases - left.total_cases);

  return {
    closed_cases: totals.closed,
    open_cases: totals.open,
    domain_coverage: domainRows
  };
}
/**
 * Lists candidate report files, newest first, capped at `scanLimit`.
 *
 * Scans REPORTS_DIR for `*.json` and EVAL_CASES_DIR for `*.report.json`
 * (top level only). Missing directories and files whose stat fails are skipped.
 */
function collectJsonCandidates(scanLimit: number): Array<{ path: string; mtimeMs: number }> {
  const scanTargets: Array<{ dir: string; suffix: string }> = [
    { dir: REPORTS_DIR, suffix: ".json" },
    { dir: EVAL_CASES_DIR, suffix: ".report.json" }
  ];
  const found: Array<{ path: string; mtimeMs: number }> = [];
  for (const { dir, suffix } of scanTargets) {
    if (!fs.existsSync(dir)) continue;
    const matchingFiles = fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.isFile() && entry.name.endsWith(suffix));
    for (const file of matchingFiles) {
      const filePath = path.resolve(dir, file.name);
      try {
        found.push({ path: filePath, mtimeMs: fs.statSync(filePath).mtimeMs });
      } catch {
        // The file vanished or is unreadable between listing and stat: ignore it.
      }
    }
  }
  found.sort((left, right) => right.mtimeMs - left.mtimeMs);
  return found.slice(0, scanLimit);
}
/**
 * Parses the newest candidate report files and indexes them by run id.
 *
 * Files that cannot be read or parsed, are not JSON objects, or have no `run_id`
 * are skipped. When several files share a run id, the one with the latest
 * resolved timestamp is kept. The result is sorted newest first.
 */
function indexRuns(scanLimit: number): IndexedRun[] {
  const latestByRunId = new Map<string, IndexedRun>();
  for (const { path: reportPath, mtimeMs } of collectJsonCandidates(scanLimit)) {
    let report: Record<string, unknown> | null;
    try {
      const content: unknown = JSON.parse(fs.readFileSync(reportPath, "utf-8"));
      report = toRecord(content);
    } catch {
      continue;
    }
    if (report === null) continue;

    const runId = toStringSafe(report.run_id);
    if (runId === null) continue;

    const { iso, ms } = normalizeTimestamp(report, mtimeMs);
    const known = latestByRunId.get(runId);
    if (known !== undefined && ms <= known.timestamp_ms) {
      continue;
    }
    latestByRunId.set(runId, {
      run_id: runId,
      eval_target: resolveRunTarget({ report, runId, reportPath }),
      report_path: reportPath,
      report,
      timestamp_iso: iso,
      timestamp_ms: ms
    });
  }
  const runs = Array.from(latestByRunId.values());
  runs.sort((left, right) => right.timestamp_ms - left.timestamp_ms);
  return runs;
}
/**
 * Normalizes raw query parameters into RunFilters.
 *
 * - `from` / `to`: parsed dates, null when absent or unparseable;
 * - `target`: one of the four concrete targets, anything else means "all";
 * - `use_mock`: boolean flag; absent, "any" or unrecognised means no filtering (null);
 * - `mode`: lower-cased, default "all";
 * - `prompt_contains`: lower-cased, default empty string;
 * - `limit`: 1..500 (default 120); `scan_limit`: 50..5000 (default 900).
 */
function parseFilters(query: Record<string, unknown>): RunFilters {
  const lowered = (value: unknown): string | null => toStringSafe(value)?.toLowerCase() ?? null;

  let target: RunFilters["target"] = "all";
  const requestedTarget = lowered(query.target);
  switch (requestedTarget) {
    case "normalizer":
    case "assistant_stage1":
    case "assistant_stage2":
    case "assistant_p0":
      target = requestedTarget;
      break;
    default:
      break;
  }

  const useMockText = toStringSafe(query.use_mock);
  const skipMockFilter = useMockText === null || useMockText.toLowerCase() === "any";

  return {
    from_ms: parseDateMs(query.from),
    to_ms: parseDateMs(query.to),
    target,
    use_mock: skipMockFilter ? null : toBooleanSafe(useMockText),
    prompt_contains: lowered(query.prompt_contains) ?? "",
    mode: lowered(query.mode) ?? "all",
    limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
    scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
  };
}
/**
 * Checks whether a run passes every active filter.
 *
 * Time bounds are inclusive. Mode and prompt-version comparisons are done on
 * lower-cased report values; the prompt filter is a substring match.
 *
 * @param run Indexed run to test.
 * @param filters Filters produced by `parseFilters`.
 * @returns True when the run satisfies all filters.
 */
function matchesFilters(run: IndexedRun, filters: RunFilters): boolean {
  const { from_ms: fromMs, to_ms: toMs } = filters;
  const beforeWindow = fromMs !== null && run.timestamp_ms < fromMs;
  const afterWindow = toMs !== null && run.timestamp_ms > toMs;
  if (beforeWindow || afterWindow) {
    return false;
  }
  if (filters.target !== "all" && filters.target !== run.eval_target) {
    return false;
  }
  if (filters.mode !== "all") {
    const reportMode = (toStringSafe(run.report.mode) ?? "").toLowerCase();
    if (reportMode !== filters.mode) {
      return false;
    }
  }
  if (filters.use_mock !== null && toBooleanSafe(run.report.use_mock) !== filters.use_mock) {
    return false;
  }
  if (filters.prompt_contains.length === 0) {
    return true;
  }
  const promptVersion = (toStringSafe(run.report.prompt_version) ?? "").toLowerCase();
  return promptVersion.includes(filters.prompt_contains);
}
/**
 * Condenses an indexed run into the summary row used by history views.
 *
 * Provider and model are looked up on the report itself, then on its
 * `connection` record, then on its normalize config, taking the first value
 * that `toStringSafe` accepts. `cases_total` falls back to the number of case
 * summaries when the report does not carry a usable number.
 *
 * @param run Indexed run whose report is summarized.
 * @returns Summary with identity, configuration, score, failure counts and coverage.
 */
function buildRunSummary(run: IndexedRun): RunSummary {
  const { report } = run;
  const connection = toRecord(report.connection);
  const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);

  // Walks the candidates in order and stops at the first one toStringSafe accepts.
  const pickString = (...candidates: unknown[]) => {
    let picked = toStringSafe(candidates[0]);
    for (let i = 1; i < candidates.length && picked == null; i += 1) {
      picked = toStringSafe(candidates[i]);
    }
    return picked;
  };

  const llmProvider = pickString(
    report.llm_provider,
    report.llmProvider,
    connection?.llm_provider,
    connection?.llmProvider,
    normalizeConfig?.llm_provider,
    normalizeConfig?.llmProvider
  );
  const model = pickString(report.model, connection?.model, normalizeConfig?.model);

  const cases = buildCaseSummaries(report, run.run_id, false);
  const coverage = buildCoverageFromCases(cases);
  const failures = countFailures(report);
  const budget = toRecord(report.budget);

  return {
    run_id: run.run_id,
    eval_target: run.eval_target,
    run_timestamp: run.timestamp_iso,
    mode: toStringSafe(report.mode),
    llm_provider: llmProvider,
    model,
    use_mock: toBooleanSafe(report.use_mock),
    prompt_version: toStringSafe(report.prompt_version),
    schema_version: toStringSafe(report.schema_version),
    suite_id: toStringSafe(report.suite_id),
    cases_total: toNumberSafe(report.cases_total) ?? cases.length,
    requests_total: toNumberSafe(budget?.requests_total),
    report_path: run.report_path,
    score_index: computeScoreIndex(report, run.eval_target),
    blocking_failures: failures.blocking,
    quality_failures: failures.quality,
    closed_cases: coverage.closed_cases,
    open_cases: coverage.open_cases,
    domain_coverage: coverage.domain_coverage
  };
}
/**
 * Sums per-domain coverage across several run summaries.
 *
 * @param summaries Run summaries whose `domain_coverage` lists are combined.
 * @returns One row per domain with summed totals, sorted by descending `total_cases`.
 */
function mergeDomainCoverage(summaries: RunSummary[]): DomainCoverage[] {
  const tallies = new Map<string, { total: number; closed: number }>();
  summaries
    .flatMap((summary) => summary.domain_coverage)
    .forEach((row) => {
      const tally = tallies.get(row.domain) ?? { total: 0, closed: 0 };
      tally.total += row.total_cases;
      tally.closed += row.closed_cases;
      tallies.set(row.domain, tally);
    });
  const rows = Array.from(tallies, ([domain, tally]) => ({
    domain,
    total_cases: tally.total,
    closed_cases: tally.closed
  }));
  rows.sort((left, right) => right.total_cases - left.total_cases);
  return rows;
}
/**
 * Aggregates headline statistics over a list of run summaries.
 *
 * The trend compares the score of the first summary with the second one and
 * only reports "up"/"down" when they differ by more than 0.5; it is "flat"
 * when either score is missing.
 *
 * @param summaries Run summaries; the first two entries drive latest/previous score.
 * @returns Counts per target, failure-run counts, score figures, trend and merged coverage.
 */
function buildHistoryStats(summaries: RunSummary[]): HistoryStats {
  const runsPerTarget: Record<string, number> = {};
  const scores: number[] = [];
  let runsWithBlocking = 0;
  let runsWithQualityGap = 0;

  summaries.forEach((summary) => {
    runsPerTarget[summary.eval_target] = (runsPerTarget[summary.eval_target] ?? 0) + 1;
    if (summary.blocking_failures > 0) {
      runsWithBlocking += 1;
    }
    if (summary.quality_failures > 0) {
      runsWithQualityGap += 1;
    }
    if (typeof summary.score_index === "number") {
      scores.push(summary.score_index);
    }
  });

  const scoreAt = (position: number): number | null => {
    const score = summaries[position]?.score_index;
    return typeof score === "number" ? score : null;
  };
  const latestScore = scoreAt(0);
  const previousScore = scoreAt(1);

  let trend: AutoRunTrend = "flat";
  if (latestScore !== null && previousScore !== null) {
    if (latestScore > previousScore + 0.5) {
      trend = "up";
    } else if (latestScore < previousScore - 0.5) {
      trend = "down";
    }
  }

  let averageScore: number | null = null;
  if (scores.length > 0) {
    let sum = 0;
    for (const score of scores) {
      sum += score;
    }
    averageScore = Number((sum / scores.length).toFixed(2));
  }

  return {
    runs_total: summaries.length,
    by_target: runsPerTarget,
    blocking_runs: runsWithBlocking,
    quality_gap_runs: runsWithQualityGap,
    avg_score_index: averageScore,
    latest_score_index: latestScore,
    previous_score_index: previousScore,
    trend,
    domain_coverage: mergeDomainCoverage(summaries)
  };
}
/**
 * Looks up a single run by id in a freshly built run index.
 *
 * @param runId Run id to find.
 * @param scanLimit Maximum number of report files to index (default 3000).
 * @returns The matching run, or null when it is not in the index.
 */
function findRunById(runId: string, scanLimit = 3000): IndexedRun | null {
  for (const candidate of indexRuns(scanLimit)) {
    if (candidate.run_id === runId) {
      return candidate;
    }
  }
  return null;
}
/**
 * Extracts routing/debug attributes of the last assistant message in a session.
 *
 * @param dialogRecord Parsed session record, or null.
 * @returns Null for a null input; otherwise the reply type, trace id and selected
 *          `debug` fields of the last conversation entry whose role is "assistant".
 */
function buildAssistantModeSummary(dialogRecord: Record<string, unknown> | null): Record<string, unknown> | null {
  if (dialogRecord === null) {
    return null;
  }
  const entries = toArray(dialogRecord.conversation);
  let lastAssistant: Record<string, unknown> | undefined;
  // Scan from the end so the most recent assistant entry is found first.
  for (let position = entries.length - 1; position >= 0; position -= 1) {
    const entry = toRecord(entries[position]);
    if (entry !== null && toStringSafe(entry.role) === "assistant") {
      lastAssistant = entry;
      break;
    }
  }
  const debug = toRecord(lastAssistant?.debug);
  return {
    reply_type: toStringSafe(lastAssistant?.reply_type),
    trace_id: toStringSafe(lastAssistant?.trace_id),
    detected_mode: toStringSafe(debug?.detected_mode),
    execution_lane: toStringSafe(debug?.execution_lane),
    tool_gate_decision: toStringSafe(debug?.tool_gate_decision),
    living_router_mode: toStringSafe(debug?.living_router_mode),
    fallback_type: toStringSafe(debug?.fallback_type)
  };
}
/**
 * Loads the stored assistant session for one case of a run.
 *
 * The session file is `<runId>-<caseId>.json` under ASSISTANT_SESSIONS_DIR.
 * Messages come from the record's `conversation` list; the decomposition comes
 * from `human_readable.decomposition` of the last entry in `turns`.
 *
 * @param runId Run id (may originate from a request parameter).
 * @param caseId Case id (may originate from a request parameter).
 * @returns The dialog payload, or null when the id would resolve outside the
 *          sessions directory, the file does not exist, cannot be read or
 *          parsed, or does not contain a record.
 */
function loadSessionDialog(runId: string, caseId: string): {
  source: "assistant_session";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} | null {
  const sessionId = `${runId}-${caseId}`;
  const sessionsRoot = path.resolve(ASSISTANT_SESSIONS_DIR);
  const filePath = path.resolve(sessionsRoot, `${sessionId}.json`);
  // Route handlers pass ids taken from URL parameters, so an id containing
  // "../" segments must not be able to address files outside the sessions dir.
  const relativePath = path.relative(sessionsRoot, filePath);
  const escapesRoot =
    relativePath === "" ||
    relativePath === ".." ||
    relativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativePath);
  if (escapesRoot) {
    return null;
  }
  if (!fs.existsSync(filePath)) {
    return null;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, "utf-8")) as unknown;
  } catch {
    // unreadable or malformed session file
    return null;
  }
  const record = toRecord(parsed);
  if (!record) return null;
  const conversation = toArray(record.conversation)
    .map((item) => toRecord(item))
    .filter((item): item is Record<string, unknown> => item !== null);
  const messages = conversation.map((item) => ({
    message_id: toStringSafe(item.message_id),
    role: toStringSafe(item.role) ?? "unknown",
    text: toStringSafe(item.text) ?? "",
    created_at: toStringSafe(item.created_at),
    trace_id: toStringSafe(item.trace_id),
    reply_type: toStringSafe(item.reply_type)
  }));
  const turns = toArray(record.turns)
    .map((item) => toRecord(item))
    .filter((item): item is Record<string, unknown> => item !== null);
  // Only the last turn contributes the decomposition steps.
  const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
  const humanReadable = toRecord(lastTurn?.human_readable);
  const decomposition = toArray(humanReadable?.decomposition)
    .map((item) => toStringSafe(item))
    .filter((item): item is string => item !== null);
  return {
    source: "assistant_session",
    session_id: sessionId,
    messages,
    decomposition,
    assistant_mode: buildAssistantModeSummary(record)
  };
}
/**
 * Synthesizes a two-message dialog for a case from the run report alone.
 *
 * The user message is the case's raw question (or `Case <id>` when none is
 * present). The assistant message lists the available check flags, confidence
 * and numeric metric subscores, one `key=value` pair per line.
 *
 * @param run Indexed run that owns the case.
 * @param caseId Case id to look up among the report's result cases.
 * @returns A "report_fallback" dialog, or an empty dialog with source "none"
 *          when the report has no such case.
 */
function buildFallbackDialog(run: IndexedRun, caseId: string): {
  source: "report_fallback" | "none";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} {
  const sessionId = `${run.run_id}-${caseId}`;
  const reportCase = getResultCases(run.report).find((entry) => (toStringSafe(entry.case_id) ?? "") === caseId);
  if (!reportCase) {
    return {
      source: "none",
      session_id: sessionId,
      messages: [],
      decomposition: [],
      assistant_mode: null
    };
  }

  const question =
    toStringSafe(reportCase.raw_question) ?? toStringSafe(reportCase.user_query_raw) ?? `Case ${caseId}`;

  const summaryLines: string[] = [];
  // Each flag is rendered under its own field name, in this fixed order.
  for (const flag of ["validation_passed", "route_match", "intent_match"] as const) {
    const flagValue = toBooleanSafe(reportCase[flag]);
    if (flagValue !== null) {
      summaryLines.push(`${flag}=${flagValue}`);
    }
  }
  const confidence = toStringSafe(reportCase.confidence_overall);
  if (confidence) {
    summaryLines.push(`confidence=${confidence}`);
  }
  const metricSubscores = toRecord(reportCase.metric_subscores);
  if (metricSubscores) {
    Object.entries(metricSubscores)
      .filter(([, score]) => toNumberSafe(score) !== null)
      .forEach(([metric, score]) => {
        summaryLines.push(`${metric}=${score}`);
      });
  }
  if (summaryLines.length === 0) {
    summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
  }

  const makeMessage = (
    role: "user" | "assistant",
    text: string,
    traceId: string | null,
    replyType: string | null
  ): Record<string, unknown> => ({
    message_id: null,
    role,
    text,
    created_at: null,
    trace_id: traceId,
    reply_type: replyType
  });

  return {
    source: "report_fallback",
    session_id: sessionId,
    messages: [
      makeMessage("user", question, null, null),
      makeMessage(
        "assistant",
        summaryLines.join("\n"),
        toStringSafe(reportCase.trace_id),
        toStringSafe(reportCase.reply_type)
      )
    ],
    decomposition: [],
    assistant_mode: null
  };
}
/**
 * Decorates dialog messages with their position and any stored annotation.
 *
 * @param runId Run the messages belong to.
 * @param caseId Case the messages belong to.
 * @param messages Dialog messages in display order.
 * @param annotations All known annotations; matching is delegated to
 *                    `buildAnnotationsByMessageIndex`.
 * @returns Copies of the messages with `message_index`, `commented` and `annotation` added.
 */
function withMessageAnnotations(
  runId: string,
  caseId: string,
  messages: Array<Record<string, unknown>>,
  annotations: AutoRunAnnotationRecord[]
): Array<Record<string, unknown>> {
  const annotationsByIndex = buildAnnotationsByMessageIndex(runId, caseId, annotations);
  const decorated: Array<Record<string, unknown>> = [];
  for (let position = 0; position < messages.length; position += 1) {
    const annotation = annotationsByIndex.get(position) ?? null;
    decorated.push({
      ...messages[position],
      message_index: position,
      commented: annotation !== null,
      annotation
    });
  }
  return decorated;
}
/**
 * Concatenates the dialogs of every case in a run into one message stream.
 *
 * Each case uses its stored session when available and the report fallback
 * otherwise. Messages keep their per-case position in `case_message_index`
 * while `message_index` becomes the position in the combined stream.
 * Decomposition steps are prefixed with `[<caseId>] `.
 *
 * @param run Indexed run to aggregate.
 * @param annotations All known annotations, applied per case.
 * @returns Aggregate dialog with session id `<run_id>::__all__`.
 */
function buildRunAggregateDialog(
  run: IndexedRun,
  annotations: AutoRunAnnotationRecord[]
): {
  source: "run_aggregate";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} {
  const combinedMessages: Array<Record<string, unknown>> = [];
  const combinedSteps: string[] = [];
  for (const { case_id: caseId } of buildCaseSummaries(run.report, run.run_id, false)) {
    const dialog = loadSessionDialog(run.run_id, caseId) ?? buildFallbackDialog(run, caseId);
    const annotated = withMessageAnnotations(run.run_id, caseId, dialog.messages, annotations);
    annotated.forEach((message) => {
      combinedMessages.push({
        ...message,
        case_id: caseId,
        case_message_index: toNumberSafe(message.message_index) ?? 0,
        // The combined position equals the number of messages collected so far.
        message_index: combinedMessages.length
      });
    });
    for (const step of dialog.decomposition) {
      combinedSteps.push(`[${caseId}] ${step}`);
    }
  }
  return {
    source: "run_aggregate",
    session_id: `${run.run_id}::__all__`,
    messages: combinedMessages,
    decomposition: combinedSteps,
    assistant_mode: null
  };
}
/**
 * Creates an annotation id of the form `ann-<time>-<random>`.
 *
 * Both parts are base-36: the current epoch milliseconds and up to seven
 * digits taken from `Math.random()`.
 *
 * @returns The generated id.
 */
function generateAnnotationId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 9);
  return ["ann", timePart, randomPart].join("-");
}
/**
 * Normalizes a comment value to a trimmed string.
 *
 * @param value Raw comment value.
 * @returns The trimmed text, or an empty string when `toStringSafe` yields nothing.
 */
function parseComment(value: unknown): string {
  const raw = toStringSafe(value);
  return (raw ?? "").trim();
}
/**
 * Parses the decision filter of an annotations query.
 *
 * @param value Raw filter value.
 * @returns "all" when the value is missing, empty or literally "all"; otherwise
 *          whatever `parseManualCaseDecision` returns for it.
 */
function parseDecisionFilter(value: unknown): ManualCaseDecision | "all" {
  const requested = toStringSafe(value);
  if (requested && requested !== "all") {
    return parseManualCaseDecision(requested);
  }
  return "all";
}
/**
 * Parses the auto-generation mode, case-insensitively.
 *
 * @param value Raw mode value.
 * @returns "qwen_seed" when requested; "codex_creative" for that value and for
 *          anything unrecognized or missing.
 */
function parseAutoGenMode(value: unknown): AutoGenMode {
  const requested = toStringSafe(value)?.toLowerCase() ?? "";
  // "codex_creative" is both a valid choice and the fallback, so only
  // "qwen_seed" needs to be singled out.
  return requested === "qwen_seed" ? "qwen_seed" : "codex_creative";
}
/**
 * Parses the requested number of generated questions.
 *
 * @param value Raw count value.
 * @returns Result of `clampInt` with arguments 1, 200 and 24 (presumably min,
 *          max and default — confirm against the helper).
 */
function parseAutogenCount(value: unknown): number {
  const requested = toNumberSafe(value);
  return clampInt(requested, 1, 200, 24);
}
/**
 * Parses the optional domain hint for auto-generation.
 *
 * @param value Raw domain value.
 * @returns The hint from `normalizeDomainHint` cut to at most 80 characters, or
 *          null when no hint is obtained.
 */
function parseAutogenDomain(value: unknown): string | null {
  const hint = normalizeDomainHint(value);
  return hint ? hint.slice(0, 80) : null;
}
/**
 * Reads the LLM runtime settings for auto-generation from a request body.
 *
 * Provider and model are taken from `body.llm` first and from `context` when
 * `body.llm` does not define them; the remaining settings come from
 * `body.llm` only.
 *
 * @param body Request body.
 * @param context Optional context record supplying provider/model fallbacks.
 * @returns The runtime config, or null when no model is given or the provider
 *          is neither "openai" nor "local".
 */
function parseAutogenLlmRuntimeConfig(
  body: Record<string, unknown>,
  context: Record<string, unknown> | null
): AutoGenLlmRuntimeConfig | null {
  const llm = toRecord(body.llm);
  const provider = toStringSafe(llm?.llm_provider ?? context?.llm_provider)?.toLowerCase() ?? "";
  const model = toStringSafe(llm?.model ?? context?.model);
  const providerSupported = provider === "openai" || provider === "local";
  if (!providerSupported || !model) {
    return null;
  }
  return {
    llm_provider: provider === "local" ? "local" : "openai",
    api_key: toStringSafe(llm?.api_key) ?? "",
    model,
    base_url: toStringSafe(llm?.base_url),
    temperature: toNumberSafe(llm?.temperature),
    max_output_tokens: toNumberSafe(llm?.max_output_tokens)
  };
}
/**
 * Scores how "clean" a text looks; higher is better.
 *
 * Every Cyrillic or basic Latin letter adds 1. Characters from the marker set
 * subtract 3 each, and every match of the two-character pair patterns
 * subtracts 2.
 *
 * @param value Text to score.
 * @returns Letter count minus the weighted marker counts.
 */
function textMojibakeScore(value: string): number {
  const text = String(value ?? "");
  const occurrences = (pattern: RegExp): number => (text.match(pattern) ?? []).length;
  const letters = occurrences(/[А-Яа-яЁё]/g) + occurrences(/[A-Za-z]/g);
  const penalty =
    occurrences(/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/g) * 3 +
    occurrences(/(?:Р.|С.|Ð.|Ñ.)/g) * 2 +
    occurrences(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) * 2;
  return letters - penalty;
}
/**
 * Heuristically decides whether a text may be mis-decoded.
 *
 * @param value Text to inspect.
 * @returns False for blank text; true when the text contains any character of
 *          the marker set, or at least two matches of either pair pattern.
 */
function looksLikeMojibake(value: string): boolean {
  const text = String(value ?? "");
  if (text.trim().length === 0) {
    return false;
  }
  const occurrences = (pattern: RegExp): number => (text.match(pattern) ?? []).length;
  return (
    /[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/.test(text) ||
    occurrences(/(?:Р.|С.|Ð.|Ñ.)/g) >= 2 ||
    occurrences(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) >= 2
  );
}
/**
 * Tries to undo mis-decoded text by re-encoding it and decoding as UTF-8.
 *
 * Text that does not look like mojibake is returned unchanged. Otherwise up to
 * three passes are made; each pass tries a Windows-1251 round trip and then a
 * Latin-1 round trip, keeping a result only when it scores strictly better
 * than the current candidate.
 *
 * The comparison penalizes U+FFFD on top of `textMojibakeScore`. Decoding
 * bytes that are not valid UTF-8 yields replacement characters, which the
 * plain score rates as neutral; without the penalty, valid text with few
 * letters and several typographic marks (dashes, ellipses) scores below an
 * all-replacement-character decode and would be overwritten by it.
 *
 * @param value Text to repair.
 * @returns The best-scoring candidate, or the input when no attempt improves it.
 */
function repairAutogenMojibake(value: string): string {
  const source = String(value ?? "");
  if (!looksLikeMojibake(source)) {
    return source;
  }
  // Each replacement character costs as much as one hard mojibake marker.
  const repairScore = (text: string): number =>
    textMojibakeScore(text) - (text.match(/\uFFFD/g) ?? []).length * 3;
  const roundTrips: Array<(text: string) => string> = [
    (text) => iconv.encode(text, "win1251").toString("utf8"),
    (text) => Buffer.from(text, "latin1").toString("utf8")
  ];
  let candidate = source;
  for (let pass = 0; pass < 3; pass += 1) {
    let improved = false;
    for (const roundTrip of roundTrips) {
      try {
        const decoded = roundTrip(candidate);
        if (repairScore(decoded) > repairScore(candidate)) {
          candidate = decoded;
          improved = true;
        }
      } catch {
        // a failed round trip simply contributes no candidate
      }
    }
    if (!improved) {
      break;
    }
  }
  return candidate;
}
/**
 * Cleans one generated question: repairs mojibake, collapses every run of
 * whitespace into a single space and strips leading/trailing whitespace.
 *
 * @param value Raw question text.
 * @returns The cleaned single-line text (possibly empty).
 */
function sanitizeGeneratedQuestion(value: string): string {
  const repaired = repairAutogenMojibake(String(value ?? ""));
  // Splitting on whitespace runs and re-joining collapses and trims in one go.
  return repaired
    .split(/\s+/)
    .filter((word) => word.length > 0)
    .join(" ");
}
/**
 * Splits free-form model output into individual question strings.
 *
 * Strategies are tried in order and the first one yielding more than one
 * item wins: one question per line (list markers removed), splitting on "?",
 * then double-quoted fragments of at least six characters. If none applies,
 * the whole cleaned text is returned as a single question.
 *
 * A numeric list marker is only stripped when the digits are followed by
 * ")", "." or ":" and that delimiter is not itself followed by a digit. This
 * keeps questions that merely start with a number intact ("1С ...",
 * "2023 ...", "60.01 ..."), which an optional delimiter would truncate.
 *
 * @param rawText Raw text that may contain several questions.
 * @returns Cleaned, non-empty question strings (empty array for blank input).
 */
function splitQuestionCandidates(rawText: string): string[] {
  const normalized = repairAutogenMojibake(rawText).replace(/\r/g, "\n").trim();
  if (!normalized) return [];
  // Undo JSON-style escaping that may survive in raw model output.
  const unescaped = normalized.replace(/\\"/g, '"').replace(/\\n/g, "\n");
  const byLines = unescaped
    .split(/\n+/g)
    .map((line) => line.replace(/^\s*(?:[-*•]|\d{1,3}[).:](?!\d))\s*/, ""))
    .map((line) => sanitizeGeneratedQuestion(line))
    .filter((line) => line.length > 0);
  if (byLines.length > 1) {
    return byLines;
  }
  const questionMarkCount = (unescaped.match(/\?/g) ?? []).length;
  if (questionMarkCount > 1) {
    const byQuestion = unescaped
      .split("?")
      .map((chunk) => sanitizeGeneratedQuestion(chunk))
      .filter((chunk) => chunk.length > 0)
      // Splitting consumed the question marks; put one back on every chunk.
      .map((chunk) => (chunk.endsWith("?") ? chunk : `${chunk}?`));
    if (byQuestion.length > 1) {
      return byQuestion;
    }
  }
  const quoted = Array.from(unescaped.matchAll(/"([^"\n]{6,}?)"/g))
    .map((match) => sanitizeGeneratedQuestion(match[1]))
    .filter((line) => line.length > 0);
  if (quoted.length > 1) {
    return quoted;
  }
  const cleaned = sanitizeGeneratedQuestion(unescaped);
  return cleaned ? [cleaned] : [];
}
/**
 * Parses JSON out of model output that may carry code fences or extra text.
 *
 * After mojibake repair and fence removal the whole text is parsed; if that
 * fails, the span from the first "[" to the last "]" is tried, then the span
 * from the first "{" to the last "}".
 *
 * @param rawText Raw model output.
 * @returns The first successfully parsed value, or null when nothing parses.
 */
function parseAutogenOutputJson(rawText: string): unknown | null {
  const cleaned = repairAutogenMojibake(rawText)
    .trim()
    .replace(/^```json\s*/i, "")
    .replace(/^```\s*/i, "")
    .replace(/```$/i, "")
    .trim();
  if (cleaned.length === 0) {
    return null;
  }
  // JSON.parse never yields undefined, so undefined can signal "did not parse".
  const attempt = (text: string): unknown => {
    try {
      return JSON.parse(text) as unknown;
    } catch {
      return undefined;
    }
  };
  const whole = attempt(cleaned);
  if (whole !== undefined) {
    return whole;
  }
  const delimiters: Array<[string, string]> = [
    ["[", "]"],
    ["{", "}"]
  ];
  for (const [opening, closing] of delimiters) {
    const start = cleaned.indexOf(opening);
    const end = cleaned.lastIndexOf(closing);
    if (start < 0 || end <= start) {
      continue;
    }
    const parsedFragment = attempt(cleaned.slice(start, end + 1));
    if (parsedFragment !== undefined) {
      return parsedFragment;
    }
  }
  return null;
}
/**
 * Recursively pulls question strings out of an arbitrary parsed value.
 *
 * Arrays are flattened element by element. Strings are first checked for
 * embedded JSON (via `parseAutogenOutputJson`, then a plain `JSON.parse`) and
 * otherwise split with `splitQuestionCandidates`. Records contribute their
 * `questions` member, falling back to `question`, `user_message` or `text`.
 *
 * @param value Value to inspect.
 * @param depth Current recursion depth; values nested deeper than 5 are ignored.
 * @returns Collected question strings (possibly empty).
 */
function collectQuestionsFromCandidate(value: unknown, depth = 0): string[] {
  if (depth > 5 || value === null || value === undefined) {
    return [];
  }
  const nextDepth = depth + 1;

  if (Array.isArray(value)) {
    return value.flatMap((element) => collectQuestionsFromCandidate(element, nextDepth));
  }

  if (typeof value === "string") {
    const text = value.trim();
    if (text.length === 0) {
      return [];
    }
    const embedded = parseAutogenOutputJson(text);
    if (embedded !== null) {
      const fromEmbedded = collectQuestionsFromCandidate(embedded, nextDepth);
      if (fromEmbedded.length > 0) {
        return fromEmbedded;
      }
    }
    try {
      const decoded = JSON.parse(text) as unknown;
      if (decoded !== text) {
        const fromDecoded = collectQuestionsFromCandidate(decoded, nextDepth);
        if (fromDecoded.length > 0) {
          return fromDecoded;
        }
      }
    } catch {
      // plain text, not JSON
    }
    return splitQuestionCandidates(text);
  }

  const record = toRecord(value);
  if (!record) {
    return [];
  }
  const listed = collectQuestionsFromCandidate(record.questions, nextDepth);
  if (listed.length > 0) {
    return listed;
  }
  const single = toStringSafe(record.question ?? record.user_message ?? record.text);
  return single ? splitQuestionCandidates(single) : [];
}
/**
 * Extracts questions from raw model output.
 *
 * @param rawText Raw model output.
 * @returns Questions found in the JSON-parsed output, or, when that yields
 *          nothing, questions collected from the raw text itself.
 */
function extractQuestionsFromAutogenOutput(rawText: string): string[] {
  const structured = collectQuestionsFromCandidate(parseAutogenOutputJson(rawText));
  return structured.length > 0 ? structured : collectQuestionsFromCandidate(rawText);
}
/**
 * Asks the configured LLM for seed questions and tops the result up with
 * locally generated ones.
 *
 * Up to eight style examples are sent along: canonical questions when there
 * are any, domain templates otherwise. Token budget and temperature are
 * bounded before the call (temperature defaults to 0.5 and is kept within
 * 0..1.5). The model's questions are cleaned and de-duplicated; locally
 * generated questions are appended after them and the list is cut to `count`.
 *
 * @param input Generation settings, LLM runtime config and client.
 * @returns At most `input.count` unique questions, model output first.
 * @throws ApiError AUTOGEN_LLM_EMPTY_OUTPUT (502) when the model output
 *         contains no usable question.
 */
async function generateQwenSeedQuestionsLive(input: {
  count: number;
  domain: string | null;
  personalityPrompt: string | null;
  llmConfig: AutoGenLlmRuntimeConfig;
  client: OpenAIResponsesClient;
}): Promise<string[]> {
  const { count, domain, llmConfig, client } = input;

  const canonicalExamples = collectCanonicalQuestions(40);
  const templateExamples = fallbackDomainTemplates(domain);
  const styleExamples = (canonicalExamples.length > 0 ? canonicalExamples : templateExamples).slice(0, 8);

  const persona = repairAutogenMojibake(
    input.personalityPrompt ??
      "Генерируй реалистичные вопросы бухгалтера по 1С. Разговорный стиль допустим, но смысл должен быть четким."
  );
  const maxOutputTokens = clampInt(llmConfig.max_output_tokens, 300, 3000, 1200);
  const temperature =
    llmConfig.temperature === null ? 0.5 : Math.max(0, Math.min(1.5, llmConfig.temperature));

  const systemLines = [
    "Ты генератор вопросов для автопрогонов бухгалтерского ассистента по 1С.",
    "Возвращай только JSON и никаких пояснений.",
    "Ассистент работает в read-only режиме: не проси действий изменения базы."
  ];
  const developerLines = [
    `Нужно сгенерировать ровно ${count} вопросов.`,
    "Формат ответа строго:",
    '{"questions":["вопрос 1","вопрос 2"]}',
    "Требования:",
    "1) каждый вопрос отдельный, без дубликатов;",
    "2) живой пользовательский язык;",
    "3) допустимы легкие разговорные сокращения;",
    "4) не выдавай мета-комментарии и не описывай правила."
  ];
  const userLines = [
    `Домен: ${domain ?? "general"}.`,
    `Промпт личности: ${persona}`,
    "Примеры ориентиров по стилю и тематике:",
    ...styleExamples.map((example, position) => `${position + 1}. ${example}`)
  ];

  const systemPrompt = repairAutogenMojibake(systemLines.join(" "));
  const developerPrompt = repairAutogenMojibake(developerLines.join("\n"));
  const userMessage = repairAutogenMojibake(userLines.join("\n"));

  const response = await client.chat(
    {
      llmProvider: llmConfig.llm_provider,
      apiKey: llmConfig.api_key,
      model: llmConfig.model,
      baseUrl: llmConfig.base_url ?? undefined,
      temperature,
      maxOutputTokens
    },
    {
      systemPrompt,
      developerPrompt,
      userMessage,
      temperature,
      maxOutputTokens
    }
  );

  const fromModel = new Set<string>();
  for (const rawQuestion of extractQuestionsFromAutogenOutput(response.outputText)) {
    const question = sanitizeGeneratedQuestion(rawQuestion);
    if (question.length > 0) {
      fromModel.add(question);
    }
  }
  if (fromModel.size === 0) {
    throw new ApiError("AUTOGEN_LLM_EMPTY_OUTPUT", "Qwen не вернул пригодные вопросы для автогенерации.", 502, {
      model: llmConfig.model
    });
  }

  // Locally generated questions only fill the slots the model left empty.
  const merged = new Set<string>(Array.from(fromModel));
  for (const question of generateQwenSeedQuestions(count, domain)) {
    merged.add(question);
  }
  return Array.from(merged).slice(0, count);
}
/**
 * Tells whether a query carries any run filter parameter.
 *
 * The parameters `from`, `to`, `target`, `mode`, `use_mock` and
 * `prompt_contains` are checked in that order; the first one for which
 * `toStringSafe` yields a value decides the result.
 *
 * @param query Query parameters as received from the request.
 * @returns True when that first value is truthy, false otherwise.
 */
function hasAnyRunFilterQuery(query: Record<string, unknown>): boolean {
  const filterKeys = ["from", "to", "target", "mode", "use_mock", "prompt_contains"];
  for (const key of filterKeys) {
    const text = toStringSafe(query[key]);
    if (text !== null && text !== undefined) {
      return Boolean(text);
    }
  }
  return false;
}
/**
 * Builds the file name for a generated case set.
 *
 * @param mode Generation mode embedded in the name.
 * @param generationId Generation id embedded in the name.
 * @returns `assistant_autogen_<mode>_<YYYYMMDDHHMMSS>_<generationId>.json`,
 *          with the timestamp taken from the current time in UTC.
 */
function buildAutogenCaseSetFileName(mode: AutoGenMode, generationId: string): string {
  // ISO form is "YYYY-MM-DDTHH:MM:SS.mmmZ"; its first 14 digits are the UTC stamp.
  const stamp = new Date().toISOString().replace(/\D/g, "").slice(0, 14);
  return ["assistant_autogen", mode, stamp, `${generationId}.json`].join("_");
}
/**
 * Wraps generated questions into a case-set document.
 *
 * Questions are cleaned and de-duplicated (first occurrence wins); each one
 * becomes a single-turn case with id `AUTO-001`, `AUTO-002`, and so on.
 *
 * @param input Generation id, mode, optional domain and the raw questions.
 * @returns The suite payload including `cases` and the matching `case_ids`.
 */
function buildAutogenCaseSetPayload(input: {
  generationId: string;
  mode: AutoGenMode;
  domain: string | null;
  questions: string[];
}): Record<string, unknown> {
  const { generationId, mode, domain } = input;

  const uniqueQuestions = new Set<string>();
  for (const rawQuestion of input.questions) {
    const question = sanitizeGeneratedQuestion(rawQuestion);
    if (question.length > 0) {
      uniqueQuestions.add(question);
    }
  }

  const scenarioTag = `${mode}_${domain ?? "general"}`;
  const cases = Array.from(uniqueQuestions).map((question, position) => ({
    case_id: `AUTO-${String(position + 1).padStart(3, "0")}`,
    scenario_tag: scenarioTag,
    question_type: "direct",
    broadness_level: "medium",
    turns: [{ user_message: question }],
    expected_hints: {
      expected_reply_type: null,
      expected_degraded_to: null
    }
  }));

  return {
    suite_id: `assistant_autogen_${generationId}`,
    suite_version: "0.1.0",
    schema_version: "assistant_autogen_suite_v0_1",
    generated_at: new Date().toISOString(),
    generation_id: generationId,
    mode,
    domain,
    scenario_count: cases.length,
    case_ids: cases.map((entry) => entry.case_id),
    cases
  };
}
/**
 * Groups annotations into work queues and computes summary statistics.
 *
 * Every annotation is counted per decision, per queue (via DECISION_QUEUE_MAP)
 * and per domain. A view of the annotation, enriched with case data and the
 * nearest capability group, is appended to its queue until that queue holds
 * `limitPerQueue` items; annotations mapped to "none" go to `covered_ok`.
 *
 * Case summaries are built once per run and reused, since many annotations
 * typically refer to the same run.
 *
 * @param annotations Annotations to analyze.
 * @param runMap Indexed runs keyed by run id; annotations of unknown runs fall
 *               back to the context stored with the annotation.
 * @param limitPerQueue Maximum number of views kept per queue.
 * @returns Stats, per-domain totals, the queues, and up to 60 regression
 *          candidates drawn from the routing, policy and safety queues.
 */
function collectPostAnalysis(
  annotations: AutoRunAnnotationRecord[],
  runMap: Map<string, IndexedRun>,
  limitPerQueue: number
): Record<string, unknown> {
  const byDecision: Record<string, number> = {};
  const byQueue: Record<string, number> = {};
  const byDomain = new Map<string, number>();
  const queues: Record<string, Array<Record<string, unknown>>> = {
    routing_extension: [],
    policy_fix: [],
    capability_registry: [],
    soft_boundary: [],
    safety_policy: [],
    testset_hygiene: [],
    covered_ok: []
  };
  const registry = loadCapabilitiesRegistry();
  // Memoized per run id so a run's report is summarized only once.
  const caseSummariesByRun = new Map<string, ReturnType<typeof buildCaseSummaries>>();
  const caseSummariesFor = (run: IndexedRun): ReturnType<typeof buildCaseSummaries> => {
    let summaries = caseSummariesByRun.get(run.run_id);
    if (summaries === undefined) {
      summaries = buildCaseSummaries(run.report, run.run_id, false);
      caseSummariesByRun.set(run.run_id, summaries);
    }
    return summaries;
  };
  for (const item of annotations) {
    byDecision[item.manual_case_decision] = (byDecision[item.manual_case_decision] ?? 0) + 1;
    const queueKey = DECISION_QUEUE_MAP[item.manual_case_decision];
    byQueue[queueKey] = (byQueue[queueKey] ?? 0) + 1;
    const run = runMap.get(item.run_id) ?? null;
    const caseSummary = run
      ? caseSummariesFor(run).find((candidate) => candidate.case_id === item.case_id) ?? null
      : null;
    // Falls back to the first registry group when no nearest group is resolved.
    const nearestGroup =
      resolveNearestCapabilityGroup({
        domain: caseSummary?.domain ?? item.context.domain,
        queryClass: caseSummary?.query_class ?? item.context.query_class
      }) ??
      registry.groups[0] ??
      null;
    const domainKey = caseSummary?.domain ?? item.context.domain ?? "unknown";
    byDomain.set(domainKey, (byDomain.get(domainKey) ?? 0) + 1);
    const view = {
      annotation_id: item.annotation_id,
      run_id: item.run_id,
      case_id: item.case_id,
      message_index: item.message_index,
      rating: item.rating,
      comment: item.comment,
      manual_case_decision: item.manual_case_decision,
      annotation_author: item.annotation_author,
      updated_at: item.updated_at,
      domain: caseSummary?.domain ?? item.context.domain ?? null,
      query_class: caseSummary?.query_class ?? item.context.query_class ?? null,
      trace_id: item.context.trace_id ?? caseSummary?.trace_id ?? null,
      reply_type: item.context.reply_type ?? caseSummary?.reply_type ?? null,
      nearest_capability_group: nearestGroup
        ? {
            group_code: nearestGroup.group_code,
            group_title: nearestGroup.group_title,
            maturity_status: nearestGroup.maturity_status
          }
        : null
    };
    if (queueKey === "none") {
      if (queues.covered_ok.length < limitPerQueue) queues.covered_ok.push(view);
      continue;
    }
    if (!queues[queueKey]) {
      queues[queueKey] = [];
    }
    if (queues[queueKey].length < limitPerQueue) {
      queues[queueKey].push(view);
    }
  }
  const domainSummary = Array.from(byDomain.entries())
    .map(([domain, total]) => ({ domain, total }))
    .sort((a, b) => b.total - a.total);
  return {
    stats: {
      annotations_total: annotations.length,
      by_decision: byDecision,
      by_queue: byQueue,
      domains_total: domainSummary.length
    },
    domain_summary: domainSummary,
    queues,
    recommended_regression_candidates: [
      ...queues.routing_extension.slice(0, 20),
      ...queues.policy_fix.slice(0, 20),
      ...queues.safety_policy.slice(0, 20)
    ].slice(0, 60)
  };
}
export function buildAutoRunsRouter(openaiClient = new OpenAIResponsesClient()): Router {
const router = Router();
router.get("/api/autoruns/history", (req, res) => {
const filters = parseFilters(req.query as Record<string, unknown>);
const indexed = indexRuns(filters.scan_limit);
const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit);
const summaries = filtered.map((run) => buildRunSummary(run));
const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort();
const availableModes = Array.from(
new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item): item is string => item !== null))
).sort();
const availablePromptVersions = Array.from(
new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item): item is string => item !== null))
).sort();
ok(res, {
ok: true,
generated_at: new Date().toISOString(),
filters_applied: {
from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(),
to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(),
target: filters.target,
use_mock: filters.use_mock,
prompt_contains: filters.prompt_contains,
mode: filters.mode,
limit: filters.limit,
scan_limit: filters.scan_limit
},
available: {
targets: availableTargets,
modes: availableModes,
prompt_versions: availablePromptVersions
},
items: summaries,
stats: buildHistoryStats(summaries)
});
});
router.get("/api/autoruns/history/:run_id", (req, res, next) => {
try {
const runId = String(req.params.run_id ?? "").trim();
if (!runId) {
throw new ApiError("INVALID_RUN_ID", "run_id is required", 400);
}
const run = findRunById(runId);
if (!run) {
throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
}
const annotations = readAnnotations();
const annotationStatsByCase = buildAnnotationStatsMap(runId, annotations);
const cases = buildCaseSummaries(run.report, run.run_id, true, annotationStatsByCase);
const coverage = buildCoverageFromCases(cases);
ok(res, {
ok: true,
run: buildRunSummary(run),
coverage,
cases,
annotations_summary: {
total: annotations.filter((item) => item.run_id === runId).length
},
report: run.report
});
} catch (error) {
next(error);
}
});
router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
try {
const runId = String(req.params.run_id ?? "").trim();
const caseId = String(req.params.case_id ?? "").trim();
if (!runId || !caseId) {
throw new ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
}
const run = findRunById(runId);
if (!run) {
throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
}
const annotations = readAnnotations();
if (caseId === "__all__") {
const dialog = buildRunAggregateDialog(run, annotations);
ok(res, {
ok: true,
run_id: runId,
case_id: "__all__",
...dialog,
annotations: annotations
.filter((item) => item.run_id === runId)
.sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
});
return;
}
const sessionDialog = loadSessionDialog(runId, caseId);
const dialog = sessionDialog ?? buildFallbackDialog(run, caseId);
const messages = withMessageAnnotations(runId, caseId, dialog.messages, annotations);
ok(res, {
ok: true,
run_id: runId,
case_id: caseId,
...dialog,
messages,
annotations: annotations
.filter((item) => item.run_id === runId && item.case_id === caseId)
.sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
});
} catch (error) {
next(error);
}
});
router.get("/api/autoruns/annotations", (req, res, next) => {
try {
const runIdFilter = toStringSafe((req.query as Record<string, unknown>).run_id);
const caseIdFilter = toStringSafe((req.query as Record<string, unknown>).case_id);
const minRatingRaw = toNumberSafe((req.query as Record<string, unknown>).min_rating);
const minRating = minRatingRaw === null ? null : clampInt(minRatingRaw, 1, 5, 1);
const decisionFilter = parseDecisionFilter((req.query as Record<string, unknown>).manual_case_decision);
const limit = clampInt(toNumberSafe((req.query as Record<string, unknown>).limit), 1, 2000, 400);
const scanLimit = clampInt(toNumberSafe((req.query as Record<string, unknown>).scan_limit), 50, 5000, 2500);
const annotations = readAnnotations()
.filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
.filter((item) => (caseIdFilter ? item.case_id === caseIdFilter : true))
.filter((item) => (minRating === null ? true : item.rating >= minRating))
.filter((item) => (decisionFilter === "all" ? true : item.manual_case_decision === decisionFilter))
.sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
.slice(0, limit);
const runIndex = indexRuns(scanLimit);
const runMap = new Map(runIndex.map((item) => [item.run_id, item]));
const items = annotations.map((item) => {
const run = runMap.get(item.run_id) ?? null;
const runSummary = run ? buildRunSummary(run) : null;
const cases = run ? buildCaseSummaries(run.report, run.run_id, false) : [];
const caseSummary = cases.find((candidate) => candidate.case_id === item.case_id) ?? null;
return {
...item,
run: runSummary,
case_summary: caseSummary,
technical_context: {
report_path: run?.report_path ?? null,
trace_id: item.context.trace_id,
reply_type: item.context.reply_type,
domain: item.context.domain,
query_class: item.context.query_class,
checks: caseSummary?.checks ?? null,
metric_subscores: caseSummary?.metric_subscores ?? null
}
};
});
const avgRating =
items.length > 0 ? Number((items.reduce((acc, item) => acc + item.rating, 0) / items.length).toFixed(2)) : null;
const byDecision = items.reduce<Record<string, number>>((acc, item) => {
acc[item.manual_case_decision] = (acc[item.manual_case_decision] ?? 0) + 1;
return acc;
}, {});
ok(res, {
ok: true,
generated_at: new Date().toISOString(),
filters_applied: {
run_id: runIdFilter ?? null,
case_id: caseIdFilter ?? null,
min_rating: minRating,
manual_case_decision: decisionFilter,
limit
},
stats: {
total: items.length,
avg_rating: avgRating,
by_decision: byDecision
},
available_manual_case_decisions: MANUAL_CASE_DECISIONS,
manual_case_decision_schema: readManualDecisionSchema(),
items
});
} catch (error) {
next(error);
}
});
/**
 * POST /api/autoruns/annotations
 *
 * Creates or updates the manual annotation attached to one assistant message
 * of a case inside an auto-run. An annotation is identified by the
 * (run_id, case_id, message_index) triple: posting the same triple again
 * overwrites the stored record while keeping its annotation_id, created_at
 * and resolution fields.
 *
 * Body fields read: run_id, case_id, message_index, rating, comment,
 * manual_case_decision, annotation_author.
 *
 * Errors forwarded to `next`:
 * - 400 INVALID_ANNOTATION_PAYLOAD: missing body / run_id / case_id /
 *   message_index / rating / comment
 * - 404 AUTORUN_NOT_FOUND, 404 AUTORUN_CASE_NOT_FOUND
 * - 400 AUTORUN_MESSAGE_NOT_FOUND: index beyond the dialog length
 * - 400 AUTORUN_MESSAGE_NOT_ASSISTANT: target message is not an assistant reply
 */
router.post("/api/autoruns/annotations", (req, res, next) => {
  try {
    const payload = toRecord(req.body);
    if (!payload) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "JSON body is required", 400);
    }

    // All fields are parsed before any presence check so that parser-level
    // failures keep their precedence over the "required" errors below.
    const runId = toStringSafe(payload.run_id);
    const caseId = toStringSafe(payload.case_id);
    const rawMessageIndex = toNumberSafe(payload.message_index);
    const rawRating = toNumberSafe(payload.rating);
    const comment = parseComment(payload.comment);
    const manualCaseDecision = parseManualCaseDecision(payload.manual_case_decision);
    const annotationAuthor = parseAnnotationAuthor(payload.annotation_author);

    if (!runId || !caseId) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "run_id and case_id are required", 400);
    }
    if (rawMessageIndex === null) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "message_index is required", 400);
    }
    const messageIndex = clampInt(rawMessageIndex, 0, 100_000, 0);
    if (rawRating === null) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "rating is required", 400);
    }
    const rating = clampInt(rawRating, 1, 5, 1);
    if (comment.length === 0) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "comment is required", 400);
    }

    // Resolve the run and the case the annotation points at.
    const run = findRunById(runId);
    if (!run) {
      throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
    }
    const caseSummary = buildCaseSummaries(run.report, run.run_id, false).find(
      (candidate) => candidate.case_id === caseId
    );
    if (!caseSummary) {
      throw new ApiError("AUTORUN_CASE_NOT_FOUND", `Case not found: ${caseId} in run ${runId}`, 404);
    }

    // Prefer the stored session dialog; otherwise use the fallback dialog built from the run.
    const dialog = loadSessionDialog(runId, caseId) ?? buildFallbackDialog(run, caseId);
    if (messageIndex >= dialog.messages.length) {
      throw new ApiError("AUTORUN_MESSAGE_NOT_FOUND", `Message index ${messageIndex} out of range`, 400);
    }
    const targetMessage = dialog.messages[messageIndex];
    if ((toStringSafe(targetMessage.role) ?? "unknown") !== "assistant") {
      throw new ApiError("AUTORUN_MESSAGE_NOT_ASSISTANT", "Only assistant answers can be annotated", 400);
    }

    // The closest user message before the annotated answer is stored as its question.
    const precedingUserMessage = dialog.messages
      .slice(0, messageIndex)
      .reverse()
      .find((entry) => toStringSafe(entry.role) === "user");

    const timestamp = new Date().toISOString();
    const annotations = readAnnotations();
    const targetKey = annotationKey(runId, caseId, messageIndex);
    const slot = annotations.findIndex(
      (stored) => annotationKey(stored.run_id, stored.case_id, stored.message_index) === targetKey
    );
    const previous = slot >= 0 ? annotations[slot] : null;

    const annotation: AutoRunAnnotationRecord = {
      annotation_id: previous?.annotation_id ?? generateAnnotationId(),
      run_id: runId,
      case_id: caseId,
      session_id: caseSummary.session_id,
      message_index: messageIndex,
      rating,
      comment,
      manual_case_decision: manualCaseDecision,
      annotation_author: annotationAuthor,
      // Resolution state and creation time survive an overwrite.
      resolved: previous?.resolved ?? false,
      resolved_at: previous?.resolved_at ?? null,
      resolved_by: previous?.resolved_by ?? null,
      created_at: previous?.created_at ?? timestamp,
      updated_at: timestamp,
      context: {
        message_id: toStringSafe(targetMessage.message_id),
        // Message-level values win; the case summary is the fallback.
        trace_id: toStringSafe(targetMessage.trace_id) ?? caseSummary.trace_id,
        reply_type: toStringSafe(targetMessage.reply_type) ?? caseSummary.reply_type,
        eval_target: run.eval_target,
        prompt_version: toStringSafe(run.report.prompt_version),
        domain: caseSummary.domain,
        query_class: caseSummary.query_class,
        question_text: toStringSafe(precedingUserMessage?.text),
        answer_text: toStringSafe(targetMessage.text)
      }
    };

    if (slot < 0) {
      annotations.push(annotation);
    } else {
      annotations[slot] = annotation;
    }
    writeAnnotations(annotations);

    // Report the refreshed per-case statistics together with the saved record.
    const caseStats = buildAnnotationStatsMap(runId, annotations).get(caseId) ?? null;
    ok(res, {
      ok: true,
      annotation,
      case_annotation_stats: caseStats
    });
  } catch (failure) {
    next(failure);
  }
});
/**
 * PATCH /api/autoruns/annotations/:annotation_id
 *
 * Switches the `resolved` flag of a stored annotation. Marking it resolved
 * stamps `resolved_at` with the current time and sets `resolved_by` to the
 * parsed `resolved_by` body field, falling back to the previously stored
 * value; un-resolving clears both fields. `updated_at` is always refreshed.
 *
 * Errors forwarded to `next`:
 * - 400 INVALID_ANNOTATION_ID: missing path parameter
 * - 400 INVALID_ANNOTATION_PATCH: missing body or `resolved` flag
 * - 404 ANNOTATION_NOT_FOUND
 */
router.patch("/api/autoruns/annotations/:annotation_id", (req, res, next) => {
  try {
    const annotationId = toStringSafe(req.params.annotation_id);
    if (!annotationId) {
      throw new ApiError("INVALID_ANNOTATION_ID", "annotation_id is required", 400);
    }
    const patch = toRecord(req.body);
    if (!patch) {
      throw new ApiError("INVALID_ANNOTATION_PATCH", "JSON body is required", 400);
    }
    const resolved = toBooleanSafe(patch.resolved);
    if (resolved === null) {
      throw new ApiError("INVALID_ANNOTATION_PATCH", "resolved flag is required", 400);
    }
    const resolvedBy = parseAnnotationAuthor(patch.resolved_by);

    const annotations = readAnnotations();
    const position = annotations.findIndex((candidate) => candidate.annotation_id === annotationId);
    if (position < 0) {
      throw new ApiError("ANNOTATION_NOT_FOUND", `Annotation not found: ${annotationId}`, 404);
    }

    const timestamp = new Date().toISOString();
    const previous = annotations[position];
    // Resolution metadata only exists while the annotation is resolved.
    const resolution = resolved
      ? { resolved_at: timestamp, resolved_by: resolvedBy ?? previous.resolved_by ?? null }
      : { resolved_at: null, resolved_by: null };
    const updated: AutoRunAnnotationRecord = {
      ...previous,
      resolved,
      ...resolution,
      updated_at: timestamp
    };
    annotations[position] = updated;
    writeAnnotations(annotations);

    // Return the refreshed statistics for the case the annotation belongs to.
    const caseStats = buildAnnotationStatsMap(updated.run_id, annotations).get(updated.case_id) ?? null;
    ok(res, {
      ok: true,
      annotation: updated,
      case_annotation_stats: caseStats
    });
  } catch (failure) {
    next(failure);
  }
});
/**
 * GET /api/autoruns/manual-decision-schema
 *
 * Returns the manual-case-decision schema as produced by
 * `readManualDecisionSchema()` together with the list of allowed decision values.
 */
router.get("/api/autoruns/manual-decision-schema", (_req, res) => {
  const schema = readManualDecisionSchema();
  ok(res, { ok: true, schema, enum: MANUAL_CASE_DECISIONS });
});
/**
 * GET /api/autoruns/post-analysis
 *
 * Runs `collectPostAnalysis` over the most recently updated annotations that
 * fall inside the selected set of runs.
 *
 * Query parameters read here:
 * - run_id: keep only annotations of this run
 * - limit_per_queue, annotation_limit, scan_limit: numeric limits passed
 *   through `clampInt`
 * - any run filter understood by `parseFilters` / `hasAnyRunFilterQuery`
 *
 * Errors are forwarded to `next`.
 */
router.get("/api/autoruns/post-analysis", (req, res, next) => {
  try {
    const query = req.query as Record<string, unknown>;
    const runIdFilter = toStringSafe(query.run_id);
    const limitPerQueue = clampInt(toNumberSafe(query.limit_per_queue), 5, 250, 40);
    const annotationLimit = clampInt(toNumberSafe(query.annotation_limit), 20, 5000, 1500);
    const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500);
    const runFilters = parseFilters(query);
    const applyRunFilters = hasAnyRunFilterQuery(query);
    // The index is scanned as deep as the larger of the two limits; the
    // response echoes only the post-analysis `scan_limit`.
    const runIndex = indexRuns(Math.max(scanLimit, runFilters.scan_limit));
    const filteredRuns = applyRunFilters ? runIndex.filter((run) => matchesFilters(run, runFilters)) : runIndex;
    const runMap = new Map(filteredRuns.map((run) => [run.run_id, run]));
    // Restrict annotations to the selected runs whenever run filters were
    // requested, even if they matched nothing. Previously an empty match
    // disabled the restriction, so a filter that selected zero runs returned
    // the analysis of every annotation. Without run filters the old rule is
    // kept: restrict only when at least one run was indexed.
    const restrictToSelectedRuns = applyRunFilters || runMap.size > 0;
    const scopedAnnotations = readAnnotations()
      .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
      .filter((item) => (restrictToSelectedRuns ? runMap.has(item.run_id) : true))
      // Newest first, so the limit keeps the most recently updated annotations.
      .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
      .slice(0, annotationLimit);
    const analysis = collectPostAnalysis(scopedAnnotations, runMap, limitPerQueue);
    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      filters_applied: {
        run_id: runIdFilter ?? null,
        run_filters_applied: applyRunFilters,
        limit_per_queue: limitPerQueue,
        annotation_limit: annotationLimit,
        scan_limit: scanLimit
      },
      // At most 500 run summaries are included in the response.
      runs_considered: filteredRuns.slice(0, 500).map((item) => buildRunSummary(item)),
      manual_case_decision_schema: readManualDecisionSchema(),
      post_analysis: analysis
    });
  } catch (error) {
    next(error);
  }
});
/**
 * GET /api/autoruns/autogen/history
 *
 * Lists stored question-generation records. `limit` is passed through
 * `clampInt` (bounds 1–500, fallback 120). `mode` narrows the list to one
 * generator mode; a missing or unrecognised `mode` returns records of every mode.
 */
router.get("/api/autoruns/autogen/history", (req, res, next) => {
  try {
    const query = req.query as Record<string, unknown>;
    const limit = clampInt(toNumberSafe(query.limit), 1, 500, 120);
    const requestedMode = toStringSafe(query.mode);
    // Only the two known modes act as a filter; anything else means "all modes".
    const modeFilter =
      requestedMode === "qwen_seed" || requestedMode === "codex_creative" ? requestedMode : null;
    const history = readAutoGenHistory();
    const matching = modeFilter === null ? history : history.filter((entry) => entry.mode === modeFilter);
    const items = matching.slice(0, limit);
    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      items
    });
  } catch (failure) {
    next(failure);
  }
});
/**
 * GET /api/autoruns/autogen/personality-catalog
 *
 * Returns the catalog produced by `buildAutogenPersonalityCatalog()` with a
 * generation timestamp. Errors are forwarded to `next`.
 */
router.get("/api/autoruns/autogen/personality-catalog", (_req, res, next) => {
  try {
    const generatedAt = new Date().toISOString();
    const catalog = buildAutogenPersonalityCatalog();
    ok(res, { ok: true, generated_at: generatedAt, items: catalog });
  } catch (failure) {
    next(failure);
  }
});
/**
 * POST /api/autoruns/autogen/generate
 *
 * Generates a batch of test questions, optionally stores them as a case-set
 * file in EVAL_CASES_DIR, and prepends a record to the generation history,
 * which is truncated to 500 entries.
 *
 * Body fields read: mode, count, domain, persist_to_eval_cases (defaults to
 * true), generated_by, context.
 *
 * In `qwen_seed` mode the questions come from `generateQwenSeedQuestionsLive`,
 * which needs an LLM runtime config; without one the request fails with
 * 400 AUTOGEN_LLM_CONFIG_REQUIRED. Any other mode uses
 * `generateCodexCreativeQuestions`. A missing body fails with
 * 400 INVALID_AUTOGEN_PAYLOAD. Errors are forwarded to `next`.
 */
router.post("/api/autoruns/autogen/generate", async (req, res, next) => {
  try {
    const payload = toRecord(req.body);
    if (!payload) {
      throw new ApiError("INVALID_AUTOGEN_PAYLOAD", "JSON body is required", 400);
    }
    const mode = parseAutoGenMode(payload.mode);
    const count = parseAutogenCount(payload.count);
    const domain = parseAutogenDomain(payload.domain);
    const persistCaseSet = toBooleanSafe(payload.persist_to_eval_cases) ?? true;
    const generatedBy = parseAnnotationAuthor(payload.generated_by);
    const context = toRecord(payload.context);
    const llmConfig = parseAutogenLlmRuntimeConfig(payload, context);
    const personalityPrompt = toStringSafe(context?.autogen_personality_prompt);

    // Produce the raw candidate questions for the requested mode.
    let rawQuestions: string[];
    if (mode !== "qwen_seed") {
      rawQuestions = generateCodexCreativeQuestions(count, domain);
    } else {
      if (!llmConfig) {
        throw new ApiError(
          "AUTOGEN_LLM_CONFIG_REQUIRED",
          "Для режима qwen_seed нужен активный LLM-контур (provider/model/baseUrl) из настроек подключения.",
          400
        );
      }
      rawQuestions = await generateQwenSeedQuestionsLive({
        count,
        domain,
        personalityPrompt,
        llmConfig,
        client: openaiClient
      });
    }

    // Sanitize, drop empties, de-duplicate in first-seen order, cap at `count`.
    const uniqueQuestions = new Set<string>();
    for (const candidate of rawQuestions) {
      const cleaned = sanitizeGeneratedQuestion(candidate);
      if (cleaned.length > 0) {
        uniqueQuestions.add(cleaned);
      }
    }
    const questions = Array.from(uniqueQuestions).slice(0, count);

    const generationId = generateAutogenId();
    let savedCaseSetFile: string | null = null;
    if (persistCaseSet) {
      if (!fs.existsSync(EVAL_CASES_DIR)) {
        fs.mkdirSync(EVAL_CASES_DIR, { recursive: true });
      }
      const fileName = buildAutogenCaseSetFileName(mode, generationId);
      const caseSet = buildAutogenCaseSetPayload({
        generationId,
        mode,
        domain,
        questions
      });
      fs.writeFileSync(path.resolve(EVAL_CASES_DIR, fileName), JSON.stringify(caseSet, null, 2), "utf-8");
      // Only the file name, not the absolute path, goes into the history record.
      savedCaseSetFile = fileName;
    }

    // Free-text context fields go through the mojibake repair before being stored.
    const repairedOrNull = (value: unknown) =>
      toStringSafe(value) ? repairAutogenMojibake(String(value)) : null;

    const record: AutoGenHistoryRecord = {
      generation_id: generationId,
      created_at: new Date().toISOString(),
      mode,
      // Number of questions actually kept, which may be below the requested count.
      count: questions.length,
      domain,
      questions,
      generated_by: generatedBy,
      saved_case_set_file: savedCaseSetFile,
      context: context
        ? {
            llm_provider: toStringSafe(context.llm_provider),
            model: toStringSafe(context.model),
            assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
            decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
            prompt_fingerprint: repairedOrNull(context.prompt_fingerprint),
            autogen_personality_id: toStringSafe(context.autogen_personality_id),
            autogen_personality_prompt: repairedOrNull(context.autogen_personality_prompt)
          }
        : null
    };

    // Newest record first; history is capped at 500 entries.
    const history = readAutoGenHistory();
    history.unshift(record);
    writeAutoGenHistory(history.slice(0, 500));

    ok(res, {
      ok: true,
      generation: record
    });
  } catch (failure) {
    next(failure);
  }
});
return router;
}