// Listing metadata: 2303 lines, 83 KiB, TypeScript.
import fs from "fs";
|
||
import path from "path";
|
||
import { Router } from "express";
|
||
import iconv from "iconv-lite";
|
||
import {
|
||
ASSISTANT_SESSIONS_DIR,
|
||
AUTORUN_ANNOTATIONS_FILE,
|
||
AUTORUN_GENERATOR_HISTORY_FILE,
|
||
EVAL_CASES_DIR,
|
||
EVAL_DATASETS_DIR,
|
||
MANUAL_CASE_DECISION_SCHEMA_FILE,
|
||
REPORTS_DIR
|
||
} from "../config";
|
||
import { ApiError, ok } from "../utils/http";
|
||
import { loadCapabilitiesRegistry, resolveNearestCapabilityGroup, type CapabilityGroup } from "../services/capabilitiesRegistry";
|
||
import { OpenAIResponsesClient } from "../services/openaiResponsesClient";
|
||
|
||
/** Evaluation target a run report belongs to; "unknown" is the value used when no target can be resolved. */
type AutoRunTarget = "normalizer" | "assistant_stage1" | "assistant_stage2" | "assistant_p0" | "unknown";
|
||
/** Direction label for a score trend (used by HistoryStats.trend). */
type AutoRunTrend = "up" | "down" | "flat";
|
||
/** Identifier of a question auto-generation mode. */
type AutoGenMode = "qwen_seed" | "codex_creative";
|
||
/**
 * Verdict a reviewer can attach to an annotated case.
 * The same set of values is enumerated at runtime in MANUAL_CASE_DECISIONS
 * and keyed in DECISION_QUEUE_MAP.
 */
type ManualCaseDecision =
  // covered cases
  | "covered_ok"
  | "covered_but_bad_answer"
  // implementation / routing follow-ups
  | "candidate_for_implementation"
  | "needs_routing_extension"
  // boundary and safety handling
  | "out_of_scope_but_answer_softly"
  | "unsafe_question_limit_strictly"
  // policy, registry and test-set follow-ups
  | "needs_dialog_policy_fix"
  | "needs_capability_registry_update"
  | "bad_test_case";
|
||
|
||
/** Runtime list of every accepted manual-case decision; used to validate incoming strings. */
const MANUAL_CASE_DECISIONS: ManualCaseDecision[] = [
  "covered_ok",
  "covered_but_bad_answer",
  "candidate_for_implementation",
  "needs_routing_extension",
  "out_of_scope_but_answer_softly",
  "unsafe_question_limit_strictly",
  "needs_dialog_policy_fix",
  "needs_capability_registry_update",
  "bad_test_case"
];
|
||
|
||
/**
 * Queue name associated with each manual-case decision.
 * Exposed as `queue_mapping` in the fallback decision schema.
 */
const DECISION_QUEUE_MAP: Record<ManualCaseDecision, string> = {
  covered_ok: "none",
  covered_but_bad_answer: "policy_fix",
  candidate_for_implementation: "routing_extension",
  needs_routing_extension: "routing_extension",
  out_of_scope_but_answer_softly: "soft_boundary",
  unsafe_question_limit_strictly: "safety_policy",
  needs_dialog_policy_fix: "policy_fix",
  needs_capability_registry_update: "capability_registry",
  bad_test_case: "testset_hygiene"
};
|
||
|
||
/** A parsed run report together with its identity, resolved target and timestamp. */
interface IndexedRun {
  run_id: string;
  eval_target: AutoRunTarget;
  /** Location of the report file the run was read from. */
  report_path: string;
  /** Parsed report payload (untyped key/value record). */
  report: Record<string, unknown>;
  /** Run timestamp as an ISO-8601 string. */
  timestamp_iso: string;
  /** Run timestamp as epoch milliseconds. */
  timestamp_ms: number;
}
|
||
|
||
/** Filter and paging options applied when listing runs; `null` fields mean "no constraint". */
interface RunFilters {
  /** Lower bound in epoch milliseconds, or null. */
  from_ms: number | null;
  /** Upper bound in epoch milliseconds, or null. */
  to_ms: number | null;
  /** Specific evaluation target, or "all". */
  target: AutoRunTarget | "all";
  use_mock: boolean | null;
  prompt_contains: string;
  mode: string;
  limit: number;
  scan_limit: number;
}
|
||
|
||
/** Per-domain case counters: how many cases exist and how many of them are closed. */
interface DomainCoverage {
  domain: string;
  total_cases: number;
  closed_cases: number;
}
|
||
|
||
/** Closed/open case totals for one run plus the per-domain breakdown. */
interface RunCoverage {
  closed_cases: number;
  open_cases: number;
  domain_coverage: DomainCoverage[];
}
|
||
|
||
/** Flat, list-friendly description of a single run: identity, runtime settings, score and coverage. */
interface RunSummary {
  // Identity
  run_id: string;
  eval_target: AutoRunTarget;
  run_timestamp: string;

  // Runtime settings recorded for the run (null when not available)
  mode: string | null;
  llm_provider: string | null;
  model: string | null;
  use_mock: boolean | null;
  prompt_version: string | null;
  schema_version: string | null;
  suite_id: string | null;

  // Volume
  cases_total: number;
  requests_total: number | null;
  report_path: string;

  // Quality
  score_index: number | null;
  blocking_failures: number;
  quality_failures: number;

  // Coverage
  closed_cases: number;
  open_cases: number;
  domain_coverage: DomainCoverage[];
}
|
||
|
||
/** Per-case row of a run: classification, closure status, score and annotation statistics. */
interface CaseSummary {
  case_id: string;
  domain: string | null;
  query_class: string | null;
  /** Closure verdict; "unknown" when neither checks nor a score allow a decision. */
  status: "closed" | "open" | "unknown";
  score_index: number | null;
  trace_id: string | null;
  reply_type: string | null;
  /** Session identifier associated with the case's dialog. */
  session_id: string;
  /** Whether a stored dialog for `session_id` was found. */
  dialog_available: boolean;
  /** Number of annotations recorded for the case. */
  commented_count: number;
  latest_annotation_at: string | null;
  avg_rating: number | null;
  checks: Record<string, unknown> | null;
  metric_subscores: Record<string, unknown> | null;
}
|
||
|
||
/** Aggregated statistics over a set of runs. */
interface HistoryStats {
  runs_total: number;
  /** Run counts keyed by evaluation target. */
  by_target: Record<string, number>;
  blocking_runs: number;
  quality_gap_runs: number;
  avg_score_index: number | null;
  latest_score_index: number | null;
  previous_score_index: number | null;
  trend: AutoRunTrend;
  domain_coverage: DomainCoverage[];
}
|
||
|
||
/** One stored reviewer annotation for a message of a case dialog, with a snapshot of its context. */
interface AutoRunAnnotationRecord {
  annotation_id: string;
  run_id: string;
  case_id: string;
  session_id: string;
  /** Index of the annotated message within the dialog. */
  message_index: number;
  /** Reviewer rating (clamped to 1..5 when records are read back from disk). */
  rating: number;
  comment: string;
  manual_case_decision: ManualCaseDecision;
  annotation_author: string | null;

  // Resolution state
  resolved: boolean;
  resolved_at: string | null;
  resolved_by: string | null;

  // Audit timestamps (ISO strings)
  created_at: string;
  updated_at: string;

  /** Snapshot of the message/run context captured alongside the annotation. */
  context: {
    message_id: string | null;
    trace_id: string | null;
    reply_type: string | null;
    eval_target: AutoRunTarget | "unknown";
    prompt_version: string | null;
    domain: string | null;
    query_class: string | null;
    question_text: string | null;
    answer_text: string | null;
  };
}
|
||
|
||
/** Annotation statistics aggregated for a single case. */
interface AnnotationStatsByCase {
  /** Number of annotations. */
  count: number;
  /** ISO timestamp of the most recently updated annotation, or null if none had a parseable date. */
  latest_at: string | null;
  /** Mean rating rounded to two decimals, or null. */
  avg_rating: number | null;
}
|
||
|
||
/** One persisted question-generation batch and the settings it was produced with. */
interface AutoGenHistoryRecord {
  generation_id: string;
  /** Creation time as an ISO string. */
  created_at: string;
  mode: AutoGenMode;
  /** Requested number of questions. */
  count: number;
  domain: string | null;
  questions: string[];
  generated_by: string | null;
  saved_case_set_file: string | null;
  /** LLM/prompt context of the generation, or null when none was stored. */
  context: {
    llm_provider: string | null;
    model: string | null;
    assistant_prompt_version: string | null;
    decomposition_prompt_version: string | null;
    prompt_fingerprint: string | null;
    autogen_personality_id: string | null;
    autogen_personality_prompt: string | null;
  } | null;
}
|
||
|
||
/** Entry of the auto-generation "personality" catalog offered to the question generator. */
interface AutoGenPersonalityCatalogItem {
  id: string;
  /** Human-readable name. */
  label: string;
  /** Domain the personality is tied to, or null for a domain-agnostic one. */
  domain: string | null;
  default_prompt: string;
  /** Where the entry came from: hard-coded or derived from the capabilities registry. */
  source: "built_in" | "capabilities_registry";
}
|
||
|
||
/** Connection and sampling settings for the LLM used by question auto-generation. */
interface AutoGenLlmRuntimeConfig {
  llm_provider: "openai" | "local";
  api_key: string;
  model: string;
  /** Endpoint override, or null. */
  base_url: string | null;
  temperature: number | null;
  max_output_tokens: number | null;
}
|
||
|
||
/**
 * Narrows an unknown value to a key/value record.
 *
 * @param value Anything, typically parsed JSON.
 * @returns The same object when it is a non-null, non-array object; otherwise null.
 */
function toRecord(value: unknown): Record<string, unknown> | null {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  return isObjectLike ? (value as Record<string, unknown>) : null;
}
|
||
|
||
/**
 * Returns the value itself when it is an array, otherwise a new empty array.
 */
function toArray(value: unknown): unknown[] {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
|
||
|
||
/**
 * Extracts a trimmed, non-empty string.
 *
 * @returns The trimmed text, or null when the value is not a string or is blank.
 */
function toStringSafe(value: unknown): string | null {
  const text = typeof value === "string" ? value.trim() : "";
  return text === "" ? null : text;
}
|
||
|
||
/**
 * Reads a finite number from a number or a non-blank numeric string.
 *
 * @returns The finite number, or null for any other input (including NaN and infinities).
 */
function toNumberSafe(value: unknown): number | null {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    candidate = Number(value);
  } else {
    return null;
  }
  return Number.isFinite(candidate) ? candidate : null;
}
|
||
|
||
/**
 * Reads a boolean from a boolean or from a common textual flag.
 *
 * Recognised strings (case-insensitive, trimmed): "1"/"true"/"yes"/"on" and
 * "0"/"false"/"no"/"off".
 *
 * @returns The boolean, or null when the value is not recognised.
 */
function toBooleanSafe(value: unknown): boolean | null {
  if (typeof value === "boolean") {
    return value;
  }
  if (typeof value !== "string") {
    return null;
  }
  switch (value.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      return null;
  }
}
|
||
|
||
/**
 * Parses a date string into epoch milliseconds.
 *
 * @returns Milliseconds from `Date.parse`, or null when the value is not a
 *          non-blank string or cannot be parsed.
 */
function parseDateMs(value: unknown): number | null {
  const text = toStringSafe(value);
  if (text === null) {
    return null;
  }
  const parsedMs = Date.parse(text);
  if (!Number.isFinite(parsedMs)) {
    return null;
  }
  return parsedMs;
}
|
||
|
||
/**
 * Truncates a number toward zero and clamps it into [min, max].
 *
 * @param value    Candidate number; null or non-finite yields `fallback`.
 * @param min      Lower bound (checked first).
 * @param max      Upper bound.
 * @param fallback Returned as-is when `value` is unusable.
 */
function clampInt(value: number | null, min: number, max: number, fallback: number): number {
  const usable = value !== null && Number.isFinite(value);
  if (!usable) {
    return fallback;
  }
  const whole = Math.trunc(value as number);
  return whole < min ? min : whole > max ? max : whole;
}
|
||
|
||
/**
 * Validates a raw value against the known manual-case decisions.
 *
 * @param value    Raw input (trimmed before comparison).
 * @param fallback Decision returned when the input is missing or unknown.
 */
function parseManualCaseDecision(value: unknown, fallback: ManualCaseDecision = "needs_dialog_policy_fix"): ManualCaseDecision {
  const candidate = toStringSafe(value);
  if (candidate === null) {
    return fallback;
  }
  const known = MANUAL_CASE_DECISIONS.find((decision) => decision === candidate);
  return known ?? fallback;
}
|
||
|
||
/**
 * Normalises an author name: trimmed, non-empty, and cut to at most 80 characters.
 *
 * @returns The normalised name, or null when no usable string was given.
 */
function parseAnnotationAuthor(value: unknown): string | null {
  const name = toStringSafe(value);
  return name === null ? null : name.slice(0, 80);
}
|
||
|
||
/**
 * Reads the "resolved" flag of an annotation.
 *
 * @param fallback Used when the value is not a recognisable boolean.
 */
function parseAnnotationResolved(value: unknown, fallback = false): boolean {
  return toBooleanSafe(value) ?? fallback;
}
|
||
|
||
/**
 * Loads the manual-case decision schema.
 *
 * Reads MANUAL_CASE_DECISION_SCHEMA_FILE when it exists and contains a JSON
 * object; in every other situation (missing file, read/parse error, non-object
 * payload) a built-in fallback schema is returned instead.
 */
function readManualDecisionSchema(): Record<string, unknown> {
  const builtInSchema: Record<string, unknown> = {
    schema_version: "manual_case_decision_schema_v1_fallback",
    enum: MANUAL_CASE_DECISIONS,
    labels: {
      covered_ok: "Покрыто и ок",
      covered_but_bad_answer: "Покрыто, но ответ плохой",
      candidate_for_implementation: "Кандидат на внедрение",
      needs_routing_extension: "Нужно расширение маршрутизации",
      out_of_scope_but_answer_softly: "Вне скоупа, но нужен мягкий ответ",
      unsafe_question_limit_strictly: "Высокий риск, строгие ограничения",
      needs_dialog_policy_fix: "Нужен фикс диалоговой политики",
      needs_capability_registry_update: "Нужно обновить реестр возможностей",
      bad_test_case: "Плохой тест-кейс"
    },
    queue_mapping: DECISION_QUEUE_MAP
  };

  if (fs.existsSync(MANUAL_CASE_DECISION_SCHEMA_FILE)) {
    try {
      const payload = JSON.parse(fs.readFileSync(MANUAL_CASE_DECISION_SCHEMA_FILE, "utf-8")) as unknown;
      return toRecord(payload) ?? builtInSchema;
    } catch {
      // Unreadable or malformed file: fall through to the built-in schema.
    }
  }
  return builtInSchema;
}
|
||
|
||
/**
 * Loads the persisted auto-generation history, newest first.
 *
 * Every stored entry is re-validated field by field: entries that are not
 * objects or have no `generation_id` are dropped, `count` is clamped to
 * 1..300, questions are sanitised and capped at 500, and `mode` falls back to
 * "codex_creative" unless it is one of the known modes. Records whose
 * `created_at` cannot be parsed sort after all dated ones.
 *
 * @returns The normalised records, or an empty list when the file is missing,
 *          unreadable, malformed, or not a JSON array (best-effort read).
 */
function readAutoGenHistory(): AutoGenHistoryRecord[] {
  if (!fs.existsSync(AUTORUN_GENERATOR_HISTORY_FILE)) return [];

  // Only run the mojibake repair when the stored value is a usable string;
  // the raw (untrimmed) string is what gets repaired.
  const repairOptional = (raw: unknown): string | null =>
    toStringSafe(raw) ? repairAutogenMojibake(String(raw)) : null;

  // Unparseable dates get the lowest key so the comparator never sees NaN.
  const sortKey = (iso: string): number => {
    const ms = Date.parse(iso);
    return Number.isNaN(ms) ? Number.NEGATIVE_INFINITY : ms;
  };

  try {
    const parsed = JSON.parse(fs.readFileSync(AUTORUN_GENERATOR_HISTORY_FILE, "utf-8")) as unknown;
    if (!Array.isArray(parsed)) return [];

    const records: AutoGenHistoryRecord[] = [];
    for (const raw of parsed) {
      const item = toRecord(raw);
      if (item === null) continue;

      const generationId = toStringSafe(item.generation_id);
      if (generationId === null) continue;

      // Validate instead of casting: an arbitrary stored string must not leak out as AutoGenMode.
      const storedMode = toStringSafe(item.mode);
      const mode: AutoGenMode =
        storedMode === "qwen_seed" || storedMode === "codex_creative" ? storedMode : "codex_creative";

      const questions = toArray(item.questions)
        .map((q) => toStringSafe(q))
        .filter((q): q is string => q !== null)
        .map((q) => sanitizeGeneratedQuestion(q))
        .filter((q) => q.length > 0)
        .slice(0, 500);

      const context = toRecord(item.context);

      records.push({
        generation_id: generationId,
        created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
        mode,
        count: clampInt(toNumberSafe(item.count), 1, 300, 20),
        domain: toStringSafe(item.domain),
        questions,
        generated_by: toStringSafe(item.generated_by),
        saved_case_set_file: toStringSafe(item.saved_case_set_file),
        context: context
          ? {
              llm_provider: toStringSafe(context.llm_provider),
              model: toStringSafe(context.model),
              assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
              decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
              prompt_fingerprint: repairOptional(context.prompt_fingerprint),
              autogen_personality_id: toStringSafe(context.autogen_personality_id),
              autogen_personality_prompt: repairOptional(context.autogen_personality_prompt)
            }
          : null
      });
    }

    return records.sort((a, b) => {
      const keyA = sortKey(a.created_at);
      const keyB = sortKey(b.created_at);
      if (keyB > keyA) return 1;
      if (keyB < keyA) return -1;
      return 0;
    });
  } catch {
    // Deliberate best-effort: a broken history file is treated as "no history".
    return [];
  }
}
|
||
|
||
/**
 * Persists the auto-generation history as pretty-printed JSON, creating the
 * parent directory of AUTORUN_GENERATOR_HISTORY_FILE first when it is missing.
 */
function writeAutoGenHistory(records: AutoGenHistoryRecord[]): void {
  const targetFile = AUTORUN_GENERATOR_HISTORY_FILE;
  const parentDir = path.dirname(targetFile);
  const parentMissing = !fs.existsSync(parentDir);
  if (parentMissing) {
    fs.mkdirSync(parentDir, { recursive: true });
  }
  const serialized = JSON.stringify(records, null, 2);
  fs.writeFileSync(targetFile, serialized, "utf-8");
}
|
||
|
||
/**
 * Reads the test cases stored in a dataset JSON file.
 *
 * Two layouts are accepted: a top-level array of cases, or an object with a
 * `cases` array. Entries that are not objects are skipped.
 *
 * @param filePath Path of the JSON file to read.
 * @returns The case records; an empty list on any read/parse problem.
 */
function readEvalDatasetCases(filePath: string): Array<Record<string, unknown>> {
  const keepRecords = (items: unknown[]): Array<Record<string, unknown>> => {
    const kept: Array<Record<string, unknown>> = [];
    for (const candidate of items) {
      const asRecord = toRecord(candidate);
      if (asRecord !== null) {
        kept.push(asRecord);
      }
    }
    return kept;
  };

  try {
    const payload = JSON.parse(fs.readFileSync(filePath, "utf-8")) as unknown;
    if (Array.isArray(payload)) {
      return keepRecords(payload);
    }
    const wrapper = toRecord(payload);
    return wrapper === null ? [] : keepRecords(toArray(wrapper.cases));
  } catch {
    return [];
  }
}
|
||
|
||
/**
 * Gathers distinct question texts from every `.json` file directly inside
 * EVAL_DATASETS_DIR.
 *
 * For each case the first available of `raw_question`, `user_message` and
 * `query` is taken, sanitised, and kept when non-empty. First occurrence wins
 * for duplicates.
 *
 * @param limit Maximum number of questions returned.
 * @returns Up to `limit` unique questions; empty when the directory is missing.
 */
function collectCanonicalQuestions(limit = 300): string[] {
  if (!fs.existsSync(EVAL_DATASETS_DIR)) {
    return [];
  }

  const unique = new Set<string>();
  const entries = fs.readdirSync(EVAL_DATASETS_DIR, { withFileTypes: true });
  for (const entry of entries) {
    const isJsonFile = entry.isFile() && entry.name.endsWith(".json");
    if (!isJsonFile) continue;

    const datasetPath = path.resolve(EVAL_DATASETS_DIR, entry.name);
    for (const testCase of readEvalDatasetCases(datasetPath)) {
      const questionText =
        toStringSafe(testCase.raw_question) ?? toStringSafe(testCase.user_message) ?? toStringSafe(testCase.query);
      if (!questionText) continue;

      const cleaned = sanitizeGeneratedQuestion(questionText);
      if (cleaned.length > 0) {
        unique.add(cleaned);
      }
    }
  }
  return Array.from(unique).slice(0, limit);
}
|
||
|
||
/**
 * Normalises a domain hint to a trimmed, lower-cased string.
 *
 * @returns The normalised hint, or null when no usable string was given.
 */
function normalizeDomainHint(value: unknown): string | null {
  const hint = toStringSafe(value);
  return hint === null ? null : hint.toLowerCase();
}
|
||
|
||
/**
 * Builds the default generation prompt (Russian text) for a capability group.
 *
 * The prompt always names the group and ends with the read-only reminder;
 * sentences about operations (first 3), typical queries (first 2) and 1C
 * hints (first 2) are included only when the corresponding list yields text.
 */
function buildAutogenPromptFromCapabilityGroup(group: CapabilityGroup): string {
  const operations = group.supported_operations.slice(0, 3).join(", ");
  const sampleQueries = group.typical_queries.slice(0, 2).join(" | ");
  const oneCHints = group.one_c_hints.slice(0, 2).join(", ");

  const segments: string[] = [
    `Генерируй реалистичные вопросы бухгалтера по группе "${group.group_title}".`,
    " Добавляй живую разговорную форму и опечатки, но сохраняй бизнес-смысл."
  ];
  if (operations) {
    segments.push(` Опирайся на операции: ${operations}.`);
  }
  if (sampleQueries) {
    segments.push(` Ближайшие формулировки: ${sampleQueries}.`);
  }
  if (oneCHints) {
    segments.push(` Можно мягко упоминать контекст 1С: ${oneCHints}.`);
  }
  segments.push(" Не выдумывай операции вне read-only режима.");
  return segments.join("");
}
|
||
|
||
/**
 * Assembles the personality catalog for question auto-generation.
 *
 * The catalog starts with the built-in "general" entry, followed by one entry
 * per group of the capabilities registry (`registry_<group_code>`). Entries
 * with a blank id are skipped, the first entry wins on duplicate ids, and the
 * label and prompt of every kept entry go through the mojibake repair.
 */
function buildAutogenPersonalityCatalog(): AutoGenPersonalityCatalogItem[] {
  const generalEntry: AutoGenPersonalityCatalogItem = {
    id: "general",
    label: "Общий контур",
    domain: null,
    default_prompt:
      "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл.",
    source: "built_in"
  };

  const candidates: AutoGenPersonalityCatalogItem[] = [generalEntry];
  for (const group of loadCapabilitiesRegistry().groups) {
    candidates.push({
      id: `registry_${group.group_code}`,
      label: `${group.group_title} (реестр)`,
      domain: group.group_code,
      default_prompt: buildAutogenPromptFromCapabilityGroup(group),
      source: "capabilities_registry"
    });
  }

  // Keep insertion order; the first entry registered under an id is the one that stays.
  const byId = new Map<string, AutoGenPersonalityCatalogItem>();
  for (const candidate of candidates) {
    const hasUsableId = candidate.id.trim().length > 0;
    if (hasUsableId && !byId.has(candidate.id)) {
      byId.set(candidate.id, candidate);
    }
  }

  const catalog: AutoGenPersonalityCatalogItem[] = [];
  for (const entry of byId.values()) {
    catalog.push({
      ...entry,
      label: repairAutogenMojibake(entry.label),
      default_prompt: repairAutogenMojibake(entry.default_prompt)
    });
  }
  return catalog;
}
|
||
|
||
/**
 * Returns three canned questions (Russian text) for a domain.
 *
 * The domain is matched by substring against keyword groups, in order:
 * VAT, counterparties, settlements. Anything else, including null, gets the
 * generic set. A fresh array is returned on every call.
 */
function fallbackDomainTemplates(domain: string | null): string[] {
  const rules: Array<{ needles: string[]; templates: string[] }> = [
    {
      needles: ["vat", "ндс"],
      templates: [
        "Сколько НДС к уплате на дату по организации?",
        "Покажи прогноз НДС за период по организации.",
        "Почему по НДС сейчас ноль и из чего сложился расчет?"
      ]
    },
    {
      needles: ["counter", "контраг"],
      templates: [
        "Покажи топ контрагентов по сумме платежей за период.",
        "Какой самый крупный договор у выбранной организации?",
        "Какие документы были по контрагенту за весь период?"
      ]
    },
    {
      needles: ["settlement", "задолж", "расчет"],
      templates: [
        "Какие незакрытые расчеты висят на конец периода?",
        "Есть ли незакрытые авансы по поставщикам?",
        "Покажи цепочки закрытия по счетам 60/62."
      ]
    }
  ];

  if (domain !== null) {
    for (const rule of rules) {
      if (rule.needles.some((needle) => domain.includes(needle))) {
        return rule.templates;
      }
    }
  }

  return [
    "С какой организацией сейчас можно работать в активном контуре?",
    "Покажи ключевые операции за выбранный период.",
    "Какие вопросы по этому домену ассистент поддерживает прямо сейчас?"
  ];
}
|
||
|
||
/**
 * Rewrites a question into a casual, slightly sloppy style.
 *
 * A colloquial prefix and tail are picked by `index`, and on even indices one
 * whole-word slang/typo substitution (also picked by `index`) is applied.
 * The result is deterministic for a given `(base, index)` pair.
 *
 * Word matching uses Unicode-aware lookarounds rather than `\b`: in JavaScript
 * `\b` is defined over ASCII word characters only, so it never delimits
 * Cyrillic words and the substitutions would silently never apply.
 *
 * @param base  Source question text.
 * @param index Variation selector; expected to be a non-negative integer.
 *              Negative or fractional values are normalised instead of
 *              producing "undefined" fragments.
 * @returns The mutated question.
 */
function mutateIntoQwenStyle(base: string, index: number): string {
  const wrappers = ["йо ", "слушай ", "подскажи плиз ", "короче ", "мож ", "а ну-ка "];
  const tails = ["", " без воды", " по факту", " и коротко", " прям сейчас", " за весь период"];
  const typoMap: Array<[string, string]> = [
    ["компания", "компиния"],
    ["сейчас", "щас"],
    ["пожалуйста", "плиз"],
    ["какая", "кака"],
    ["что", "че"]
  ];

  // Matches `word` only when it is not glued to another letter, digit or underscore.
  const wholeWord = (word: string): RegExp =>
    new RegExp(`(?<![\\p{L}\\p{N}_])${word}(?![\\p{L}\\p{N}_])`, "giu");

  // Identical to `index` for the non-negative integers callers pass in.
  const slot = Number.isFinite(index) ? Math.abs(Math.trunc(index)) : 0;

  const prefix = wrappers[slot % wrappers.length];
  const tail = tails[slot % tails.length];
  let text = `${prefix}${base}${tail}`.trim();

  if (slot % 2 === 0) {
    const [word, replacement] = typoMap[slot % typoMap.length];
    text = text.replace(wholeWord(word), replacement);
  }
  return text;
}
|
||
|
||
/**
 * Produces casual-style question variants from the canonical dataset questions.
 *
 * Source questions come from the eval datasets (up to 450), or from the
 * canned domain templates when no dataset question is available. When a
 * domain is given, only questions mentioning it (case-insensitively) or
 * belonging to its templates are used, unless that leaves nothing. Each
 * output is a style mutation of a source question; duplicates and empty
 * results are dropped, so fewer than `count` items may be returned.
 *
 * @param count  Number of variants to attempt.
 * @param domain Optional domain hint; compared in lower case.
 * @returns Up to `count` distinct questions.
 */
function generateQwenSeedQuestions(count: number, domain: string | null): string[] {
  // Lower-case once so the substring test is case-insensitive on both sides.
  const domainNeedle = domain ? domain.toLowerCase() : null;
  // Computed once instead of once per filtered question.
  const templates = fallbackDomainTemplates(domainNeedle);

  const seed = collectCanonicalQuestions(450);
  const source = seed.length > 0 ? seed : templates;

  let bag = source;
  if (domainNeedle) {
    const templateSet = new Set(templates);
    const matching = source.filter(
      (item) => item.toLowerCase().includes(domainNeedle) || templateSet.has(item)
    );
    if (matching.length > 0) {
      bag = matching;
    }
  }

  const unique = new Set<string>();
  for (let index = 0; index < count; index += 1) {
    const base = bag[index % bag.length];
    const question = sanitizeGeneratedQuestion(mutateIntoQwenStyle(base, index));
    if (question.length > 0) {
      unique.add(question);
    }
  }
  return Array.from(unique).slice(0, count);
}
|
||
|
||
/**
 * Produces "creative" question variants by wrapping the canned domain
 * templates into fixed instruction patterns.
 *
 * Template and pattern are both chosen by position, results are sanitised,
 * and duplicates or empty strings are dropped, so fewer than `count` items
 * may be returned.
 *
 * @param count  Number of variants to attempt.
 * @param domain Domain hint forwarded to the template lookup.
 */
function generateCodexCreativeQuestions(count: number, domain: string | null): string[] {
  const patterns = [
    "Дай бизнес-срез по состоянию на дату: {q}",
    "Нужен аккуратный ответ как бухгалтеру: {q}",
    "Если данных не хватает, скажи что уточнить, но сначала попробуй: {q}",
    "Сформулируй результат без технички и с шагом дальше: {q}",
    "Проверь в read-only и скажи что видно: {q}"
  ];
  const bases = fallbackDomainTemplates(domain);

  const unique = new Set<string>();
  let position = 0;
  while (position < count) {
    const template = patterns[position % patterns.length];
    const question = bases[position % bases.length];
    const rendered = sanitizeGeneratedQuestion(template.replace("{q}", question));
    if (rendered.length > 0) {
      unique.add(rendered);
    }
    position += 1;
  }
  return Array.from(unique).slice(0, count);
}
|
||
|
||
/**
 * Creates a generation id of the form `gen-<time>-<random>`, where both parts
 * are base-36: the current epoch milliseconds and up to seven characters
 * taken from `Math.random()`.
 */
function generateAutogenId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 9);
  return ["gen", timePart, randomPart].join("-");
}
|
||
|
||
/**
 * Loads all stored annotations from AUTORUN_ANNOTATIONS_FILE.
 *
 * Every entry is re-validated: `message_index` is clamped to 0..100000,
 * `rating` to 1..5, the decision and author fields go through their parsers,
 * and `context.eval_target` is checked against the known targets (anything
 * else becomes "unknown"). Entries lacking an annotation id, run id or case
 * id are dropped.
 *
 * @returns The normalised annotations, or an empty list when the file is
 *          missing, unreadable, malformed, or not a JSON array (best-effort read).
 */
function readAnnotations(): AutoRunAnnotationRecord[] {
  if (!fs.existsSync(AUTORUN_ANNOTATIONS_FILE)) {
    return [];
  }

  // Validate instead of casting so an arbitrary stored string cannot pose as a target.
  const parseTarget = (raw: unknown): AutoRunTarget => {
    const text = toStringSafe(raw);
    switch (text) {
      case "normalizer":
      case "assistant_stage1":
      case "assistant_stage2":
      case "assistant_p0":
      case "unknown":
        return text;
      default:
        return "unknown";
    }
  };

  try {
    const parsed = JSON.parse(fs.readFileSync(AUTORUN_ANNOTATIONS_FILE, "utf-8")) as unknown;
    if (!Array.isArray(parsed)) {
      return [];
    }

    const annotations: AutoRunAnnotationRecord[] = [];
    for (const raw of parsed) {
      const item = toRecord(raw);
      if (item === null) continue;

      const annotationId = toStringSafe(item.annotation_id);
      const runId = toStringSafe(item.run_id);
      const caseId = toStringSafe(item.case_id);
      // All three identifiers are required for a usable annotation.
      if (!annotationId || !runId || !caseId) continue;

      const context = toRecord(item.context);
      annotations.push({
        annotation_id: annotationId,
        run_id: runId,
        case_id: caseId,
        session_id: toStringSafe(item.session_id) ?? "",
        message_index: clampInt(toNumberSafe(item.message_index), 0, 100_000, 0),
        rating: clampInt(toNumberSafe(item.rating), 1, 5, 1),
        comment: toStringSafe(item.comment) ?? "",
        manual_case_decision: parseManualCaseDecision(item.manual_case_decision),
        annotation_author: parseAnnotationAuthor(item.annotation_author),
        resolved: parseAnnotationResolved(item.resolved),
        resolved_at: toStringSafe(item.resolved_at),
        resolved_by: parseAnnotationAuthor(item.resolved_by),
        created_at: toStringSafe(item.created_at) ?? new Date().toISOString(),
        updated_at: toStringSafe(item.updated_at) ?? new Date().toISOString(),
        context: {
          message_id: toStringSafe(context?.message_id),
          trace_id: toStringSafe(context?.trace_id),
          reply_type: toStringSafe(context?.reply_type),
          eval_target: parseTarget(context?.eval_target),
          prompt_version: toStringSafe(context?.prompt_version),
          domain: toStringSafe(context?.domain),
          query_class: toStringSafe(context?.query_class),
          question_text: toStringSafe(context?.question_text),
          answer_text: toStringSafe(context?.answer_text)
        }
      });
    }
    return annotations;
  } catch {
    // Deliberate best-effort: a broken annotations file is treated as "no annotations".
    return [];
  }
}
|
||
|
||
/**
 * Persists all annotations as pretty-printed JSON.
 *
 * The parent directory of AUTORUN_ANNOTATIONS_FILE is created first when it is
 * missing (same behavior as writeAutoGenHistory), so the very first write on
 * a fresh data directory does not fail with ENOENT.
 *
 * @param items Complete list of annotations; the file is overwritten.
 */
function writeAnnotations(items: AutoRunAnnotationRecord[]): void {
  const dir = path.dirname(AUTORUN_ANNOTATIONS_FILE);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(AUTORUN_ANNOTATIONS_FILE, JSON.stringify(items, null, 2), "utf-8");
}
|
||
|
||
/**
 * Builds the composite key `<runId>::<caseId>::<messageIndex>` for an annotation slot.
 */
function annotationKey(runId: string, caseId: string, messageIndex: number): string {
  return [runId, caseId, messageIndex].join("::");
}
|
||
|
||
/**
 * Aggregates the annotations of one run per case.
 *
 * @param runId       Only annotations with this run id are counted.
 * @param annotations All known annotations.
 * @returns Map from case id to annotation count, latest parseable `updated_at`
 *          (ISO string, or null) and mean rating rounded to two decimals.
 */
function buildAnnotationStatsMap(runId: string, annotations: AutoRunAnnotationRecord[]): Map<string, AnnotationStatsByCase> {
  const totals = new Map<string, { count: number; ratingSum: number; latestMs: number | null }>();

  for (const annotation of annotations) {
    if (annotation.run_id !== runId) continue;

    let entry = totals.get(annotation.case_id);
    if (entry === undefined) {
      entry = { count: 0, ratingSum: 0, latestMs: null };
      totals.set(annotation.case_id, entry);
    }
    entry.count += 1;
    entry.ratingSum += annotation.rating;

    const updatedMs = Date.parse(annotation.updated_at);
    const isNewer = entry.latestMs === null || updatedMs > entry.latestMs;
    if (Number.isFinite(updatedMs) && isNewer) {
      entry.latestMs = updatedMs;
    }
  }

  const stats = new Map<string, AnnotationStatsByCase>();
  totals.forEach((entry, caseId) => {
    const meanRating = entry.count > 0 ? Number((entry.ratingSum / entry.count).toFixed(2)) : null;
    const latestAt = entry.latestMs === null ? null : new Date(entry.latestMs).toISOString();
    stats.set(caseId, { count: entry.count, latest_at: latestAt, avg_rating: meanRating });
  });
  return stats;
}
|
||
|
||
/**
 * Picks, for each message index of one case, the most recently updated annotation.
 *
 * Rules when several annotations target the same message:
 * - the first one seen is kept until a better candidate appears;
 * - a candidate with a parseable `updated_at` replaces the stored one when it
 *   is at least as recent (ties go to the later list entry);
 * - a stored annotation whose `updated_at` cannot be parsed is treated as
 *   older than any dated candidate, so it no longer blocks valid updates;
 * - a candidate with an unparseable `updated_at` never replaces a stored one.
 *
 * @param runId       Run the annotations must belong to.
 * @param caseId      Case the annotations must belong to.
 * @param annotations All known annotations.
 * @returns Map from message index to the selected annotation.
 */
function buildAnnotationsByMessageIndex(runId: string, caseId: string, annotations: AutoRunAnnotationRecord[]): Map<number, AutoRunAnnotationRecord> {
  const map = new Map<number, AutoRunAnnotationRecord>();
  for (const item of annotations) {
    if (item.run_id !== runId || item.case_id !== caseId) continue;

    const current = map.get(item.message_index);
    if (!current) {
      map.set(item.message_index, item);
      continue;
    }

    const nextMs = Date.parse(item.updated_at);
    if (Number.isNaN(nextMs)) continue;

    // Comparing against NaN is always false, which used to pin an undated record forever.
    const storedMs = Date.parse(current.updated_at);
    const currentMs = Number.isNaN(storedMs) ? Number.NEGATIVE_INFINITY : storedMs;
    if (nextMs >= currentMs) {
      map.set(item.message_index, item);
    }
  }
  return map;
}
|
||
|
||
/**
 * Determines which evaluation target a run belongs to.
 *
 * Resolution order: an explicit, recognised `report.eval_target`; then the
 * run id prefix; then the report file name suffix; otherwise "unknown".
 *
 * @param input.report     Parsed report payload.
 * @param input.runId      Run identifier.
 * @param input.reportPath Path of the report file.
 */
function resolveRunTarget(input: { report: Record<string, unknown>; runId: string; reportPath: string }): AutoRunTarget {
  const declared = toStringSafe(input.report.eval_target);
  switch (declared) {
    case "assistant_stage1":
    case "assistant_stage2":
    case "assistant_p0":
    case "normalizer":
      return declared;
    default:
      break;
  }

  // Checked in order; the first matching prefix decides.
  const prefixTargets: Array<[string, AutoRunTarget]> = [
    ["assistant-stage1-", "assistant_stage1"],
    ["assistant-stage2-", "assistant_stage2"],
    ["assistant-p0-", "assistant_p0"],
    ["eval-", "normalizer"]
  ];
  for (const [prefix, target] of prefixTargets) {
    if (input.runId.startsWith(prefix)) {
      return target;
    }
  }

  return input.reportPath.endsWith(".report.json") ? "normalizer" : "unknown";
}
|
||
|
||
/**
 * Resolves the timestamp of a run.
 *
 * Uses the first parseable value of `report.run_timestamp` and
 * `report.timestamp`; when neither parses, the report file's modification
 * time is used.
 *
 * @param report      Parsed report payload.
 * @param fileMtimeMs Modification time of the report file in epoch milliseconds.
 * @returns The timestamp both as ISO string and as epoch milliseconds.
 */
function normalizeTimestamp(report: Record<string, unknown>, fileMtimeMs: number): { iso: string; ms: number } {
  let resolvedMs = parseDateMs(report.run_timestamp);
  if (resolvedMs === null) {
    resolvedMs = parseDateMs(report.timestamp);
  }
  if (resolvedMs === null) {
    resolvedMs = fileMtimeMs;
  }
  return { iso: new Date(resolvedMs).toISOString(), ms: resolvedMs };
}
|
||
|
||
/**
 * Converts a rate to a percentage clamped to 0..100.
 *
 * Values up to 1.2 are treated as fractions and multiplied by 100; larger
 * values are taken as already being percentages.
 *
 * @returns The clamped percentage, or null for null input.
 */
function rateToPercent(value: number | null): number | null {
  if (value === null) {
    return null;
  }
  const percent = value <= 1.2 ? value * 100 : value;
  return Math.max(0, Math.min(100, percent));
}
|
||
|
||
/**
 * Converts a score to a percentage clamped to 0..100.
 *
 * Values up to 5.2 are treated as points on a 5-point scale; larger values
 * are taken as already being percentages.
 *
 * @returns The clamped percentage, or null for null input.
 */
function scoreToPercent(value: number | null): number | null {
  if (value === null) {
    return null;
  }
  const percent = value <= 5.2 ? (value / 5) * 100 : value;
  return Math.max(0, Math.min(100, percent));
}
|
||
|
||
/**
 * Arithmetic mean of the finite numbers in the list, rounded to two decimals.
 *
 * @param values Numbers and nulls; nulls and non-finite entries are ignored.
 * @returns The rounded mean, or null when nothing usable remains.
 */
function average(values: Array<number | null>): number | null {
  let total = 0;
  let counted = 0;
  for (const entry of values) {
    if (typeof entry === "number" && Number.isFinite(entry)) {
      total += entry;
      counted += 1;
    }
  }
  if (counted === 0) {
    return null;
  }
  return Number((total / counted).toFixed(2));
}
|
||
|
||
/**
 * Locates the metrics record of a report.
 *
 * @returns `report.metrics.raw` when it is an object, otherwise
 *          `report.metrics`; null when the report has no metrics object.
 */
function getMetricRecord(report: Record<string, unknown>): Record<string, unknown> | null {
  const metrics = toRecord(report.metrics);
  if (metrics === null) {
    return null;
  }
  const rawMetrics = toRecord(metrics.raw);
  return rawMetrics === null ? metrics : rawMetrics;
}
|
||
|
||
/**
 * Computes the 0..100 score index of a run as the average of target-specific metrics.
 *
 * Each metric is first converted to a percentage (rates via rateToPercent,
 * 5-point scores via scoreToPercent). "Lower is better" rates are inverted;
 * for the assistant targets a missing inverted rate counts as the worst case.
 *
 * @param report Parsed report payload.
 * @param target Evaluation target; anything other than the three assistant
 *               targets uses the normalizer metric set.
 * @returns The rounded average, or null when the report has no metrics or no
 *          metric yields a value.
 */
function computeScoreIndex(report: Record<string, unknown>, target: AutoRunTarget): number | null {
  const metrics = getMetricRecord(report);
  if (metrics === null) {
    return null;
  }

  const read = (key: string): number | null => toNumberSafe(metrics[key]);
  const rate = (key: string): number | null => rateToPercent(read(key));
  const score = (key: string): number | null => scoreToPercent(read(key));
  // Inverted rate: a missing metric defaults to 1, i.e. contributes 0%.
  const inverseRate = (key: string): number | null => rateToPercent(1 - (read(key) ?? 1));

  switch (target) {
    case "assistant_p0":
      return average([
        rate("problem_first_answer_rate"),
        score("mechanism_coherence_score"),
        inverseRate("entity_leakage_rate"),
        score("accountant_actionability_score"),
        rate("route_correctness_rate"),
        rate("domain_purity_rate"),
        rate("limitation_honesty_rate"),
        rate("top_problem_unit_match_rate")
      ]);

    case "assistant_stage1":
      return average([
        rate("retrieval_differentiation_rate"),
        inverseRate("generic_explanation_rate"),
        score("accountant_actionability_score"),
        inverseRate("false_confidence_rate"),
        inverseRate("broad_answer_rate"),
        score("mechanism_specificity_score"),
        score("followup_context_retention_score")
      ]);

    case "assistant_stage2":
      return average([
        rate("problem_unit_precision"),
        rate("problem_unit_recall_proxy"),
        rate("duplicate_collapse_rate"),
        score("mechanism_coherence_score"),
        score("problem_clarity_score"),
        rate("problem_first_answer_rate"),
        inverseRate("entity_leakage_rate")
      ]);

    default:
      // NOTE(review): this branch inverts with `100 - x` (and a missing value counts as 0, i.e. 100%),
      // while the assistant branches use `1 - x` with a missing value counting as the worst case.
      // Presumably high_confidence_error_rate is stored as a percentage here - confirm against the report producer.
      return average([
        rate("schema_validation_pass_rate"),
        rateToPercent(read("route_resolution_accuracy") ?? read("route_hint_accuracy")),
        rateToPercent(read("execution_state_consistency_rate") ?? read("intent_class_accuracy")),
        rateToPercent(100 - (read("high_confidence_error_rate") ?? 0))
      ]);
  }
}
|
||
|
||
/**
 * Counts the failures listed in a report's gates.
 *
 * Blocking: `acceptance_gate.blocking_failures` plus
 * `baseline_stability_gate.blocking_regressions`. Quality:
 * `acceptance_gate.quality_failures` plus the baseline gate's
 * `legacy_quality_failures` and `quality_gap_failures`. Missing or non-array
 * lists count as zero.
 */
function countFailures(report: Record<string, unknown>): { blocking: number; quality: number } {
  const sizeOf = (list: unknown): number => toArray(list).length;
  const acceptance = toRecord(report.acceptance_gate);
  const baseline = toRecord(report.baseline_stability_gate);

  const blocking = sizeOf(acceptance?.blocking_failures) + sizeOf(baseline?.blocking_regressions);
  const quality =
    sizeOf(acceptance?.quality_failures) +
    sizeOf(baseline?.legacy_quality_failures) +
    sizeOf(baseline?.quality_gap_failures);

  return { blocking, quality };
}
|
||
|
||
/**
 * Derives a 0..100 score for a single case from its metric subscores.
 *
 * `case_product_score` is used directly when present; otherwise the score is
 * the average of the available clarity, coherence, problem-first, inverted
 * entity-leakage and usefulness subscores.
 *
 * @returns The score rounded to two decimals, or null when there are no
 *          subscores or none of them yields a value.
 */
function caseScoreFromMetricSubscores(metricSubscores: Record<string, unknown> | null): number | null {
  if (metricSubscores === null) {
    return null;
  }

  const productScore = scoreToPercent(toNumberSafe(metricSubscores.case_product_score));
  if (productScore !== null) {
    return Number(productScore.toFixed(2));
  }

  const read = (key: string): number | null => toNumberSafe(metricSubscores[key]);
  return average([
    scoreToPercent(read("problem_clarity_score")),
    scoreToPercent(read("mechanism_coherence_score")),
    rateToPercent(read("problem_first_answer_rate")),
    // Inverted rate: a missing value defaults to 1 and therefore contributes 0%.
    rateToPercent(1 - (read("entity_leakage_rate") ?? 1)),
    scoreToPercent(read("accountant_usefulness_score"))
  ]);
}
|
||
|
||
/**
 * Decides whether a case counts as closed.
 *
 * When at least one of the checks `route_correct`, `domain_pure` and
 * `problem_first_answer` has a recognisable boolean value, the case is closed
 * unless any of them is false. Otherwise the score decides (closed at 65 or
 * above).
 *
 * @returns true/false for closed/open, or null when neither checks nor a
 *          score are available.
 */
function isCaseClosed(input: {
  checks: Record<string, unknown> | null;
  scoreIndex: number | null;
}): boolean | null {
  const { checks, scoreIndex } = input;

  if (checks) {
    const verdicts = [
      toBooleanSafe(checks.route_correct),
      toBooleanSafe(checks.domain_pure),
      toBooleanSafe(checks.problem_first_answer)
    ];
    const anyKnown = verdicts.some((verdict) => verdict !== null);
    if (anyKnown) {
      return !verdicts.includes(false);
    }
  }

  return typeof scoreIndex === "number" ? scoreIndex >= 65 : null;
}
|
||
|
||
/**
 * Returns the object entries of `report.results`; non-object entries are
 * skipped and a missing or non-array `results` yields an empty list.
 */
function getResultCases(report: Record<string, unknown>): Array<Record<string, unknown>> {
  const cases: Array<Record<string, unknown>> = [];
  for (const entry of toArray(report.results)) {
    const asRecord = toRecord(entry);
    if (asRecord !== null) {
      cases.push(asRecord);
    }
  }
  return cases;
}
|
||
|
||
/**
 * Builds one summary row per result case of a report.
 *
 * @param report                  Parsed report payload.
 * @param runId                   Run identifier; combined with the case id into the session id.
 * @param checkDialogAvailability When true, the sessions directory is probed for
 *                                `<sessionId>.json`; when false, `dialog_available` is always false.
 * @param annotationStatsByCase   Optional per-case annotation statistics to merge in.
 * @returns The case summaries in report order. Cases without a `case_id` get
 *          a positional id of the form `case-<n>` (1-based).
 */
function buildCaseSummaries(
  report: Record<string, unknown>,
  runId: string,
  checkDialogAvailability: boolean,
  annotationStatsByCase?: Map<string, AnnotationStatsByCase>
): CaseSummary[] {
  const summaries: CaseSummary[] = [];

  getResultCases(report).forEach((result, position) => {
    const caseId = toStringSafe(result.case_id) ?? `case-${position + 1}`;
    const checks = toRecord(result.checks);
    const metricSubscores = toRecord(result.metric_subscores);

    // Prefer the subscore-derived value; fall back to the case-level usefulness score.
    const scoreIndex =
      caseScoreFromMetricSubscores(metricSubscores) ??
      scoreToPercent(toNumberSafe(result.accountant_usefulness_score)) ??
      null;

    const closedState = isCaseClosed({ checks, scoreIndex });
    let status: CaseSummary["status"];
    if (closedState === null) {
      status = "unknown";
    } else if (closedState) {
      status = "closed";
    } else {
      status = "open";
    }

    const sessionId = `${runId}-${caseId}`;
    let dialogAvailable = false;
    if (checkDialogAvailability) {
      dialogAvailable = fs.existsSync(path.resolve(ASSISTANT_SESSIONS_DIR, `${sessionId}.json`));
    }

    const annotationStats = annotationStatsByCase?.get(caseId);

    summaries.push({
      case_id: caseId,
      domain: toStringSafe(result.domain),
      query_class: toStringSafe(result.query_class),
      status,
      score_index: scoreIndex === null ? null : Number(scoreIndex.toFixed(2)),
      trace_id: toStringSafe(result.trace_id),
      reply_type: toStringSafe(result.reply_type),
      session_id: sessionId,
      dialog_available: dialogAvailable,
      commented_count: annotationStats?.count ?? 0,
      latest_annotation_at: annotationStats?.latest_at ?? null,
      avg_rating: annotationStats?.avg_rating ?? null,
      checks,
      metric_subscores: metricSubscores
    });
  });

  return summaries;
}
|
||
|
||
/**
 * Aggregates case summaries into closed/open counters and per-domain coverage.
 *
 * Cases without a domain are grouped under "unknown". The per-domain list is
 * ordered by total case count, largest first.
 */
function buildCoverageFromCases(cases: CaseSummary[]): RunCoverage {
  const perDomain = new Map<string, { total: number; closed: number }>();
  let closedCases = 0;
  let openCases = 0;

  for (const summary of cases) {
    const isClosed = summary.status === "closed";
    if (isClosed) {
      closedCases += 1;
    } else if (summary.status === "open") {
      openCases += 1;
    }

    const domainKey = summary.domain ?? "unknown";
    let bucket = perDomain.get(domainKey);
    if (!bucket) {
      bucket = { total: 0, closed: 0 };
      perDomain.set(domainKey, bucket);
    }
    bucket.total += 1;
    if (isClosed) {
      bucket.closed += 1;
    }
  }

  const domainCoverage = Array.from(perDomain.entries())
    .map(([domain, bucket]) => ({
      domain,
      total_cases: bucket.total,
      closed_cases: bucket.closed
    }))
    .sort((left, right) => right.total_cases - left.total_cases);

  return {
    closed_cases: closedCases,
    open_cases: openCases,
    domain_coverage: domainCoverage
  };
}
|
||
|
||
/**
 * Lists report files from the reports directory (`*.json`) and the eval cases
 * directory (`*.report.json`), newest first by modification time.
 *
 * @param scanLimit Maximum number of files to return.
 * @returns Absolute paths with their mtime; files whose stat fails are skipped.
 */
function collectJsonCandidates(scanLimit: number): Array<{ path: string; mtimeMs: number }> {
  const sources: Array<{ dir: string; suffix: string }> = [
    { dir: REPORTS_DIR, suffix: ".json" },
    { dir: EVAL_CASES_DIR, suffix: ".report.json" }
  ];
  const found: Array<{ path: string; mtimeMs: number }> = [];

  for (const { dir, suffix } of sources) {
    if (!fs.existsSync(dir)) continue;

    const fileNames = fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.isFile() && entry.name.endsWith(suffix))
      .map((entry) => entry.name);

    for (const fileName of fileNames) {
      const fullPath = path.resolve(dir, fileName);
      try {
        found.push({ path: fullPath, mtimeMs: fs.statSync(fullPath).mtimeMs });
      } catch {
        // skip broken file stat
      }
    }
  }

  found.sort((left, right) => right.mtimeMs - left.mtimeMs);
  return found.slice(0, scanLimit);
}
|
||
|
||
/**
 * Reads candidate report files and indexes them by `run_id`.
 *
 * Files that cannot be read or parsed, are not records, or carry no `run_id`
 * are skipped. When several files share a run id, the one with the greatest
 * normalized timestamp wins.
 *
 * @param scanLimit Maximum number of files to inspect.
 * @returns Indexed runs ordered by timestamp, newest first.
 */
function indexRuns(scanLimit: number): IndexedRun[] {
  const latestByRunId = new Map<string, IndexedRun>();

  for (const candidate of collectJsonCandidates(scanLimit)) {
    let payload: unknown;
    try {
      payload = JSON.parse(fs.readFileSync(candidate.path, "utf-8")) as unknown;
    } catch {
      continue;
    }

    const report = toRecord(payload);
    if (!report) continue;
    const runId = toStringSafe(report.run_id);
    if (!runId) continue;

    const evalTarget = resolveRunTarget({ report, runId, reportPath: candidate.path });
    const when = normalizeTimestamp(report, candidate.mtimeMs);
    const entry: IndexedRun = {
      run_id: runId,
      eval_target: evalTarget,
      report_path: candidate.path,
      report,
      timestamp_iso: when.iso,
      timestamp_ms: when.ms
    };

    const known = latestByRunId.get(runId);
    if (!known || entry.timestamp_ms > known.timestamp_ms) {
      latestByRunId.set(runId, entry);
    }
  }

  const runs = Array.from(latestByRunId.values());
  runs.sort((left, right) => right.timestamp_ms - left.timestamp_ms);
  return runs;
}
|
||
|
||
/**
 * Converts a raw query object into run filters.
 *
 * - `target` is accepted only when it names a known eval target; anything else becomes "all".
 * - `use_mock` absent or "any" means no filtering (`null`).
 * - `limit` is clamped to 1..500 (default 120), `scan_limit` to 50..5000 (default 900).
 */
function parseFilters(query: Record<string, unknown>): RunFilters {
  const requestedTarget = toStringSafe(query.target)?.toLowerCase() ?? "all";
  let target: RunFilters["target"] = "all";
  switch (requestedTarget) {
    case "normalizer":
    case "assistant_stage1":
    case "assistant_stage2":
    case "assistant_p0":
      target = requestedTarget;
      break;
    default:
      break;
  }

  const useMockRaw = toStringSafe(query.use_mock);
  const useMockIsWildcard = useMockRaw === null || useMockRaw.toLowerCase() === "any";

  return {
    from_ms: parseDateMs(query.from),
    to_ms: parseDateMs(query.to),
    target,
    use_mock: useMockIsWildcard ? null : toBooleanSafe(useMockRaw),
    prompt_contains: (toStringSafe(query.prompt_contains) ?? "").toLowerCase(),
    mode: toStringSafe(query.mode)?.toLowerCase() ?? "all",
    limit: clampInt(toNumberSafe(query.limit), 1, 500, 120),
    scan_limit: clampInt(toNumberSafe(query.scan_limit), 50, 5000, 900)
  };
}
|
||
|
||
/**
 * Tells whether an indexed run satisfies every active filter:
 * time window, eval target, mode, mock flag and prompt-version substring.
 */
function matchesFilters(run: IndexedRun, filters: RunFilters): boolean {
  const { report, timestamp_ms: runMs } = run;

  const beforeWindow = filters.from_ms !== null && runMs < filters.from_ms;
  const afterWindow = filters.to_ms !== null && runMs > filters.to_ms;
  if (beforeWindow || afterWindow) return false;

  if (filters.target !== "all" && filters.target !== run.eval_target) return false;

  if (filters.mode !== "all") {
    const runMode = (toStringSafe(report.mode) ?? "").toLowerCase();
    if (runMode !== filters.mode) return false;
  }

  if (filters.use_mock !== null && toBooleanSafe(report.use_mock) !== filters.use_mock) {
    return false;
  }

  if (filters.prompt_contains.length > 0) {
    // The filter value is expected to be lower-cased already; lower-case the report side to match.
    const promptVersion = (toStringSafe(report.prompt_version) ?? "").toLowerCase();
    if (!promptVersion.includes(filters.prompt_contains)) return false;
  }

  return true;
}
|
||
|
||
/**
 * Produces the list-view summary of a run.
 *
 * Provider and model are looked up on the report itself, then on `connection`,
 * then on `normalize_config`/`normalizeConfig` (snake_case before camelCase).
 * Coverage numbers come from the per-case summaries, built without probing
 * for session files.
 */
function buildRunSummary(run: IndexedRun): RunSummary {
  const { report } = run;
  const connection = toRecord(report.connection);
  const normalizeConfig = toRecord(report.normalize_config) ?? toRecord(report.normalizeConfig);

  // Mirrors a `??` chain: stop at the first value toStringSafe accepts, else keep the last result.
  const firstString = (...values: unknown[]): string | null => {
    let picked: string | null = null;
    for (const value of values) {
      picked = toStringSafe(value);
      if (picked != null) break;
    }
    return picked;
  };

  const llmProvider = firstString(
    report.llm_provider,
    report.llmProvider,
    connection?.llm_provider,
    connection?.llmProvider,
    normalizeConfig?.llm_provider,
    normalizeConfig?.llmProvider
  );
  const model = firstString(report.model, connection?.model, normalizeConfig?.model);

  const cases = buildCaseSummaries(report, run.run_id, false);
  const coverage = buildCoverageFromCases(cases);
  const failures = countFailures(report);

  return {
    run_id: run.run_id,
    eval_target: run.eval_target,
    run_timestamp: run.timestamp_iso,
    mode: toStringSafe(report.mode),
    llm_provider: llmProvider,
    model,
    use_mock: toBooleanSafe(report.use_mock),
    prompt_version: toStringSafe(report.prompt_version),
    schema_version: toStringSafe(report.schema_version),
    suite_id: toStringSafe(report.suite_id),
    cases_total: toNumberSafe(report.cases_total) ?? cases.length,
    requests_total: toNumberSafe(toRecord(report.budget)?.requests_total),
    report_path: run.report_path,
    score_index: computeScoreIndex(report, run.eval_target),
    blocking_failures: failures.blocking,
    quality_failures: failures.quality,
    closed_cases: coverage.closed_cases,
    open_cases: coverage.open_cases,
    domain_coverage: coverage.domain_coverage
  };
}
|
||
|
||
/**
 * Sums per-domain coverage across several run summaries.
 *
 * @returns One entry per domain, ordered by total case count (largest first).
 */
function mergeDomainCoverage(summaries: RunSummary[]): DomainCoverage[] {
  const totals = new Map<string, { total: number; closed: number }>();

  for (const { domain_coverage: coverage } of summaries) {
    for (const entry of coverage) {
      let bucket = totals.get(entry.domain);
      if (!bucket) {
        bucket = { total: 0, closed: 0 };
        totals.set(entry.domain, bucket);
      }
      bucket.total += entry.total_cases;
      bucket.closed += entry.closed_cases;
    }
  }

  const merged = Array.from(totals.entries()).map(([domain, bucket]) => ({
    domain,
    total_cases: bucket.total,
    closed_cases: bucket.closed
  }));
  merged.sort((left, right) => right.total_cases - left.total_cases);
  return merged;
}
|
||
|
||
/**
 * Computes aggregate statistics over a list of run summaries.
 *
 * The trend compares the score of the first summary with the second one and
 * needs a difference of more than 0.5 to count as "up" or "down"; it is "flat"
 * when either score is missing.
 */
function buildHistoryStats(summaries: RunSummary[]): HistoryStats {
  const byTarget: Record<string, number> = {};
  let blockingRuns = 0;
  let qualityRuns = 0;
  let scoreSum = 0;
  let scoreCount = 0;

  for (const summary of summaries) {
    byTarget[summary.eval_target] = (byTarget[summary.eval_target] ?? 0) + 1;
    if (summary.blocking_failures > 0) blockingRuns += 1;
    if (summary.quality_failures > 0) qualityRuns += 1;
    if (typeof summary.score_index === "number") {
      scoreSum += summary.score_index;
      scoreCount += 1;
    }
  }

  const scoreAt = (position: number): number | null => {
    const score = summaries[position]?.score_index;
    return typeof score === "number" ? score : null;
  };
  const latestScore = scoreAt(0);
  const previousScore = scoreAt(1);

  let trend: AutoRunTrend = "flat";
  if (latestScore !== null && previousScore !== null) {
    if (latestScore > previousScore + 0.5) {
      trend = "up";
    } else if (latestScore < previousScore - 0.5) {
      trend = "down";
    }
  }

  return {
    runs_total: summaries.length,
    by_target: byTarget,
    blocking_runs: blockingRuns,
    quality_gap_runs: qualityRuns,
    avg_score_index: scoreCount > 0 ? Number((scoreSum / scoreCount).toFixed(2)) : null,
    latest_score_index: latestScore,
    previous_score_index: previousScore,
    trend,
    domain_coverage: mergeDomainCoverage(summaries)
  };
}
|
||
|
||
/**
 * Looks up a single run by id among the indexed report files.
 *
 * @param runId     Run identifier to match exactly.
 * @param scanLimit Maximum number of report files to inspect (default 3000).
 * @returns The indexed run, or `null` when no inspected report carries that id.
 */
function findRunById(runId: string, scanLimit = 3000): IndexedRun | null {
  for (const run of indexRuns(scanLimit)) {
    if (run.run_id === runId) {
      return run;
    }
  }
  return null;
}
|
||
|
||
/**
 * Extracts routing/debug fields from the last assistant message of a session record.
 *
 * @returns `null` when no record is given; otherwise an object whose fields are
 *          whatever `toStringSafe` yields for the last assistant entry (or for
 *          missing values when the conversation has no assistant message).
 */
function buildAssistantModeSummary(dialogRecord: Record<string, unknown> | null): Record<string, unknown> | null {
  if (!dialogRecord) return null;

  const conversation = toArray(dialogRecord.conversation)
    .map((entry) => toRecord(entry))
    .filter((entry): entry is Record<string, unknown> => entry !== null);

  // Walk backwards to find the most recent assistant turn.
  let lastAssistant: Record<string, unknown> | undefined;
  for (let position = conversation.length - 1; position >= 0; position -= 1) {
    const entry = conversation[position];
    if (toStringSafe(entry.role) === "assistant") {
      lastAssistant = entry;
      break;
    }
  }

  const debug = toRecord(lastAssistant?.debug);
  return {
    reply_type: toStringSafe(lastAssistant?.reply_type),
    trace_id: toStringSafe(lastAssistant?.trace_id),
    detected_mode: toStringSafe(debug?.detected_mode),
    execution_lane: toStringSafe(debug?.execution_lane),
    tool_gate_decision: toStringSafe(debug?.tool_gate_decision),
    living_router_mode: toStringSafe(debug?.living_router_mode),
    fallback_type: toStringSafe(debug?.fallback_type)
  };
}
|
||
|
||
/**
 * Loads the stored assistant session for a run/case pair.
 *
 * The session file is `<runId>-<caseId>.json` inside the assistant sessions
 * directory. Messages come from `conversation`; the decomposition is taken from
 * `human_readable.decomposition` of the last entry in `turns`.
 *
 * @returns The dialog, or `null` when the file is missing, unreadable, not valid
 *          JSON, or not a record.
 */
function loadSessionDialog(runId: string, caseId: string): {
  source: "assistant_session";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} | null {
  const sessionId = `${runId}-${caseId}`;
  const sessionFile = path.resolve(ASSISTANT_SESSIONS_DIR, `${sessionId}.json`);
  if (!fs.existsSync(sessionFile)) return null;

  let payload: unknown;
  try {
    payload = JSON.parse(fs.readFileSync(sessionFile, "utf-8")) as unknown;
  } catch {
    return null;
  }

  const record = toRecord(payload);
  if (!record) return null;

  const recordsOf = (value: unknown): Array<Record<string, unknown>> =>
    toArray(value)
      .map((entry) => toRecord(entry))
      .filter((entry): entry is Record<string, unknown> => entry !== null);

  const messages = recordsOf(record.conversation).map((entry) => ({
    message_id: toStringSafe(entry.message_id),
    role: toStringSafe(entry.role) ?? "unknown",
    text: toStringSafe(entry.text) ?? "",
    created_at: toStringSafe(entry.created_at),
    trace_id: toStringSafe(entry.trace_id),
    reply_type: toStringSafe(entry.reply_type)
  }));

  const turns = recordsOf(record.turns);
  const lastTurn = turns.length > 0 ? turns[turns.length - 1] : null;
  const humanReadable = toRecord(lastTurn?.human_readable);

  const decomposition: string[] = [];
  for (const step of toArray(humanReadable?.decomposition)) {
    const text = toStringSafe(step);
    if (text !== null) decomposition.push(text);
  }

  return {
    source: "assistant_session",
    session_id: sessionId,
    messages,
    decomposition,
    assistant_mode: buildAssistantModeSummary(record)
  };
}
|
||
|
||
/**
 * Synthesizes a two-message dialog from report data when no stored session exists.
 *
 * The user message is the case's raw question (or `Case <id>`); the assistant
 * message lists the known validation flags, confidence and numeric metric
 * subscores as `key=value` lines.
 *
 * @returns A "report_fallback" dialog, or an empty "none" dialog when the case
 *          is not present in the report.
 */
function buildFallbackDialog(run: IndexedRun, caseId: string): {
  source: "report_fallback" | "none";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} {
  const sessionId = `${run.run_id}-${caseId}`;
  const targetCase = getResultCases(run.report).find(
    (entry) => (toStringSafe(entry.case_id) ?? "") === caseId
  );

  if (!targetCase) {
    return {
      source: "none",
      session_id: sessionId,
      messages: [],
      decomposition: [],
      assistant_mode: null
    };
  }

  const userText =
    toStringSafe(targetCase.raw_question) ?? toStringSafe(targetCase.user_query_raw) ?? `Case ${caseId}`;

  const summaryLines: string[] = [];
  for (const flagName of ["validation_passed", "route_match", "intent_match"]) {
    const verdict = toBooleanSafe(targetCase[flagName]);
    if (verdict !== null) summaryLines.push(`${flagName}=${verdict}`);
  }

  const confidence = toStringSafe(targetCase.confidence_overall);
  if (confidence) summaryLines.push(`confidence=${confidence}`);

  const metricSubscores = toRecord(targetCase.metric_subscores);
  if (metricSubscores) {
    // Only subscores that read as numbers are listed.
    Object.entries(metricSubscores).forEach(([key, value]) => {
      if (toNumberSafe(value) !== null) summaryLines.push(`${key}=${value}`);
    });
  }

  if (summaryLines.length === 0) {
    summaryLines.push("No structured assistant dialog is available for this case in report artifacts.");
  }

  const userMessage = {
    message_id: null,
    role: "user",
    text: userText,
    created_at: null,
    trace_id: null,
    reply_type: null
  };
  const assistantMessage = {
    message_id: null,
    role: "assistant",
    text: summaryLines.join("\n"),
    created_at: null,
    trace_id: toStringSafe(targetCase.trace_id),
    reply_type: toStringSafe(targetCase.reply_type)
  };

  return {
    source: "report_fallback",
    session_id: sessionId,
    messages: [userMessage, assistantMessage],
    decomposition: [],
    assistant_mode: null
  };
}
|
||
|
||
/**
 * Decorates dialog messages with their position and the annotation attached to
 * that position (if any) for the given run/case.
 *
 * @returns Copies of the messages extended with `message_index`, `commented`
 *          and `annotation` (`null` when the message has none).
 */
function withMessageAnnotations(
  runId: string,
  caseId: string,
  messages: Array<Record<string, unknown>>,
  annotations: AutoRunAnnotationRecord[]
): Array<Record<string, unknown>> {
  const annotationAt = buildAnnotationsByMessageIndex(runId, caseId, annotations);
  const decorated: Array<Record<string, unknown>> = [];

  messages.forEach((message, position) => {
    const annotation = annotationAt.get(position) ?? null;
    decorated.push({
      ...message,
      message_index: position,
      commented: annotation !== null,
      annotation
    });
  });

  return decorated;
}
|
||
|
||
/**
 * Concatenates the dialogs of every case in a run into one message stream.
 *
 * Each case uses its stored session when available, otherwise the report-based
 * fallback. Messages keep their per-case position in `case_message_index` and
 * receive a run-wide `message_index`; decomposition steps are prefixed with
 * `[<caseId>]`.
 */
function buildRunAggregateDialog(
  run: IndexedRun,
  annotations: AutoRunAnnotationRecord[]
): {
  source: "run_aggregate";
  session_id: string;
  messages: Array<Record<string, unknown>>;
  decomposition: string[];
  assistant_mode: Record<string, unknown> | null;
} {
  const messages: Array<Record<string, unknown>> = [];
  const decomposition: string[] = [];

  for (const { case_id: caseId } of buildCaseSummaries(run.report, run.run_id, false)) {
    const dialog = loadSessionDialog(run.run_id, caseId) ?? buildFallbackDialog(run, caseId);
    const annotated = withMessageAnnotations(run.run_id, caseId, dialog.messages, annotations);

    for (const caseMessage of annotated) {
      messages.push({
        ...caseMessage,
        case_id: caseId,
        case_message_index: toNumberSafe(caseMessage.message_index) ?? 0,
        // The run-wide index is simply the number of messages emitted so far.
        message_index: messages.length
      });
    }

    for (const step of dialog.decomposition) {
      decomposition.push(`[${caseId}] ${step}`);
    }
  }

  return {
    source: "run_aggregate",
    session_id: `${run.run_id}::__all__`,
    messages,
    decomposition,
    assistant_mode: null
  };
}
|
||
|
||
/**
 * Creates an annotation id of the form `ann-<time>-<random>`, where both parts
 * are base-36: the current epoch milliseconds and up to seven characters taken
 * from `Math.random()`.
 */
function generateAnnotationId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 9);
  return ["ann", timePart, randomPart].join("-");
}
|
||
|
||
/**
 * Normalizes a comment value to a trimmed string; values `toStringSafe`
 * rejects become the empty string.
 */
function parseComment(value: unknown): string {
  return (toStringSafe(value) ?? "").trim();
}
|
||
|
||
/**
 * Parses a decision filter: missing/empty values and the literal "all" mean no
 * filtering; anything else is delegated to `parseManualCaseDecision`.
 */
function parseDecisionFilter(value: unknown): ManualCaseDecision | "all" {
  const requested = toStringSafe(value);
  return requested && requested !== "all" ? parseManualCaseDecision(requested) : "all";
}
|
||
|
||
/**
 * Parses the auto-generation mode (case-insensitive).
 * Unrecognized or missing values fall back to "codex_creative".
 */
function parseAutoGenMode(value: unknown): AutoGenMode {
  switch (toStringSafe(value)?.toLowerCase()) {
    case "qwen_seed":
      return "qwen_seed";
    case "codex_creative":
    default:
      return "codex_creative";
  }
}
|
||
|
||
/**
 * Parses the requested number of generated questions via `clampInt`
 * with bounds 1..200 and a default of 24.
 */
function parseAutogenCount(value: unknown): number {
  const requested = toNumberSafe(value);
  return clampInt(requested, 1, 200, 24);
}
|
||
|
||
/**
 * Normalizes a domain hint and caps it at 80 characters.
 *
 * @returns The capped domain, or `null` when `normalizeDomainHint` yields nothing usable.
 */
function parseAutogenDomain(value: unknown): string | null {
  const normalized = normalizeDomainHint(value);
  return normalized ? normalized.slice(0, 80) : null;
}
|
||
|
||
/**
 * Builds the LLM runtime configuration for auto-generation from a request body.
 *
 * Provider and model are read from `body.llm` first and from `context` as a
 * fallback; the remaining settings come from `body.llm` only.
 *
 * @returns The configuration, or `null` when no model is given or the provider
 *          is neither "openai" nor "local".
 */
function parseAutogenLlmRuntimeConfig(
  body: Record<string, unknown>,
  context: Record<string, unknown> | null
): AutoGenLlmRuntimeConfig | null {
  const llm = toRecord(body.llm);
  const provider = toStringSafe(llm?.llm_provider ?? context?.llm_provider)?.toLowerCase() ?? "";
  const model = toStringSafe(llm?.model ?? context?.model);

  const providerSupported = provider === "openai" || provider === "local";
  if (!model || !providerSupported) {
    return null;
  }

  return {
    llm_provider: provider === "local" ? "local" : "openai",
    api_key: toStringSafe(llm?.api_key) ?? "",
    model,
    base_url: toStringSafe(llm?.base_url),
    temperature: toNumberSafe(llm?.temperature),
    max_output_tokens: toNumberSafe(llm?.max_output_tokens)
  };
}
|
||
|
||
/**
 * Heuristic readability score used to compare decoding candidates.
 *
 * Cyrillic and Latin letters each add one point; characters typical of
 * mis-decoded text subtract points (hard markers x3, lead-byte pairs x2,
 * double-encoded pairs x2). Higher is better.
 */
function textMojibakeScore(value: string): number {
  const source = String(value ?? "");
  const occurrences = (pattern: RegExp): number => source.match(pattern)?.length ?? 0;

  const letters = occurrences(/[А-Яа-яЁё]/g) + occurrences(/[A-Za-z]/g);
  const penalty =
    occurrences(/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/g) * 3 +
    occurrences(/(?:Р.|С.|Ð.|Ñ.)/g) * 2 +
    occurrences(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) * 2;

  return letters - penalty;
}
|
||
|
||
/**
 * Cheap detector for mis-decoded text.
 *
 * Blank input is never flagged. Text is flagged when it contains any hard
 * marker character, or at least two lead-byte pairs, or at least two
 * double-encoded pairs.
 */
function looksLikeMojibake(value: string): boolean {
  const source = String(value ?? "");
  if (source.trim().length === 0) {
    return false;
  }

  if (/[Ѓѓ‚„…†‡€‰‹ЉЊЌЋЏ‘’“”•–—™љ›њќћџ]/.test(source)) {
    return true;
  }

  const occurrences = (pattern: RegExp): number => source.match(pattern)?.length ?? 0;
  if (occurrences(/(?:Р.|С.|Ð.|Ñ.)/g) >= 2) {
    return true;
  }
  return occurrences(/(?:Г[Ђ-џ]|В[Ђ-џ]|Ã.|Â.)/gu) >= 2;
}
|
||
|
||
/**
 * Tries to undo mis-decoded ("mojibake") text.
 *
 * Text that does not look like mojibake is returned unchanged. Otherwise up to
 * three passes are made; each pass re-interprets the current candidate as
 * Windows-1251 bytes and as Latin-1 bytes, decodes those bytes as UTF-8, and
 * keeps a result only when it scores strictly better than the current candidate.
 *
 * Candidates are compared by `textMojibakeScore` minus three points per U+FFFD.
 * Decoding invalid UTF-8 never throws; it silently emits U+FFFD, which the plain
 * score treats as neutral. Without the penalty a lossy decode (score 0) would
 * replace any text whose own score is negative, for example valid text made
 * mostly of dashes, quotes or currency signs.
 *
 * @param value Text to repair.
 * @returns The best-scoring candidate, or the input when nothing scores better.
 */
function repairAutogenMojibake(value: string): string {
  const source = String(value ?? "");
  if (!looksLikeMojibake(source)) {
    return source;
  }

  // Each replacement character weighs as much as a hard mojibake marker.
  const quality = (text: string): number =>
    textMojibakeScore(text) - 3 * (text.split("\uFFFD").length - 1);

  const reinterpretations: Array<(text: string) => string> = [
    (text) => iconv.encode(text, "win1251").toString("utf8"),
    (text) => Buffer.from(text, "latin1").toString("utf8")
  ];

  let candidate = source;
  for (let pass = 0; pass < 3; pass += 1) {
    let improved = false;

    for (const reinterpret of reinterpretations) {
      let decoded: string;
      try {
        decoded = reinterpret(candidate);
      } catch {
        // Best-effort repair: a failing conversion just leaves the candidate as is.
        continue;
      }
      if (quality(decoded) > quality(candidate)) {
        candidate = decoded;
        improved = true;
      }
    }

    if (!improved) {
      break;
    }
  }

  return candidate;
}
|
||
|
||
/**
 * Cleans a generated question: repairs mojibake, then collapses carriage
 * returns, tabs and any whitespace run into single spaces and trims the ends.
 */
function sanitizeGeneratedQuestion(value: string): string {
  const repaired = repairAutogenMojibake(String(value ?? ""));
  const withoutControlBreaks = repaired.replace(/\r/g, " ").replace(/\t/g, " ");
  return withoutControlBreaks.replace(/\s+/g, " ").trim();
}
|
||
|
||
/**
 * Splits free-form model output into individual question strings.
 *
 * Strategies are tried in order and the first one that yields more than one
 * item wins:
 *   1. one question per line, with leading list markers removed;
 *   2. split on "?" when the text has several question marks;
 *   3. double-quoted fragments of at least six characters.
 * If none applies, the whole cleaned text is returned as a single item.
 *
 * A numeric list marker is only recognized when the number is followed by
 * `)`, `.` or `:` and that delimiter is not followed by a digit. Questions that
 * start with a number of their own ("41 счет ...", "62.01 ...", "2024 год ...")
 * therefore keep their leading digits.
 *
 * @param rawText Text to split; mojibake repair is applied first.
 * @returns Sanitized, non-empty question candidates (possibly empty array).
 */
function splitQuestionCandidates(rawText: string): string[] {
  const normalized = repairAutogenMojibake(rawText).replace(/\r/g, "\n").trim();
  if (!normalized) return [];

  // Undo JSON-style escaping that leaks through when the model returns a quoted blob.
  const unescaped = normalized.replace(/\\"/g, '"').replace(/\\n/g, "\n");

  const byLines = unescaped
    .split(/\n+/g)
    .map((line) => line.replace(/^\s*(?:[-*•]|\d{1,3}[).:](?!\d))\s*/, ""))
    .map((line) => sanitizeGeneratedQuestion(line))
    .filter((line) => line.length > 0);
  if (byLines.length > 1) {
    return byLines;
  }

  const questionMarkCount = (unescaped.match(/\?/g) ?? []).length;
  if (questionMarkCount > 1) {
    const byQuestion = unescaped
      .split("?")
      .map((chunk) => sanitizeGeneratedQuestion(chunk))
      .filter((chunk) => chunk.length > 0)
      .map((chunk) => (chunk.endsWith("?") ? chunk : `${chunk}?`));
    if (byQuestion.length > 1) {
      return byQuestion;
    }
  }

  const quoted = Array.from(unescaped.matchAll(/"([^"\n]{6,}?)"/g))
    .map((match) => sanitizeGeneratedQuestion(match[1]))
    .filter((line) => line.length > 0);
  if (quoted.length > 1) {
    return quoted;
  }

  const cleaned = sanitizeGeneratedQuestion(unescaped);
  return cleaned ? [cleaned] : [];
}
|
||
|
||
/**
 * Best-effort JSON extraction from model output.
 *
 * After mojibake repair and removal of a surrounding Markdown code fence, the
 * text is parsed as a whole; failing that, the span from the first "[" to the
 * last "]" is tried, then the span from the first "{" to the last "}".
 *
 * @returns The parsed value, or `null` when nothing parses (a literal JSON
 *          `null` also comes back as `null`).
 */
function parseAutogenOutputJson(rawText: string): unknown | null {
  const cleaned = repairAutogenMojibake(rawText)
    .trim()
    .replace(/^```json\s*/i, "")
    .replace(/^```\s*/i, "")
    .replace(/```$/i, "")
    .trim();
  if (!cleaned) return null;

  const attempt = (text: string): { parsed: boolean; value: unknown } => {
    try {
      return { parsed: true, value: JSON.parse(text) as unknown };
    } catch {
      return { parsed: false, value: null };
    }
  };

  const whole = attempt(cleaned);
  if (whole.parsed) return whole.value;

  // Arrays are tried before objects, matching the expected `["q1","q2"]` / `{"questions":[...]}` shapes.
  const delimiters: Array<[string, string]> = [
    ["[", "]"],
    ["{", "}"]
  ];
  for (const [opening, closing] of delimiters) {
    const start = cleaned.indexOf(opening);
    const end = cleaned.lastIndexOf(closing);
    if (start >= 0 && end > start) {
      const fragment = attempt(cleaned.slice(start, end + 1));
      if (fragment.parsed) return fragment.value;
    }
  }

  return null;
}
|
||
|
||
/**
 * Recursively pulls question strings out of an arbitrary parsed value.
 *
 * - Arrays: questions of every element, concatenated.
 * - Strings: first tried as embedded JSON (lenient, then strict parse); if that
 *   yields nothing the text itself is split into question candidates.
 * - Records: the `questions` field, else the first of `question`,
 *   `user_message`, `text`.
 *
 * @param value Candidate value of unknown shape.
 * @param depth Current recursion depth; anything deeper than 5 yields nothing.
 */
function collectQuestionsFromCandidate(value: unknown, depth = 0): string[] {
  if (depth > 5 || value === null || value === undefined) {
    return [];
  }
  const nextDepth = depth + 1;

  if (Array.isArray(value)) {
    const collected: string[] = [];
    for (const entry of value) {
      for (const question of collectQuestionsFromCandidate(entry, nextDepth)) {
        collected.push(question);
      }
    }
    return collected;
  }

  if (typeof value !== "string") {
    const record = toRecord(value);
    if (!record) {
      return [];
    }
    const listed = collectQuestionsFromCandidate(record.questions, nextDepth);
    if (listed.length > 0) {
      return listed;
    }
    const single = toStringSafe(record.question ?? record.user_message ?? record.text);
    return single ? splitQuestionCandidates(single) : [];
  }

  const text = value.trim();
  if (!text) return [];

  // Lenient pass: tolerates code fences and surrounding prose.
  const lenient = parseAutogenOutputJson(text);
  if (lenient !== null) {
    const fromLenient = collectQuestionsFromCandidate(lenient, nextDepth);
    if (fromLenient.length > 0) {
      return fromLenient;
    }
  }

  // Strict pass on the raw text, e.g. for a JSON-encoded string.
  try {
    const strict = JSON.parse(text) as unknown;
    if (strict !== text) {
      const fromStrict = collectQuestionsFromCandidate(strict, nextDepth);
      if (fromStrict.length > 0) {
        return fromStrict;
      }
    }
  } catch {
    // ignore non-JSON strings
  }

  return splitQuestionCandidates(text);
}
|
||
|
||
/**
 * Extracts questions from raw model output: structured JSON is preferred, and
 * the raw text is processed directly when the structured route finds nothing.
 */
function extractQuestionsFromAutogenOutput(rawText: string): string[] {
  const structured = collectQuestionsFromCandidate(parseAutogenOutputJson(rawText));
  return structured.length > 0 ? structured : collectQuestionsFromCandidate(rawText);
}
|
||
|
||
/**
 * Asks the configured LLM for `count` accountant-style questions and returns
 * them de-duplicated, topped up with locally generated seed questions.
 *
 * Up to eight style examples are sent along: canonical questions when any
 * exist, otherwise the domain fallback templates. Output tokens are clamped to
 * 300..3000 (default 1200) and temperature to 0..1.5 (default 0.5 when null).
 *
 * @throws ApiError `AUTOGEN_LLM_EMPTY_OUTPUT` (502) when no usable question can
 *         be extracted from the model output.
 */
async function generateQwenSeedQuestionsLive(input: {
  count: number;
  domain: string | null;
  personalityPrompt: string | null;
  llmConfig: AutoGenLlmRuntimeConfig;
  client: OpenAIResponsesClient;
}): Promise<string[]> {
  const { llmConfig } = input;

  const canonicalExamples = collectCanonicalQuestions(40);
  const templateExamples = fallbackDomainTemplates(input.domain);
  const styleExamples = (canonicalExamples.length > 0 ? canonicalExamples : templateExamples).slice(0, 8);

  const persona = repairAutogenMojibake(
    input.personalityPrompt ??
      "Генерируй реалистичные вопросы бухгалтера по 1С. Разговорный стиль допустим, но смысл должен быть четким."
  );

  const maxOutputTokens = clampInt(llmConfig.max_output_tokens, 300, 3000, 1200);
  const temperature =
    llmConfig.temperature === null ? 0.5 : Math.max(0, Math.min(1.5, llmConfig.temperature));

  const systemLines = [
    "Ты генератор вопросов для автопрогонов бухгалтерского ассистента по 1С.",
    "Возвращай только JSON и никаких пояснений.",
    "Ассистент работает в read-only режиме: не проси действий изменения базы."
  ];
  const developerLines = [
    `Нужно сгенерировать ровно ${input.count} вопросов.`,
    "Формат ответа строго:",
    '{"questions":["вопрос 1","вопрос 2"]}',
    "Требования:",
    "1) каждый вопрос отдельный, без дубликатов;",
    "2) живой пользовательский язык;",
    "3) допустимы легкие разговорные сокращения;",
    "4) не выдавай мета-комментарии и не описывай правила."
  ];
  const userLines = [
    `Домен: ${input.domain ?? "general"}.`,
    `Промпт личности: ${persona}`,
    "Примеры ориентиров по стилю и тематике:"
  ];
  styleExamples.forEach((example, position) => {
    userLines.push(`${position + 1}. ${example}`);
  });

  const response = await input.client.chat(
    {
      llmProvider: llmConfig.llm_provider,
      apiKey: llmConfig.api_key,
      model: llmConfig.model,
      baseUrl: llmConfig.base_url ?? undefined,
      temperature,
      maxOutputTokens: maxOutputTokens
    },
    {
      systemPrompt: repairAutogenMojibake(systemLines.join(" ")),
      developerPrompt: repairAutogenMojibake(developerLines.join("\n")),
      userMessage: repairAutogenMojibake(userLines.join("\n")),
      temperature,
      maxOutputTokens
    }
  );

  const fromModel = new Set<string>();
  for (const rawQuestion of extractQuestionsFromAutogenOutput(response.outputText)) {
    const question = sanitizeGeneratedQuestion(rawQuestion);
    if (question.length > 0) fromModel.add(question);
  }
  if (fromModel.size === 0) {
    throw new ApiError("AUTOGEN_LLM_EMPTY_OUTPUT", "Qwen не вернул пригодные вопросы для автогенерации.", 502, {
      model: llmConfig.model
    });
  }

  // Model questions come first; seed questions only fill the remaining slots.
  const seeded = generateQwenSeedQuestions(input.count, input.domain);
  return Array.from(new Set([...Array.from(fromModel), ...seeded])).slice(0, input.count);
}
|
||
|
||
/**
 * Tells whether the query carries at least one run filter
 * (`from`, `to`, `target`, `mode`, `use_mock`, `prompt_contains`).
 *
 * Every key is checked on its own. A `??` chain would stop at the first
 * non-null value, so an empty string for an earlier key could hide a later key
 * that is actually set.
 * NOTE(review): whether `toStringSafe` can return "" is not visible here; if it
 * never does, this behaves exactly like the chained form.
 */
function hasAnyRunFilterQuery(query: Record<string, unknown>): boolean {
  const filterKeys = ["from", "to", "target", "mode", "use_mock", "prompt_contains"];
  return filterKeys.some((key) => Boolean(toStringSafe(query[key])));
}
|
||
|
||
/**
 * Builds the file name of a generated case set:
 * `assistant_autogen_<mode>_<YYYYMMDDhhmmss>_<generationId>.json`,
 * with the timestamp taken from the current UTC time.
 */
function buildAutogenCaseSetFileName(mode: AutoGenMode, generationId: string): string {
  const now = new Date();
  const twoDigits = (part: number): string => String(part).padStart(2, "0");

  const datePart = `${now.getUTCFullYear()}${twoDigits(now.getUTCMonth() + 1)}${twoDigits(now.getUTCDate())}`;
  const timePart = `${twoDigits(now.getUTCHours())}${twoDigits(now.getUTCMinutes())}${twoDigits(now.getUTCSeconds())}`;

  return `assistant_autogen_${mode}_${datePart}${timePart}_${generationId}.json`;
}
|
||
|
||
/**
 * Builds the suite payload for a set of generated questions.
 *
 * Questions are sanitized, emptied ones dropped and duplicates removed (first
 * occurrence wins). Each remaining question becomes a single-turn case with a
 * sequential `AUTO-NNN` id.
 */
function buildAutogenCaseSetPayload(input: {
  generationId: string;
  mode: AutoGenMode;
  domain: string | null;
  questions: string[];
}): Record<string, unknown> {
  const { generationId, mode, domain } = input;

  const uniqueQuestions = new Set<string>();
  for (const rawQuestion of input.questions) {
    const question = sanitizeGeneratedQuestion(rawQuestion);
    if (question.length > 0) uniqueQuestions.add(question);
  }

  const scenarioTag = `${mode}_${domain ?? "general"}`;
  const cases = Array.from(uniqueQuestions).map((question, position) => ({
    case_id: `AUTO-${String(position + 1).padStart(3, "0")}`,
    scenario_tag: scenarioTag,
    question_type: "direct",
    broadness_level: "medium",
    turns: [{ user_message: question }],
    expected_hints: {
      expected_reply_type: null,
      expected_degraded_to: null
    }
  }));

  return {
    suite_id: `assistant_autogen_${generationId}`,
    suite_version: "0.1.0",
    schema_version: "assistant_autogen_suite_v0_1",
    generated_at: new Date().toISOString(),
    generation_id: generationId,
    mode,
    domain,
    scenario_count: cases.length,
    case_ids: cases.map((entry) => entry.case_id),
    cases
  };
}
|
||
|
||
/**
 * Aggregates manual annotations into post-analysis statistics and work queues.
 *
 * For each annotation the function increments the per-decision and per-queue counters,
 * looks up the matching case summary in the run report (when the run is present in
 * `runMap`), resolves the nearest capability group and appends a compact view of the
 * annotation to the queue selected through `DECISION_QUEUE_MAP`.
 *
 * @param annotations annotations to analyse; queues are filled in this order
 * @param runMap indexed runs keyed by run_id; annotations whose run is absent fall back
 *               to the domain / query class stored in their own context
 * @param limitPerQueue maximum number of views stored per queue (counters are not capped)
 * @returns stats, per-domain totals sorted by descending count, the queues, and up to
 *          60 regression candidates taken from the routing, policy and safety queues
 */
function collectPostAnalysis(
  annotations: AutoRunAnnotationRecord[],
  runMap: Map<string, IndexedRun>,
  limitPerQueue: number
): Record<string, unknown> {
  const byDecision: Record<string, number> = {};
  const byQueue: Record<string, number> = {};
  const byDomain = new Map<string, number>();

  const queues: Record<string, Array<Record<string, unknown>>> = {
    routing_extension: [],
    policy_fix: [],
    capability_registry: [],
    soft_boundary: [],
    safety_policy: [],
    testset_hygiene: [],
    covered_ok: []
  };

  const registry = loadCapabilitiesRegistry();

  // Case summaries are derived from the whole run report, so they are built once per
  // run and reused for every annotation that points at the same run.
  const caseSummariesByRun = new Map<string, ReturnType<typeof buildCaseSummaries>>();
  const findCaseSummary = (runId: string, caseId: string) => {
    const run = runMap.get(runId);
    if (!run) {
      return null;
    }
    let summaries = caseSummariesByRun.get(runId);
    if (summaries === undefined) {
      summaries = buildCaseSummaries(run.report, run.run_id, false);
      caseSummariesByRun.set(runId, summaries);
    }
    return summaries.find((candidate) => candidate.case_id === caseId) ?? null;
  };

  for (const item of annotations) {
    byDecision[item.manual_case_decision] = (byDecision[item.manual_case_decision] ?? 0) + 1;
    const queueKey = DECISION_QUEUE_MAP[item.manual_case_decision];
    byQueue[queueKey] = (byQueue[queueKey] ?? 0) + 1;

    const caseSummary = findCaseSummary(item.run_id, item.case_id);

    // Fall back to the first registry group when no nearest group can be resolved.
    const nearestGroup =
      resolveNearestCapabilityGroup({
        domain: caseSummary?.domain ?? item.context.domain,
        queryClass: caseSummary?.query_class ?? item.context.query_class
      }) ??
      registry.groups[0] ??
      null;

    const domainKey = caseSummary?.domain ?? item.context.domain ?? "unknown";
    byDomain.set(domainKey, (byDomain.get(domainKey) ?? 0) + 1);

    const view = {
      annotation_id: item.annotation_id,
      run_id: item.run_id,
      case_id: item.case_id,
      message_index: item.message_index,
      rating: item.rating,
      comment: item.comment,
      manual_case_decision: item.manual_case_decision,
      annotation_author: item.annotation_author,
      updated_at: item.updated_at,
      // Domain and query class prefer the report; trace id and reply type prefer the
      // values captured on the annotation itself.
      domain: caseSummary?.domain ?? item.context.domain ?? null,
      query_class: caseSummary?.query_class ?? item.context.query_class ?? null,
      trace_id: item.context.trace_id ?? caseSummary?.trace_id ?? null,
      reply_type: item.context.reply_type ?? caseSummary?.reply_type ?? null,
      nearest_capability_group: nearestGroup
        ? {
            group_code: nearestGroup.group_code,
            group_title: nearestGroup.group_title,
            maturity_status: nearestGroup.maturity_status
          }
        : null
    };

    // The "none" queue key is reported under the covered_ok queue.
    if (queueKey === "none") {
      if (queues.covered_ok.length < limitPerQueue) queues.covered_ok.push(view);
      continue;
    }
    if (!queues[queueKey]) {
      queues[queueKey] = [];
    }
    if (queues[queueKey].length < limitPerQueue) {
      queues[queueKey].push(view);
    }
  }

  const domainSummary = Array.from(byDomain.entries())
    .map(([domain, total]) => ({ domain, total }))
    .sort((a, b) => b.total - a.total);

  return {
    stats: {
      annotations_total: annotations.length,
      by_decision: byDecision,
      by_queue: byQueue,
      domains_total: domainSummary.length
    },
    domain_summary: domainSummary,
    queues,
    recommended_regression_candidates: [
      ...queues.routing_extension.slice(0, 20),
      ...queues.policy_fix.slice(0, 20),
      ...queues.safety_policy.slice(0, 20)
    ].slice(0, 60)
  };
}
|
||
|
||
export function buildAutoRunsRouter(openaiClient = new OpenAIResponsesClient()): Router {
|
||
const router = Router();
|
||
|
||
// GET /api/autoruns/history
// Lists run summaries matching the query filters. The `available` facets (targets,
// modes, prompt versions) are computed over every indexed run, not only the filtered
// ones. Errors are forwarded through next(error), matching the other handlers.
router.get("/api/autoruns/history", (req, res, next) => {
  try {
    const filters = parseFilters(req.query as Record<string, unknown>);
    const indexed = indexRuns(filters.scan_limit);
    const filtered = indexed.filter((run) => matchesFilters(run, filters)).slice(0, filters.limit);
    const summaries = filtered.map((run) => buildRunSummary(run));

    const availableTargets = Array.from(new Set(indexed.map((item) => item.eval_target))).sort();
    const availableModes = Array.from(
      new Set(indexed.map((item) => toStringSafe(item.report.mode)).filter((item): item is string => item !== null))
    ).sort();
    const availablePromptVersions = Array.from(
      new Set(indexed.map((item) => toStringSafe(item.report.prompt_version)).filter((item): item is string => item !== null))
    ).sort();

    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      filters_applied: {
        from: filters.from_ms === null ? null : new Date(filters.from_ms).toISOString(),
        to: filters.to_ms === null ? null : new Date(filters.to_ms).toISOString(),
        target: filters.target,
        use_mock: filters.use_mock,
        prompt_contains: filters.prompt_contains,
        mode: filters.mode,
        limit: filters.limit,
        scan_limit: filters.scan_limit
      },
      available: {
        targets: availableTargets,
        modes: availableModes,
        prompt_versions: availablePromptVersions
      },
      items: summaries,
      stats: buildHistoryStats(summaries)
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/history/:run_id
// Returns one run: its summary, coverage, per-case summaries enriched with annotation
// stats, the number of annotations attached to the run, and the raw report.
// Throws INVALID_RUN_ID (400) for a blank id and AUTORUN_NOT_FOUND (404) for an unknown run.
router.get("/api/autoruns/history/:run_id", (req, res, next) => {
  try {
    const requestedRunId = String(req.params.run_id ?? "").trim();
    if (!requestedRunId) {
      throw new ApiError("INVALID_RUN_ID", "run_id is required", 400);
    }

    const indexedRun = findRunById(requestedRunId);
    if (!indexedRun) {
      throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${requestedRunId}`, 404);
    }

    const allAnnotations = readAnnotations();
    const statsByCase = buildAnnotationStatsMap(requestedRunId, allAnnotations);
    const caseSummaries = buildCaseSummaries(indexedRun.report, indexedRun.run_id, true, statsByCase);
    const coverage = buildCoverageFromCases(caseSummaries);

    // Count the annotations that belong to this run.
    let runAnnotationCount = 0;
    for (const entry of allAnnotations) {
      if (entry.run_id === requestedRunId) {
        runAnnotationCount += 1;
      }
    }

    ok(res, {
      ok: true,
      run: buildRunSummary(indexedRun),
      coverage,
      cases: caseSummaries,
      annotations_summary: {
        total: runAnnotationCount
      },
      report: indexedRun.report
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/history/:run_id/case/:case_id/dialog
// Returns the dialog of one case together with its annotations, newest first. The
// special case id "__all__" returns the aggregate dialog of the whole run instead.
// Throws INVALID_DIALOG_REQUEST (400) for blank ids and AUTORUN_NOT_FOUND (404) for an
// unknown run.
router.get("/api/autoruns/history/:run_id/case/:case_id/dialog", (req, res, next) => {
  // Orders annotations from most to least recently updated.
  const newestFirst = (left: AutoRunAnnotationRecord, right: AutoRunAnnotationRecord): number =>
    Date.parse(right.updated_at) - Date.parse(left.updated_at);

  try {
    const runId = String(req.params.run_id ?? "").trim();
    const caseId = String(req.params.case_id ?? "").trim();
    if (!runId || !caseId) {
      throw new ApiError("INVALID_DIALOG_REQUEST", "run_id and case_id are required", 400);
    }

    const run = findRunById(runId);
    if (!run) {
      throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
    }

    const allAnnotations = readAnnotations();

    if (caseId === "__all__") {
      const aggregateDialog = buildRunAggregateDialog(run, allAnnotations);
      const runAnnotations = allAnnotations.filter((entry) => entry.run_id === runId).sort(newestFirst);
      ok(res, {
        ok: true,
        run_id: runId,
        case_id: "__all__",
        ...aggregateDialog,
        annotations: runAnnotations
      });
    } else {
      // Prefer the stored session dialog; otherwise rebuild one from the run.
      const caseDialog = loadSessionDialog(runId, caseId) ?? buildFallbackDialog(run, caseId);
      const annotatedMessages = withMessageAnnotations(runId, caseId, caseDialog.messages, allAnnotations);
      const caseAnnotations = allAnnotations
        .filter((entry) => entry.run_id === runId && entry.case_id === caseId)
        .sort(newestFirst);
      ok(res, {
        ok: true,
        run_id: runId,
        case_id: caseId,
        ...caseDialog,
        messages: annotatedMessages,
        annotations: caseAnnotations
      });
    }
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/annotations
// Lists annotations filtered by run_id, case_id, min_rating and manual_case_decision,
// newest first and capped by `limit`. Each item is enriched with its run summary, case
// summary and technical context when the run is found among the `scan_limit` indexed runs.
router.get("/api/autoruns/annotations", (req, res, next) => {
  try {
    const query = req.query as Record<string, unknown>;
    const runIdFilter = toStringSafe(query.run_id);
    const caseIdFilter = toStringSafe(query.case_id);
    const minRatingRaw = toNumberSafe(query.min_rating);
    const minRating = minRatingRaw === null ? null : clampInt(minRatingRaw, 1, 5, 1);
    const decisionFilter = parseDecisionFilter(query.manual_case_decision);
    const limit = clampInt(toNumberSafe(query.limit), 1, 2000, 400);
    const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500);

    const annotations = readAnnotations()
      .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
      .filter((item) => (caseIdFilter ? item.case_id === caseIdFilter : true))
      .filter((item) => (minRating === null ? true : item.rating >= minRating))
      .filter((item) => (decisionFilter === "all" ? true : item.manual_case_decision === decisionFilter))
      .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
      .slice(0, limit);

    const runIndex = indexRuns(scanLimit);
    const runMap = new Map(runIndex.map((item) => [item.run_id, item]));

    // Run and case summaries depend only on the run, so they are computed once per run
    // instead of once per annotation.
    const runViewCache = new Map<
      string,
      { runSummary: ReturnType<typeof buildRunSummary>; cases: ReturnType<typeof buildCaseSummaries> }
    >();
    const resolveRunView = (run: IndexedRun) => {
      let cached = runViewCache.get(run.run_id);
      if (cached === undefined) {
        cached = {
          runSummary: buildRunSummary(run),
          cases: buildCaseSummaries(run.report, run.run_id, false)
        };
        runViewCache.set(run.run_id, cached);
      }
      return cached;
    };

    const items = annotations.map((item) => {
      const run = runMap.get(item.run_id) ?? null;
      const runView = run ? resolveRunView(run) : null;
      const caseSummary = runView?.cases.find((candidate) => candidate.case_id === item.case_id) ?? null;
      return {
        ...item,
        run: runView?.runSummary ?? null,
        case_summary: caseSummary,
        technical_context: {
          report_path: run?.report_path ?? null,
          trace_id: item.context.trace_id,
          reply_type: item.context.reply_type,
          domain: item.context.domain,
          query_class: item.context.query_class,
          checks: caseSummary?.checks ?? null,
          metric_subscores: caseSummary?.metric_subscores ?? null
        }
      };
    });

    // The average is rounded to two decimals; null when nothing matched.
    const avgRating =
      items.length > 0 ? Number((items.reduce((acc, item) => acc + item.rating, 0) / items.length).toFixed(2)) : null;
    const byDecision = items.reduce<Record<string, number>>((acc, item) => {
      acc[item.manual_case_decision] = (acc[item.manual_case_decision] ?? 0) + 1;
      return acc;
    }, {});

    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      filters_applied: {
        run_id: runIdFilter ?? null,
        case_id: caseIdFilter ?? null,
        min_rating: minRating,
        manual_case_decision: decisionFilter,
        limit
      },
      stats: {
        total: items.length,
        avg_rating: avgRating,
        by_decision: byDecision
      },
      available_manual_case_decisions: MANUAL_CASE_DECISIONS,
      manual_case_decision_schema: readManualDecisionSchema(),
      items
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// POST /api/autoruns/annotations
// Creates or updates the annotation keyed by (run_id, case_id, message_index). Only
// assistant messages can be annotated. When an annotation with the same key exists, its
// id, created_at and resolution fields are kept and everything else is replaced.
// Responds with the stored annotation and the refreshed annotation stats of the case.
router.post("/api/autoruns/annotations", (req, res, next) => {
  try {
    const payload = toRecord(req.body);
    if (!payload) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "JSON body is required", 400);
    }

    // Every field is parsed up front, in this order, before the presence checks run.
    const runId = toStringSafe(payload.run_id);
    const caseId = toStringSafe(payload.case_id);
    const rawMessageIndex = toNumberSafe(payload.message_index);
    const rawRating = toNumberSafe(payload.rating);
    const comment = parseComment(payload.comment);
    const manualCaseDecision = parseManualCaseDecision(payload.manual_case_decision);
    const annotationAuthor = parseAnnotationAuthor(payload.annotation_author);

    if (!runId || !caseId) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "run_id and case_id are required", 400);
    }
    if (rawMessageIndex === null) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "message_index is required", 400);
    }
    const messageIndex = clampInt(rawMessageIndex, 0, 100_000, 0);
    if (rawRating === null) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "rating is required", 400);
    }
    const rating = clampInt(rawRating, 1, 5, 1);
    if (comment.length === 0) {
      throw new ApiError("INVALID_ANNOTATION_PAYLOAD", "comment is required", 400);
    }

    const run = findRunById(runId);
    if (!run) {
      throw new ApiError("AUTORUN_NOT_FOUND", `Run not found: ${runId}`, 404);
    }
    const caseSummary =
      buildCaseSummaries(run.report, run.run_id, false).find((entry) => entry.case_id === caseId) ?? null;
    if (!caseSummary) {
      throw new ApiError("AUTORUN_CASE_NOT_FOUND", `Case not found: ${caseId} in run ${runId}`, 404);
    }

    // Prefer the stored session dialog; otherwise rebuild one from the run.
    const dialog = loadSessionDialog(runId, caseId) ?? buildFallbackDialog(run, caseId);
    if (messageIndex >= dialog.messages.length) {
      throw new ApiError("AUTORUN_MESSAGE_NOT_FOUND", `Message index ${messageIndex} out of range`, 400);
    }
    const targetMessage = dialog.messages[messageIndex];
    if ((toStringSafe(targetMessage.role) ?? "unknown") !== "assistant") {
      throw new ApiError("AUTORUN_MESSAGE_NOT_ASSISTANT", "Only assistant answers can be annotated", 400);
    }

    // Walk backwards from the annotated answer to the closest preceding user message.
    let pairedUserQuestion: (typeof dialog.messages)[number] | undefined;
    for (let cursor = messageIndex - 1; cursor >= 0; cursor -= 1) {
      const candidate = dialog.messages[cursor];
      if (toStringSafe(candidate.role) === "user") {
        pairedUserQuestion = candidate;
        break;
      }
    }

    const nowIso = new Date().toISOString();
    const annotations = readAnnotations();
    const targetKey = annotationKey(runId, caseId, messageIndex);
    const existingIndex = annotations.findIndex(
      (entry) => annotationKey(entry.run_id, entry.case_id, entry.message_index) === targetKey
    );
    const previous = existingIndex === -1 ? null : annotations[existingIndex];

    const annotation: AutoRunAnnotationRecord = {
      annotation_id: previous?.annotation_id ?? generateAnnotationId(),
      run_id: runId,
      case_id: caseId,
      session_id: caseSummary.session_id,
      message_index: messageIndex,
      rating,
      comment,
      manual_case_decision: manualCaseDecision,
      annotation_author: annotationAuthor,
      resolved: previous?.resolved ?? false,
      resolved_at: previous?.resolved_at ?? null,
      resolved_by: previous?.resolved_by ?? null,
      created_at: previous?.created_at ?? nowIso,
      updated_at: nowIso,
      context: {
        message_id: toStringSafe(targetMessage.message_id),
        trace_id: toStringSafe(targetMessage.trace_id) ?? caseSummary.trace_id,
        reply_type: toStringSafe(targetMessage.reply_type) ?? caseSummary.reply_type,
        eval_target: run.eval_target,
        prompt_version: toStringSafe(run.report.prompt_version),
        domain: caseSummary.domain,
        query_class: caseSummary.query_class,
        question_text: toStringSafe(pairedUserQuestion?.text),
        answer_text: toStringSafe(targetMessage.text)
      }
    };

    // Replace in place when the key already exists, append otherwise.
    if (existingIndex === -1) {
      annotations.push(annotation);
    } else {
      annotations[existingIndex] = annotation;
    }
    writeAnnotations(annotations);

    const caseStats = buildAnnotationStatsMap(runId, annotations).get(caseId) ?? null;

    ok(res, {
      ok: true,
      annotation,
      case_annotation_stats: caseStats
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// PATCH /api/autoruns/annotations/:annotation_id
// Sets or clears the `resolved` flag of an annotation. Resolving stamps resolved_at with
// the current time and keeps the previous resolved_by when none is supplied; un-resolving
// clears both fields. Responds with the updated annotation and the case's annotation stats.
router.patch("/api/autoruns/annotations/:annotation_id", (req, res, next) => {
  try {
    const annotationId = toStringSafe(req.params.annotation_id);
    if (!annotationId) {
      throw new ApiError("INVALID_ANNOTATION_ID", "annotation_id is required", 400);
    }

    const patch = toRecord(req.body);
    if (!patch) {
      throw new ApiError("INVALID_ANNOTATION_PATCH", "JSON body is required", 400);
    }

    const resolved = toBooleanSafe(patch.resolved);
    if (resolved === null) {
      throw new ApiError("INVALID_ANNOTATION_PATCH", "resolved flag is required", 400);
    }
    const resolvedBy = parseAnnotationAuthor(patch.resolved_by);

    const annotations = readAnnotations();
    const position = annotations.findIndex((entry) => entry.annotation_id === annotationId);
    if (position < 0) {
      throw new ApiError("ANNOTATION_NOT_FOUND", `Annotation not found: ${annotationId}`, 404);
    }

    const nowIso = new Date().toISOString();
    const current = annotations[position];

    // Resolution metadata only exists while the annotation is resolved.
    const resolution = resolved
      ? { resolved_at: nowIso, resolved_by: resolvedBy ?? current.resolved_by ?? null }
      : { resolved_at: null, resolved_by: null };

    const updated: AutoRunAnnotationRecord = {
      ...current,
      resolved,
      ...resolution,
      updated_at: nowIso
    };

    annotations[position] = updated;
    writeAnnotations(annotations);

    const caseStats = buildAnnotationStatsMap(updated.run_id, annotations).get(updated.case_id) ?? null;

    ok(res, {
      ok: true,
      annotation: updated,
      case_annotation_stats: caseStats
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/manual-decision-schema
// Returns the manual case decision schema together with the list of allowed decision
// values. Errors are forwarded through next(error), matching the other handlers.
router.get("/api/autoruns/manual-decision-schema", (_req, res, next) => {
  try {
    ok(res, {
      ok: true,
      schema: readManualDecisionSchema(),
      enum: MANUAL_CASE_DECISIONS
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/post-analysis
// Builds the post-analysis report (stats, queues, regression candidates) over the newest
// `annotation_limit` annotations, optionally scoped by run_id and by the run filters
// accepted by parseFilters. Also returns up to 500 summaries of the runs considered.
router.get("/api/autoruns/post-analysis", (req, res, next) => {
  try {
    const query = req.query as Record<string, unknown>;
    const runIdFilter = toStringSafe(query.run_id);
    const limitPerQueue = clampInt(toNumberSafe(query.limit_per_queue), 5, 250, 40);
    const annotationLimit = clampInt(toNumberSafe(query.annotation_limit), 20, 5000, 1500);
    const scanLimit = clampInt(toNumberSafe(query.scan_limit), 50, 5000, 2500);

    const runFilters = parseFilters(query);
    const applyRunFilters = hasAnyRunFilterQuery(query);
    // Scan with the larger of the two limits so neither consumer is short-changed.
    const runIndex = indexRuns(Math.max(scanLimit, runFilters.scan_limit));
    const filteredRuns = applyRunFilters ? runIndex.filter((run) => matchesFilters(run, runFilters)) : runIndex;
    const runMap = new Map(filteredRuns.map((run) => [run.run_id, run]));

    // When run filters were requested they must always restrict the annotations, even
    // if they match no run at all; previously an empty match fell through to "include
    // everything". Without run filters, an empty index still leaves annotations unscoped.
    const restrictToKnownRuns = applyRunFilters || runMap.size > 0;

    const scopedAnnotations = readAnnotations()
      .filter((item) => (runIdFilter ? item.run_id === runIdFilter : true))
      .filter((item) => (restrictToKnownRuns ? runMap.has(item.run_id) : true))
      .sort((a, b) => Date.parse(b.updated_at) - Date.parse(a.updated_at))
      .slice(0, annotationLimit);

    const analysis = collectPostAnalysis(scopedAnnotations, runMap, limitPerQueue);

    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      filters_applied: {
        run_id: runIdFilter ?? null,
        run_filters_applied: applyRunFilters,
        limit_per_queue: limitPerQueue,
        annotation_limit: annotationLimit,
        scan_limit: scanLimit
      },
      runs_considered: filteredRuns.slice(0, 500).map((item) => buildRunSummary(item)),
      manual_case_decision_schema: readManualDecisionSchema(),
      post_analysis: analysis
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/autogen/history
// Returns up to `limit` autogen history records. The `mode` query parameter narrows the
// list only when it is exactly "qwen_seed" or "codex_creative"; any other value (or no
// value) returns records of every mode.
router.get("/api/autoruns/autogen/history", (req, res, next) => {
  try {
    const query = req.query as Record<string, unknown>;
    const limit = clampInt(toNumberSafe(query.limit), 1, 500, 120);
    const requestedMode = toStringSafe(query.mode);

    // null means "no mode restriction".
    const modeFilter: AutoGenMode | null =
      requestedMode === "qwen_seed" || requestedMode === "codex_creative" ? requestedMode : null;

    const history = readAutoGenHistory();
    const matching = modeFilter === null ? history.filter(() => true) : history.filter((entry) => entry.mode === modeFilter);
    const items = matching.slice(0, limit);

    ok(res, {
      ok: true,
      generated_at: new Date().toISOString(),
      items
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// GET /api/autoruns/autogen/personality-catalog
// Returns the autogen personality catalog with a generation timestamp.
router.get("/api/autoruns/autogen/personality-catalog", (_req, res, next) => {
  try {
    const generatedAt = new Date().toISOString();
    const catalog = buildAutogenPersonalityCatalog();
    ok(res, {
      ok: true,
      generated_at: generatedAt,
      items: catalog
    });
  } catch (failure) {
    next(failure);
  }
});
|
||
|
||
// POST /api/autoruns/autogen/generate
// Generates a batch of test questions in the requested mode, optionally persists them as
// an eval case set file under EVAL_CASES_DIR, prepends a record to the autogen history
// (capped at 500 entries) and responds with that record.
//
// - mode "qwen_seed" calls the LLM and requires a resolvable LLM runtime config,
//   otherwise AUTOGEN_LLM_CONFIG_REQUIRED (400) is thrown;
// - any other mode uses generateCodexCreativeQuestions;
// - `persist_to_eval_cases` defaults to true.
router.post("/api/autoruns/autogen/generate", async (req, res, next) => {
  try {
    const body = toRecord(req.body);
    if (!body) {
      throw new ApiError("INVALID_AUTOGEN_PAYLOAD", "JSON body is required", 400);
    }
    const mode = parseAutoGenMode(body.mode);
    const count = parseAutogenCount(body.count);
    const domain = parseAutogenDomain(body.domain);
    const persistCaseSet = toBooleanSafe(body.persist_to_eval_cases) ?? true;
    const generatedBy = parseAnnotationAuthor(body.generated_by);
    const context = toRecord(body.context);
    const llmConfig = parseAutogenLlmRuntimeConfig(body, context);
    const personalityPrompt = toStringSafe(context?.autogen_personality_prompt);

    let questions: string[] = [];
    if (mode === "qwen_seed") {
      if (!llmConfig) {
        throw new ApiError(
          "AUTOGEN_LLM_CONFIG_REQUIRED",
          "Для режима qwen_seed нужен активный LLM-контур (provider/model/baseUrl) из настроек подключения.",
          400
        );
      }
      questions = await generateQwenSeedQuestionsLive({
        count,
        domain,
        personalityPrompt,
        llmConfig,
        client: openaiClient
      });
    } else {
      questions = generateCodexCreativeQuestions(count, domain);
    }

    // Sanitize, drop empties, de-duplicate (first occurrence wins) and cap at `count`.
    questions = Array.from(
      new Set(questions.map((item) => sanitizeGeneratedQuestion(item)).filter((item) => item.length > 0))
    ).slice(0, count);
    const generationId = generateAutogenId();

    let savedCaseSetFile: string | null = null;
    if (persistCaseSet) {
      // A recursive mkdir is a no-op for an existing directory, so no existence check
      // is needed beforehand.
      fs.mkdirSync(EVAL_CASES_DIR, { recursive: true });
      const fileName = buildAutogenCaseSetFileName(mode, generationId);
      const filePath = path.resolve(EVAL_CASES_DIR, fileName);
      const payload = buildAutogenCaseSetPayload({
        generationId,
        mode,
        domain,
        questions
      });
      fs.writeFileSync(filePath, JSON.stringify(payload, null, 2), "utf-8");
      // Only the file name (not the absolute path) is recorded in the history.
      savedCaseSetFile = fileName;
    }

    const record: AutoGenHistoryRecord = {
      generation_id: generationId,
      created_at: new Date().toISOString(),
      mode,
      count: questions.length,
      domain,
      questions,
      generated_by: generatedBy,
      saved_case_set_file: savedCaseSetFile,
      context: context
        ? {
            llm_provider: toStringSafe(context.llm_provider),
            model: toStringSafe(context.model),
            assistant_prompt_version: toStringSafe(context.assistant_prompt_version),
            decomposition_prompt_version: toStringSafe(context.decomposition_prompt_version),
            // Free-text fields are passed through repairAutogenMojibake before storing.
            prompt_fingerprint: toStringSafe(context.prompt_fingerprint)
              ? repairAutogenMojibake(String(context.prompt_fingerprint))
              : null,
            autogen_personality_id: toStringSafe(context.autogen_personality_id),
            autogen_personality_prompt: toStringSafe(context.autogen_personality_prompt)
              ? repairAutogenMojibake(String(context.autogen_personality_prompt))
              : null
          }
        : null
    };

    // Newest record first; keep only the 500 most recent generations.
    const history = readAutoGenHistory();
    history.unshift(record);
    writeAutoGenHistory(history.slice(0, 500));

    ok(res, {
      ok: true,
      generation: record
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
return router;
|
||
}
|
||
|
||
|