import fs from "node:fs";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { AddressQueryService } from "../src/services/addressQueryService";
import { resolveAssistantOrchestrationDecision } from "../src/services/assistantService";
import { resolveAddressIntent } from "../src/services/addressIntentResolver";

/** Reviewer verdict stored with each annotated autorun case. */
type ManualCaseDecision =
  | "candidate_for_implementation"
  | "needs_dialog_policy_fix"
  | "needs_routing_extension"
  | "bad_test_case";

/** Optional context attached to an annotation; only the question text is read here. */
interface AnnotationContext {
  question_text?: string;
}

/** Subset of the fields of one row in `annotations.json` that this suite reads. */
interface AnnotationRecord {
  run_id: string;
  case_id: string;
  manual_case_decision: ManualCaseDecision;
  resolved: boolean;
  context?: AnnotationContext | null;
}

/** Normalised test input derived from one annotation row. */
interface ManualCase {
  runId: string;
  caseId: string;
  decision: ManualCaseDecision;
  question: string;
  resolved: boolean;
}

/** `run_id::case_id` keys of the annotated cases covered by this suite, in assertion order. */
const MANUAL_CASE_KEYS = [
  "assistant-stage1-UMKkFYfg2L::AUTO-003",
  "assistant-stage1-UMKkFYfg2L::AUTO-007",
  "assistant-stage1-UMKkFYfg2L::AUTO-009",
  "assistant-stage1-UMKkFYfg2L::AUTO-012",
  "assistant-stage1-UMKkFYfg2L::AUTO-015",
  "assistant-stage1-UMKkFYfg2L::AUTO-017",
  "assistant-stage1-ywEyJgFkC4::AUTO-002",
  "assistant-stage1-ywEyJgFkC4::AUTO-004",
  "assistant-stage1-ywEyJgFkC4::AUTO-005",
  "assistant-stage1-ywEyJgFkC4::AUTO-006",
  "assistant-stage1-ywEyJgFkC4::AUTO-009",
  "assistant-stage1-ywEyJgFkC4::AUTO-013",
  "assistant-stage1-ywEyJgFkC4::AUTO-014",
  "assistant-stage1-ywEyJgFkC4::AUTO-015",
  "assistant-stage1-ZL97weIIRG::AUTO-005",
  "assistant-stage1-ZL97weIIRG::AUTO-008",
  "assistant-stage1-ZL97weIIRG::AUTO-009",
  "assistant-stage1-ZL97weIIRG::AUTO-010"
] as const;

/** Keys whose resolved intent must not be "unknown" (asserted by the second test). */
const FORMER_UNKNOWN_INTENTS = new Set([
  "assistant-stage1-ywEyJgFkC4::AUTO-002",
  "assistant-stage1-ywEyJgFkC4::AUTO-009",
  "assistant-stage1-ywEyJgFkC4::AUTO-013",
  "assistant-stage1-ywEyJgFkC4::AUTO-015",
  "assistant-stage1-ZL97weIIRG::AUTO-009",
  "assistant-stage1-ZL97weIIRG::AUTO-010"
]);

/** Lower-cased character pairs that each subtract 4 from the mojibake score when present. */
const MOJIBAKE_FRAGMENTS = [
  "рџ", "р°", "сѓ", "с‚", "рµ", "рё", "с€",
  "с‡", "сЏ", "сЊ", "с‹", "с“", "вђ", "в€"
];

/**
 * Heuristic "how healthy does this Russian text look" score.
 *
 * +1 per basic Cyrillic letter, -4 per distinct known mojibake fragment
 * present, -3 per U+FFFD replacement character. Higher is better; scores are
 * only meaningful when compared between candidate decodings of the same text.
 */
function textMojibakeScore(value: string): number {
  const lower = value.toLowerCase();
  let score = 0;
  for (const fragment of MOJIBAKE_FRAGMENTS) {
    if (lower.includes(fragment)) {
      score -= 4;
    }
  }
  score += value.match(/[А-Яа-яЁё]/g)?.length ?? 0;
  score -= (value.match(/�/g)?.length ?? 0) * 3;
  return score;
}

/** Lazily built map from a Windows-1251 character to its single byte (0x80-0xFF only). */
let win1251ReverseTable: Map<string, number> | null = null;

/**
 * Builds (once) the reverse Windows-1251 table by decoding every high byte.
 *
 * NOTE(review): `new TextDecoder("windows-1251")` is expected to throw on Node
 * builds without full ICU; the caller wraps this in try/catch for that reason.
 */
function getWin1251ReverseTable(): Map<string, number> {
  if (win1251ReverseTable === null) {
    const decoder = new TextDecoder("windows-1251");
    const table = new Map<string, number>();
    for (let byte = 0x80; byte <= 0xff; byte += 1) {
      const char = decoder.decode(Uint8Array.of(byte));
      // Bytes with no assigned character decode to U+FFFD; they get no entry.
      if (char !== "\uFFFD") {
        table.set(char, byte);
      }
    }
    win1251ReverseTable = table;
  }
  return win1251ReverseTable;
}

/**
 * Re-encodes `value` as Windows-1251 bytes, or returns `null` when some
 * character has no single-byte representation (so the text cannot be the
 * result of a CP1251 mis-decode).
 */
function encodeWin1251(value: string): Uint8Array | null {
  const table = getWin1251ReverseTable();
  const bytes: number[] = [];
  for (const char of value) {
    const code = char.codePointAt(0) ?? 0;
    if (code < 0x80) {
      bytes.push(code);
      continue;
    }
    const mapped = table.get(char);
    if (mapped !== undefined) {
      bytes.push(mapped);
      continue;
    }
    if (code === 0x98) {
      // NOTE(review): 0x98 has no table entry; assumed that some decoders pass
      // it through as U+0098, so it is mapped back to the same byte — confirm.
      bytes.push(code);
      continue;
    }
    return null;
  }
  return Uint8Array.from(bytes);
}

/**
 * Attempts to undo "UTF-8 bytes were read as Windows-1251" mojibake.
 *
 * The previous implementation masked each UTF-16 code unit with `0xff`, which
 * is not a CP1251 encoding (e.g. U+0420 became 0x20) and could replace the
 * input with ASCII garbage that happened to score higher. The text is now
 * re-encoded with a real CP1251 table before being decoded as UTF-8.
 *
 * @returns the repaired text when it scores strictly better, otherwise `value`.
 */
function decodeUtf8FromWin1251Mojibake(value: string): string {
  try {
    const bytes = encodeWin1251(value);
    if (bytes === null) {
      return value;
    }
    const decoded = Buffer.from(bytes).toString("utf8");
    return textMojibakeScore(decoded) > textMojibakeScore(value) ? decoded : value;
  } catch {
    // Best effort: an unavailable decoder or any other failure keeps the input.
    return value;
  }
}

/**
 * Attempts to undo "UTF-8 bytes were read as Latin-1" mojibake.
 *
 * Only strings whose code units all fit in one byte are considered, because
 * `Buffer.from(value, "latin1")` silently truncates anything above 0xFF and
 * would otherwise produce unrelated bytes.
 *
 * @returns the repaired text when it scores strictly better, otherwise `value`.
 */
function decodeUtf8FromLatin1Mojibake(value: string): string {
  try {
    for (let index = 0; index < value.length; index += 1) {
      if (value.charCodeAt(index) > 0xff) {
        return value;
      }
    }
    const decoded = Buffer.from(value, "latin1").toString("utf8");
    return textMojibakeScore(decoded) > textMojibakeScore(value) ? decoded : value;
  } catch {
    return value;
  }
}

/** Applies the Windows-1251 repair first, then the Latin-1 repair on its result. */
function repairTextMojibake(value: string): string {
  const fromWin1251 = decodeUtf8FromWin1251Mojibake(value);
  return decodeUtf8FromLatin1Mojibake(fromWin1251);
}

/**
 * Loads `annotations.json` and returns one {@link ManualCase} per entry of
 * `MANUAL_CASE_KEYS`, in that order, with the question text mojibake-repaired.
 *
 * @throws Error when the file is not a JSON array, when a listed key has no
 *   annotation row, or when its `context.question_text` is missing or blank.
 */
function buildManualCasesFromAnnotations(): ManualCase[] {
  const filePath = path.resolve(__dirname, "../../data/autorun_annotations/annotations.json");
  const parsed: unknown = JSON.parse(fs.readFileSync(filePath, "utf8"));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array of annotations in ${filePath}`);
  }
  // Individual row fields are still trusted; only the top-level shape is verified.
  const rows = parsed as AnnotationRecord[];

  const wantedKeys = new Set<string>(MANUAL_CASE_KEYS);
  const byKey = new Map<string, AnnotationRecord>();
  for (const row of rows) {
    const key = `${row.run_id}::${row.case_id}`;
    if (wantedKeys.has(key)) {
      // Later rows with the same key overwrite earlier ones.
      byKey.set(key, row);
    }
  }

  return MANUAL_CASE_KEYS.map((key): ManualCase => {
    const row = byKey.get(key);
    if (!row) {
      throw new Error(`Missing annotation for ${key}`);
    }
    const rawQuestion = String(row.context?.question_text ?? "").trim();
    if (!rawQuestion) {
      throw new Error(`Missing question_text for ${key}`);
    }
    return {
      runId: row.run_id,
      caseId: row.case_id,
      decision: row.manual_case_decision,
      question: repairTextMojibake(rawQuestion),
      resolved: row.resolved === true
    };
  });
}

const MANUAL_WAVE18_CASES = buildManualCasesFromAnnotations();

/** Cases not marked resolved in the annotations; only these hit the live address service. */
const MANUAL_LIVE_ASSERT_CASES = MANUAL_WAVE18_CASES.filter((entry) => !entry.resolved);

/** Failure-message label identifying a case in `expect` output. */
function caseLabel(entry: ManualCase): string {
  return `${entry.runId} ${entry.caseId}`;
}

describe("wave18 manual comments regressions", { timeout: 120000 }, () => {
  it("keeps manual-comment prompts in address lane (no capability/data-scope drift)", () => {
    for (const entry of MANUAL_WAVE18_CASES) {
      const label = caseLabel(entry);
      const decision = resolveAssistantOrchestrationDecision({
        rawUserMessage: entry.question,
        effectiveAddressUserMessage: entry.question,
        followupContext: null,
        llmPreDecomposeMeta: null,
        useMock: false
      });
      expect(decision.runAddressLane, label).toBe(true);
      expect(decision.livingMode, label).toBe("address_data");
      expect(String(decision.toolGateReason), label).not.toBe("assistant_capability_query_detected");
      expect(String(decision.toolGateReason), label).not.toBe("assistant_data_scope_query_detected");
    }
  });

  it("resolves previously-unknown manual prompts to supported intents", () => {
    for (const entry of MANUAL_WAVE18_CASES) {
      const key = `${entry.runId}::${entry.caseId}`;
      if (!FORMER_UNKNOWN_INTENTS.has(key)) {
        continue;
      }
      const intent = resolveAddressIntent(entry.question);
      expect(intent.intent, key).not.toBe("unknown");
    }
  });

  it(
    "returns handled address responses for manual-comment prompts without legacy rigid unsupported template",
    async () => {
      const service = new AddressQueryService();
      // Sequential on purpose: the original awaited each case in turn.
      for (const entry of MANUAL_LIVE_ASSERT_CASES) {
        const label = caseLabel(entry);
        const result = await service.tryHandle(entry.question);
        const reply = String(result?.reply_text ?? "");
        expect(result, label).not.toBeNull();
        expect(result?.handled, label).toBe(true);
        expect(reply, label).not.toMatch(
          /Сейчас этот тип вопроса вне поддерживаемого контура адресного режима/iu
        );
      }
    },
    120_000
  );
});