From 278eb4abeb379f1c8339ec9542a44cff2f1884d3 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sun, 12 Apr 2026 14:54:52 +0300 Subject: [PATCH] =?UTF-8?q?=D0=93=D0=9B=D0=9E=D0=91=D0=90=D0=9B=D0=AC?= =?UTF-8?q?=D0=9D=D0=AB=D0=99=20=D0=A0=D0=95=D0=A4=D0=90=D0=9A=D0=A2=D0=9E?= =?UTF-8?q?=D0=A0=D0=98=D0=9D=D0=93=20=D0=90=D0=A0=D0=A5=D0=98=D0=A2=D0=95?= =?UTF-8?q?=D0=9A=D0=A2=D0=A3=D0=A0=D0=AB=20-=20=D0=90=D1=80=D1=85=D0=B8?= =?UTF-8?q?=D1=82=D0=B5=D0=BA=D1=82=D1=83=D1=80=D0=B0=20=D0=BC=D0=B0=D1=80?= =?UTF-8?q?=D1=88=D1=80=D1=83=D1=82=D0=BE=D0=B2=20v2:=20baseline=20=D0=BE?= =?UTF-8?q?=D0=B6=D0=B8=D0=B4=D0=B0=D0=BD=D0=B8=D0=B9=20intentrecipe/resul?= =?UTF-8?q?t=5Fmode=20=D1=81=20runtime-=D0=B0=D1=83=D0=B4=D0=B8=D1=82?= =?UTF-8?q?=D0=BE=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/TECH/ARCH_LAYER_FOUNDATION.md | 7 + docs/TECH/address_route_expectations_v1.json | 61 +++++++ llm_normalizer/backend/dist/config.js | 6 +- .../dist/services/addressQueryService.js | 89 +++++++++- .../dist/services/addressRouteExpectations.js | 128 ++++++++++++++ .../backend/dist/services/assistantService.js | 5 + llm_normalizer/backend/src/config.ts | 8 + .../src/services/addressQueryService.ts | 108 +++++++++++- .../src/services/addressRouteExpectations.ts | 162 ++++++++++++++++++ .../backend/src/services/assistantService.ts | 6 + .../backend/src/types/addressQuery.ts | 6 + llm_normalizer/backend/src/types/assistant.ts | 5 + .../tests/addressQueryRuntimeM23.test.ts | 2 + .../tests/addressRouteExpectations.test.ts | 37 ++++ 14 files changed, 624 insertions(+), 6 deletions(-) create mode 100644 docs/TECH/address_route_expectations_v1.json create mode 100644 llm_normalizer/backend/dist/services/addressRouteExpectations.js create mode 100644 llm_normalizer/backend/src/services/addressRouteExpectations.ts create mode 100644 llm_normalizer/backend/tests/addressRouteExpectations.test.ts diff --git a/docs/TECH/ARCH_LAYER_FOUNDATION.md b/docs/TECH/ARCH_LAYER_FOUNDATION.md index 86b1012..c80d443 100644 --- a/docs/TECH/ARCH_LAYER_FOUNDATION.md +++ b/docs/TECH/ARCH_LAYER_FOUNDATION.md @@ -100,6 +100,13 @@ Route baseline contract: This baseline freezes capability mapping for key intents and acts as anti-regression control when routing evolves. +Route expectation contract (level 2): + +- `docs/TECH/address_route_expectations_v1.json` +- loader/evaluator: `llm_normalizer/backend/src/services/addressRouteExpectations.ts` + +This second-level baseline freezes expected `intent -> selected_recipe/result_mode` semantics and provides runtime audit with optional hard guard. + ## Why This Is a Foundation, Not a Patch This change does not only tune one scenario. It introduces stable contracts: diff --git a/docs/TECH/address_route_expectations_v1.json b/docs/TECH/address_route_expectations_v1.json new file mode 100644 index 0000000..31a54ec --- /dev/null +++ b/docs/TECH/address_route_expectations_v1.json @@ -0,0 +1,61 @@ +{ + "schema_version": "address_route_expectations_v1", + "updated_at": "2026-04-12T13:00:00.000Z", + "entries": [ + { + "intent": "payables_confirmed_as_of_date", + "expected_selected_recipes": ["address_payables_confirmed_as_of_date_v1"], + "expected_requested_result_modes": ["confirmed_balance"], + "expected_result_modes": ["confirmed_balance"] + }, + { + "intent": "list_payables_counterparties", + "expected_selected_recipes": ["address_movements_payables_v1", "address_open_items_by_party_or_contract_v1"], + "expected_requested_result_modes": ["heuristic_candidates", "confirmed_balance"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "list_receivables_counterparties", + "expected_selected_recipes": ["address_movements_receivables_v1", "address_open_items_by_party_or_contract_v1"], + "expected_requested_result_modes": ["heuristic_candidates", "confirmed_balance"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "account_balance_snapshot", + "expected_selected_recipes": ["address_open_items_by_party_or_contract_v1"], + "expected_requested_result_modes": ["confirmed_balance"], + "expected_result_modes": ["confirmed_balance"] + }, + { + "intent": "documents_forming_balance", + "expected_selected_recipes": ["address_open_items_by_party_or_contract_v1"], + "expected_requested_result_modes": ["confirmed_balance"], + "expected_result_modes": ["confirmed_balance"] + }, + { + "intent": "list_contracts_by_counterparty", + "expected_selected_recipes": ["address_contracts_by_counterparty_v1"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "list_documents_by_counterparty", + "expected_selected_recipes": ["address_documents_by_counterparty_v1"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "list_documents_by_contract", + "expected_selected_recipes": ["address_documents_by_contract_v1"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "bank_operations_by_counterparty", + "expected_selected_recipes": ["address_bank_operations_by_counterparty_v1"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + }, + { + "intent": "bank_operations_by_contract", + "expected_selected_recipes": ["address_bank_operations_by_contract_v1"], + "expected_result_modes": ["heuristic_candidates", "confirmed_balance"] + } + ] +} diff --git a/llm_normalizer/backend/dist/config.js b/llm_normalizer/backend/dist/config.js index d7d75ff..9d6ecb4 100644 --- a/llm_normalizer/backend/dist/config.js +++ b/llm_normalizer/backend/dist/config.js @@ -3,8 +3,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.ASSISTANT_SESSIONS_DIR = exports.EVAL_CASES_DIR = exports.PRESETS_DIR = exports.TRACES_DIR = exports.DATA_DIR = exports.VAT_PAYABLE_19_PREFIXES = exports.VAT_PAYABLE_68_PREFIXES = exports.ASSISTANT_MCP_LIVE_LIMIT = exports.ASSISTANT_MCP_TIMEOUT_MS = exports.ASSISTANT_MCP_CHANNEL = exports.ASSISTANT_MCP_PROXY_URL = exports.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1 = exports.FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1 = exports.FEATURE_ASSISTANT_ROUTE_RECEIVABLES_HEURISTIC_V1 = exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1 = exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_CONFIRMED_V1 = exports.FEATURE_ASSISTANT_ROUTE_BALANCE_EXACT_V1 = exports.FEATURE_ASSISTANT_ROUTE_DRILLDOWN_V1 = exports.FEATURE_ASSISTANT_ROUTE_ADDRESS_GENERIC_V1 = exports.FEATURE_ASSISTANT_CAPABILITY_ROUTE_GUARD_V1 = exports.FEATURE_ASSISTANT_ADDRESS_NAVIGATION_STATE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_LIVE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_V1 = exports.FEATURE_ASSISTANT_MCP_RUNTIME_V1 = exports.FEATURE_ASSISTANT_GRAPH_RUNTIME_V1 = exports.FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1 = exports.FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1 = exports.FEATURE_ASSISTANT_STAGE2_EVAL_V1 = exports.FEATURE_ASSISTANT_PROBLEM_UNIT_CONTINUITY_V1 = exports.FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1 = exports.FEATURE_ASSISTANT_PROBLEM_UNITS_V1 = exports.FEATURE_ASSISTANT_ACCOUNTANT_EVAL_V1 = exports.FEATURE_ASSISTANT_ANSWER_POLICY_V11 = exports.FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1 = exports.FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1 = exports.FEATURE_ASSISTANT_BROAD_GUARD_V1 = exports.FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1 = exports.FEATURE_ASSISTANT_STATE_FOLLOWUP_BINDING_V1 = exports.FEATURE_ASSISTANT_CONTRACTS_V11 = exports.FEATURE_ASSISTANT_INVESTIGATION_STATE_V1 = exports.DEFAULT_PROMPT_VERSION = exports.DEFAULT_MAX_OUTPUT_TOKENS = exports.DEFAULT_TEMPERATURE = exports.DEFAULT_MODEL = exports.DEFAULT_OPENAI_BASE_URL = exports.TIMEZONE = exports.PORT = exports.MODULE_ROOT = exports.BACKEND_ROOT = void 0; -exports.MANUAL_CASE_DECISION_SCHEMA_FILE = exports.ASSISTANT_CAPABILITIES_REGISTRY_FILE = exports.ASSISTANT_CANON_FILE = exports.ARCH_EXPORT_2020_DIR = exports.SCHEMAS_DIR = exports.EVAL_DATASETS_DIR = exports.REPORTS_DIR = exports.PROMPTS_DIR = exports.AUTORUN_GENERATOR_HISTORY_FILE = exports.AUTORUN_GENERATOR_DIR = exports.AUTORUN_ANNOTATIONS_FILE = exports.AUTORUN_ANNOTATIONS_DIR = exports.ASSISTANT_ANNOTATIONS_FILE = exports.ASSISTANT_ANNOTATIONS_DIR = void 0; +exports.PRESETS_DIR = exports.TRACES_DIR = exports.DATA_DIR = exports.VAT_PAYABLE_19_PREFIXES = exports.VAT_PAYABLE_68_PREFIXES = exports.ASSISTANT_MCP_LIVE_LIMIT = exports.ASSISTANT_MCP_TIMEOUT_MS = exports.ASSISTANT_MCP_CHANNEL = exports.ASSISTANT_MCP_PROXY_URL = exports.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1 = exports.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 = exports.FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1 = exports.FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1 = exports.FEATURE_ASSISTANT_ROUTE_RECEIVABLES_HEURISTIC_V1 = exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1 = exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_CONFIRMED_V1 = exports.FEATURE_ASSISTANT_ROUTE_BALANCE_EXACT_V1 = exports.FEATURE_ASSISTANT_ROUTE_DRILLDOWN_V1 = exports.FEATURE_ASSISTANT_ROUTE_ADDRESS_GENERIC_V1 = exports.FEATURE_ASSISTANT_CAPABILITY_ROUTE_GUARD_V1 = exports.FEATURE_ASSISTANT_ADDRESS_NAVIGATION_STATE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_LIVE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1 = exports.FEATURE_ASSISTANT_ADDRESS_QUERY_V1 = exports.FEATURE_ASSISTANT_MCP_RUNTIME_V1 = exports.FEATURE_ASSISTANT_GRAPH_RUNTIME_V1 = exports.FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1 = exports.FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1 = exports.FEATURE_ASSISTANT_STAGE2_EVAL_V1 = exports.FEATURE_ASSISTANT_PROBLEM_UNIT_CONTINUITY_V1 = exports.FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1 = exports.FEATURE_ASSISTANT_PROBLEM_UNITS_V1 = exports.FEATURE_ASSISTANT_ACCOUNTANT_EVAL_V1 = exports.FEATURE_ASSISTANT_ANSWER_POLICY_V11 = exports.FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1 = exports.FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1 = exports.FEATURE_ASSISTANT_BROAD_GUARD_V1 = exports.FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1 = exports.FEATURE_ASSISTANT_STATE_FOLLOWUP_BINDING_V1 = exports.FEATURE_ASSISTANT_CONTRACTS_V11 = exports.FEATURE_ASSISTANT_INVESTIGATION_STATE_V1 = exports.DEFAULT_PROMPT_VERSION = exports.DEFAULT_MAX_OUTPUT_TOKENS = exports.DEFAULT_TEMPERATURE = exports.DEFAULT_MODEL = exports.DEFAULT_OPENAI_BASE_URL = exports.TIMEZONE = exports.PORT = exports.MODULE_ROOT = exports.BACKEND_ROOT = void 0; +exports.MANUAL_CASE_DECISION_SCHEMA_FILE = exports.ASSISTANT_CAPABILITIES_REGISTRY_FILE = exports.ASSISTANT_CANON_FILE = exports.ARCH_EXPORT_2020_DIR = exports.SCHEMAS_DIR = exports.EVAL_DATASETS_DIR = exports.REPORTS_DIR = exports.PROMPTS_DIR = exports.AUTORUN_GENERATOR_HISTORY_FILE = exports.AUTORUN_GENERATOR_DIR = exports.AUTORUN_ANNOTATIONS_FILE = exports.AUTORUN_ANNOTATIONS_DIR = exports.ASSISTANT_ANNOTATIONS_FILE = exports.ASSISTANT_ANNOTATIONS_DIR = exports.ASSISTANT_SESSIONS_DIR = exports.EVAL_CASES_DIR = void 0; const path_1 = __importDefault(require("path")); exports.BACKEND_ROOT = path_1.default.resolve(__dirname, ".."); exports.MODULE_ROOT = path_1.default.resolve(exports.BACKEND_ROOT, ".."); @@ -69,6 +69,8 @@ exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_CONFIRMED_V1 = toBooleanFlag(process.en exports.FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1, true); exports.FEATURE_ASSISTANT_ROUTE_RECEIVABLES_HEURISTIC_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_ROUTE_RECEIVABLES_HEURISTIC_V1, true); exports.FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1, false); +exports.FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1, true); +exports.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1, false); exports.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1 = toBooleanFlag(process.env.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1, true); exports.ASSISTANT_MCP_PROXY_URL = (process.env.ASSISTANT_MCP_PROXY_URL ?? "http://127.0.0.1:6003").replace(/\/+$/, ""); exports.ASSISTANT_MCP_CHANNEL = process.env.ASSISTANT_MCP_CHANNEL ?? "default"; diff --git a/llm_normalizer/backend/dist/services/addressQueryService.js b/llm_normalizer/backend/dist/services/addressQueryService.js index bf653a5..b0912d6 100644 --- a/llm_normalizer/backend/dist/services/addressQueryService.js +++ b/llm_normalizer/backend/dist/services/addressQueryService.js @@ -8,6 +8,7 @@ const decomposeStage_1 = require("./address_runtime/decomposeStage"); const resolveStage_1 = require("./address_runtime/resolveStage"); const composeStage_1 = require("./address_runtime/composeStage"); const addressCapabilityPolicy_1 = require("./addressCapabilityPolicy"); +const addressRouteExpectations_1 = require("./addressRouteExpectations"); const ACCOUNT_SCOPE_FIELDS_CHECKED = ["account_dt", "account_kt", "registrator", "analytics"]; const ACCOUNT_SCOPE_MATCH_STRATEGY = "account_code_regex_plus_alias_map_v1"; const ADDRESS_ANCHOR_RECOVERY_LIMIT = 1000; @@ -781,6 +782,30 @@ function buildShadowRouteAudit(input) { status: "planned" }; } +function buildRouteExpectationAudit(input) { + if (!config_1.FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1) { + return { + status: "not_found", + reason: "route_expectation_audit_disabled", + expectedSelectedRecipes: [], + expectedRequestedResultModes: [], + expectedResultModes: [] + }; + } + const audit = (0, addressRouteExpectations_1.evaluateAddressRouteExpectation)({ + intent: input.intent, + selectedRecipe: input.selectedRecipe, + requestedResultMode: input.requestedResultMode, + resultMode: input.resultMode + }); + return { + status: audit.status, + reason: audit.reason, + expectedSelectedRecipes: audit.expected_selected_recipes, + expectedRequestedResultModes: audit.expected_requested_result_modes, + expectedResultModes: audit.expected_result_modes + }; +} function enforceStrictAccountScopeForIntent(plan, intent) { if (intent !== "list_receivables_counterparties" || plan.account_scope_mode === "strict") { return plan; @@ -1395,6 +1420,13 @@ function buildLimitedExecutionResult(input) { !reasonsWithConfirmedFallback.includes("exact_payables_mode_limited_response") ? [...reasonsWithConfirmedFallback, "exact_payables_mode_limited_response"] : reasonsWithConfirmedFallback; + const routeExpectationAudit = input.routeExpectationAudit ?? + buildRouteExpectationAudit({ + intent: input.intent.intent, + selectedRecipe: input.selectedRecipe, + requestedResultMode: requestedResultMode, + resultMode: resultSemantics.result_mode + }); return { handled: true, reply_text: composeLimitedReply({ @@ -1453,6 +1485,11 @@ function buildLimitedExecutionResult(input) { shadow_route_intent: input.shadowRouteAudit?.intent ?? null, shadow_route_selected_recipe: input.shadowRouteAudit?.selectedRecipe ?? null, shadow_route_status: input.shadowRouteAudit?.status ?? "skipped", + route_expectation_status: routeExpectationAudit.status, + route_expectation_reason: routeExpectationAudit.reason, + route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, + route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, + route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, ...resultSemantics, limitations: input.limitations, reasons @@ -2451,6 +2488,45 @@ class AddressQueryService { responseType: factual.responseType, rowsMatched: filteredRows.length }), factual.semantics); + const finalRouteExpectationAudit = buildRouteExpectationAudit({ + intent: intent.intent, + selectedRecipe: effectiveRecipeId, + requestedResultMode, + resultMode: factualResultSemantics.result_mode + }); + if (finalRouteExpectationAudit.status === "mismatch" && config_1.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1) { + return buildLimitedExecutionResult({ + mode, + shape, + intent, + filters: filters.extracted_filters, + missingRequiredFilters: [], + selectedRecipe: effectiveRecipeId, + accountScopeMode: plan.account_scope_mode, + accountScopeFallbackApplied, + accountScopeAudit, + anchor, + matchFailureStage, + matchFailureReason, + mcpCallStatus: stageStatus, + rowsFetched: mcp.fetched_rows, + rawRowsReceived: mcp.raw_rows.length, + rowsAfterAccountScope: normalizedRows.length, + rowsAfterRecipeFilter: filterByAnchors.length, + rowsMaterialized: normalizedRows.length, + rowsMatched: filteredRows.length, + rawRowKeysSample: rowDiagnostics.rawRowKeysSample, + materializationDropReason: rowDiagnostics.materializationDropReason, + category: "recipe_visibility_gap", + reasonText: "маршрут не прошел baseline route expectation contract", + nextStep: "проверьте intent/recipe mapping или отключите FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 для безопасного rollout", + limitations: ["route_expectation_mismatch_guard_blocked"], + reasons: [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`], + capabilityAudit, + shadowRouteAudit, + routeExpectationAudit: finalRouteExpectationAudit + }); + } if (intent.intent === "payables_confirmed_as_of_date" && factualResultSemantics.balance_confirmed !== true) { return buildLimitedExecutionResult({ mode, @@ -2480,9 +2556,13 @@ class AddressQueryService { limitations: ["exact_payables_mode_unconfirmed_output_blocked"], reasons: [...baseReasons, "exact_payables_mode_unconfirmed_output_blocked"], capabilityAudit, - shadowRouteAudit + shadowRouteAudit, + routeExpectationAudit: finalRouteExpectationAudit }); } + const reasonsWithRouteExpectation = finalRouteExpectationAudit.status === "mismatch" + ? [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`] + : baseReasons; return { handled: true, reply_text: factual.text, @@ -2533,9 +2613,14 @@ class AddressQueryService { shadow_route_intent: shadowRouteAudit.intent, shadow_route_selected_recipe: shadowRouteAudit.selectedRecipe, shadow_route_status: shadowRouteAudit.status, + route_expectation_status: finalRouteExpectationAudit.status, + route_expectation_reason: finalRouteExpectationAudit.reason, + route_expectation_expected_selected_recipes: finalRouteExpectationAudit.expectedSelectedRecipes, + route_expectation_expected_requested_result_modes: finalRouteExpectationAudit.expectedRequestedResultModes, + route_expectation_expected_result_modes: finalRouteExpectationAudit.expectedResultModes, ...factualResultSemantics, limitations: filters.warnings, - reasons: withConfirmedBalanceFallbackReason(baseReasons, requestedResultMode, factual.semantics, factualResultSemantics.result_mode) + reasons: withConfirmedBalanceFallbackReason(reasonsWithRouteExpectation, requestedResultMode, factual.semantics, factualResultSemantics.result_mode) } }; } diff --git a/llm_normalizer/backend/dist/services/addressRouteExpectations.js b/llm_normalizer/backend/dist/services/addressRouteExpectations.js new file mode 100644 index 0000000..083b21b --- /dev/null +++ b/llm_normalizer/backend/dist/services/addressRouteExpectations.js @@ -0,0 +1,128 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.loadAddressRouteExpectationsContract = loadAddressRouteExpectationsContract; +exports.evaluateAddressRouteExpectation = evaluateAddressRouteExpectation; +const fs_1 = __importDefault(require("fs")); +const path_1 = __importDefault(require("path")); +const EXPECTATIONS_FILE = path_1.default.resolve(__dirname, "..", "..", "..", "..", "docs", "TECH", "address_route_expectations_v1.json"); +function toObject(value) { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + return value; +} +function toNonEmptyString(value) { + if (typeof value !== "string") { + return null; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : null; +} +function toStringArray(value) { + if (!Array.isArray(value)) { + return []; + } + return value.map((item) => toNonEmptyString(item)).filter((item) => Boolean(item)); +} +function parseResultModes(value) { + const raw = toStringArray(value); + return raw.filter((mode) => mode === "heuristic_candidates" || mode === "confirmed_balance"); +} +function parseEntry(value) { + const object = toObject(value); + if (!object) { + return null; + } + const intent = toNonEmptyString(object.intent); + const expectedSelectedRecipes = toStringArray(object.expected_selected_recipes); + if (!intent || expectedSelectedRecipes.length === 0) { + return null; + } + const expectedRequestedResultModes = parseResultModes(object.expected_requested_result_modes); + const expectedResultModes = parseResultModes(object.expected_result_modes); + return { + intent, + expected_selected_recipes: expectedSelectedRecipes, + ...(expectedRequestedResultModes.length > 0 ? { expected_requested_result_modes: expectedRequestedResultModes } : {}), + ...(expectedResultModes.length > 0 ? { expected_result_modes: expectedResultModes } : {}) + }; +} +function loadAddressRouteExpectationsContract() { + const raw = fs_1.default.readFileSync(EXPECTATIONS_FILE, "utf-8"); + const parsed = JSON.parse(raw); + const root = toObject(parsed); + if (!root) { + throw new Error("address_route_expectations_v1: invalid root payload"); + } + const schemaVersion = toNonEmptyString(root.schema_version); + if (schemaVersion !== "address_route_expectations_v1") { + throw new Error(`address_route_expectations_v1: unexpected schema version '${schemaVersion ?? "null"}'`); + } + const updatedAt = toNonEmptyString(root.updated_at) ?? new Date().toISOString(); + const entriesRaw = Array.isArray(root.entries) ? root.entries : []; + const entries = entriesRaw.map(parseEntry).filter((entry) => entry !== null); + if (entries.length === 0) { + throw new Error("address_route_expectations_v1: no valid entries"); + } + return { + schema_version: "address_route_expectations_v1", + updated_at: updatedAt, + entries + }; +} +function evaluateAddressRouteExpectation(input) { + const contract = loadAddressRouteExpectationsContract(); + const entry = contract.entries.find((item) => item.intent === input.intent); + if (!entry) { + return { + status: "not_found", + reason: "route_expectation_not_defined_for_intent", + expected_selected_recipes: [], + expected_requested_result_modes: [], + expected_result_modes: [] + }; + } + if (input.selectedRecipe && !entry.expected_selected_recipes.includes(input.selectedRecipe)) { + return { + status: "mismatch", + reason: "selected_recipe_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes ?? [] + }; + } + if (input.requestedResultMode && + Array.isArray(entry.expected_requested_result_modes) && + entry.expected_requested_result_modes.length > 0 && + !entry.expected_requested_result_modes.includes(input.requestedResultMode)) { + return { + status: "mismatch", + reason: "requested_result_mode_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes, + expected_result_modes: entry.expected_result_modes ?? [] + }; + } + if (input.resultMode && + Array.isArray(entry.expected_result_modes) && + entry.expected_result_modes.length > 0 && + !entry.expected_result_modes.includes(input.resultMode)) { + return { + status: "mismatch", + reason: "result_mode_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes + }; + } + return { + status: "matched", + reason: "route_expectation_matched", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes ?? [] + }; +} diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 082c1ac..6cd417e 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -1473,6 +1473,11 @@ function buildAddressDebugPayload(addressDebug, llmPreDecomposeMeta = null) { shadow_route_intent: addressDebug.shadow_route_intent ?? undefined, shadow_route_selected_recipe: addressDebug.shadow_route_selected_recipe ?? undefined, shadow_route_status: addressDebug.shadow_route_status ?? undefined, + route_expectation_status: addressDebug.route_expectation_status ?? undefined, + route_expectation_reason: addressDebug.route_expectation_reason ?? undefined, + route_expectation_expected_selected_recipes: addressDebug.route_expectation_expected_selected_recipes ?? undefined, + route_expectation_expected_requested_result_modes: addressDebug.route_expectation_expected_requested_result_modes ?? undefined, + route_expectation_expected_result_modes: addressDebug.route_expectation_expected_result_modes ?? undefined, execution_lane: "address_query", llm_decomposition_applied: Boolean(llmMeta?.applied), llm_decomposition_attempted: Boolean(llmMeta?.attempted), diff --git a/llm_normalizer/backend/src/config.ts b/llm_normalizer/backend/src/config.ts index 8c9cc88..b0ccd18 100644 --- a/llm_normalizer/backend/src/config.ts +++ b/llm_normalizer/backend/src/config.ts @@ -151,6 +151,14 @@ export const FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1 = toBooleanFlag( process.env.FEATURE_ASSISTANT_ROUTE_SHADOW_PAYABLES_EXACT_V1, false ); +export const FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1 = toBooleanFlag( + process.env.FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1, + true +); +export const FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 = toBooleanFlag( + process.env.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1, + false +); export const FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1 = toBooleanFlag( process.env.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1, true diff --git a/llm_normalizer/backend/src/services/addressQueryService.ts b/llm_normalizer/backend/src/services/addressQueryService.ts index 42a0d7c..60c81c2 100644 --- a/llm_normalizer/backend/src/services/addressQueryService.ts +++ b/llm_normalizer/backend/src/services/addressQueryService.ts @@ -1,5 +1,7 @@ import { FEATURE_ASSISTANT_CAPABILITY_ROUTE_GUARD_V1, + FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1, + FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1, FEATURE_ASSISTANT_ADDRESS_QUERY_V1, FEATURE_ASSISTANT_ADDRESS_QUERY_LIVE_V1 } from "../config"; @@ -34,6 +36,7 @@ import { resolveAddressCapabilityRouteDecision, resolveShadowRouteIntent } from "./addressCapabilityPolicy"; +import { evaluateAddressRouteExpectation, type AddressRouteExpectationAudit } from "./addressRouteExpectations"; interface NormalizedAddressRow { period: string | null; @@ -63,6 +66,14 @@ interface AddressShadowRouteAudit { status: AddressShadowRouteStatus; } +interface AddressRouteExpectationAuditState { + status: AddressRouteExpectationAudit["status"]; + reason: string; + expectedSelectedRecipes: string[]; + expectedRequestedResultModes: AddressResultMode[]; + expectedResultModes: AddressResultMode[]; +} + const ACCOUNT_SCOPE_FIELDS_CHECKED = ["account_dt", "account_kt", "registrator", "analytics"] as const; const ACCOUNT_SCOPE_MATCH_STRATEGY = "account_code_regex_plus_alias_map_v1" as const; const ADDRESS_ANCHOR_RECOVERY_LIMIT = 1000; @@ -966,6 +977,36 @@ function buildShadowRouteAudit(input: { }; } +function buildRouteExpectationAudit(input: { + intent: AddressIntent; + selectedRecipe: string | null; + requestedResultMode?: AddressResultMode; + resultMode?: AddressResultMode; +}): AddressRouteExpectationAuditState { + if (!FEATURE_ASSISTANT_ROUTE_EXPECTATION_AUDIT_V1) { + return { + status: "not_found", + reason: "route_expectation_audit_disabled", + expectedSelectedRecipes: [], + expectedRequestedResultModes: [], + expectedResultModes: [] + }; + } + const audit = evaluateAddressRouteExpectation({ + intent: input.intent, + selectedRecipe: input.selectedRecipe, + requestedResultMode: input.requestedResultMode, + resultMode: input.resultMode + }); + return { + status: audit.status, + reason: audit.reason, + expectedSelectedRecipes: audit.expected_selected_recipes, + expectedRequestedResultModes: audit.expected_requested_result_modes, + expectedResultModes: audit.expected_result_modes + }; +} + function enforceStrictAccountScopeForIntent( plan: AddressRecipeExecutionPlan, intent: AddressIntent @@ -1765,6 +1806,7 @@ function buildLimitedExecutionResult(input: { category: AddressLimitedReasonCategory; capabilityAudit?: AddressCapabilityAudit; shadowRouteAudit?: AddressShadowRouteAudit; + routeExpectationAudit?: AddressRouteExpectationAuditState; }): AddressExecutionResult { const accountScopeAudit = input.accountScopeAudit ?? buildDefaultAccountScopeAudit(input.filters); const resultSemantics = deriveAddressResultSemantics({ @@ -1786,6 +1828,14 @@ function buildLimitedExecutionResult(input: { !reasonsWithConfirmedFallback.includes("exact_payables_mode_limited_response") ? [...reasonsWithConfirmedFallback, "exact_payables_mode_limited_response"] : reasonsWithConfirmedFallback; + const routeExpectationAudit = + input.routeExpectationAudit ?? + buildRouteExpectationAudit({ + intent: input.intent.intent, + selectedRecipe: input.selectedRecipe, + requestedResultMode: requestedResultMode, + resultMode: resultSemantics.result_mode + }); return { handled: true, reply_text: composeLimitedReply({ @@ -1844,6 +1894,11 @@ function buildLimitedExecutionResult(input: { shadow_route_intent: input.shadowRouteAudit?.intent ?? null, shadow_route_selected_recipe: input.shadowRouteAudit?.selectedRecipe ?? null, shadow_route_status: input.shadowRouteAudit?.status ?? "skipped", + route_expectation_status: routeExpectationAudit.status, + route_expectation_reason: routeExpectationAudit.reason, + route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, + route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, + route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, ...resultSemantics, limitations: input.limitations, reasons @@ -2991,6 +3046,45 @@ export class AddressQueryService { }), factual.semantics ); + const finalRouteExpectationAudit = buildRouteExpectationAudit({ + intent: intent.intent, + selectedRecipe: effectiveRecipeId, + requestedResultMode, + resultMode: factualResultSemantics.result_mode + }); + if (finalRouteExpectationAudit.status === "mismatch" && FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1) { + return buildLimitedExecutionResult({ + mode, + shape, + intent, + filters: filters.extracted_filters, + missingRequiredFilters: [], + selectedRecipe: effectiveRecipeId, + accountScopeMode: plan.account_scope_mode, + accountScopeFallbackApplied, + accountScopeAudit, + anchor, + matchFailureStage, + matchFailureReason, + mcpCallStatus: stageStatus, + rowsFetched: mcp.fetched_rows, + rawRowsReceived: mcp.raw_rows.length, + rowsAfterAccountScope: normalizedRows.length, + rowsAfterRecipeFilter: filterByAnchors.length, + rowsMaterialized: normalizedRows.length, + rowsMatched: filteredRows.length, + rawRowKeysSample: rowDiagnostics.rawRowKeysSample, + materializationDropReason: rowDiagnostics.materializationDropReason, + category: "recipe_visibility_gap", + reasonText: "маршрут не прошел baseline route expectation contract", + nextStep: "проверьте intent/recipe mapping или отключите FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 для безопасного rollout", + limitations: ["route_expectation_mismatch_guard_blocked"], + reasons: [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`], + capabilityAudit, + shadowRouteAudit, + routeExpectationAudit: finalRouteExpectationAudit + }); + } if (intent.intent === "payables_confirmed_as_of_date" && factualResultSemantics.balance_confirmed !== true) { return buildLimitedExecutionResult({ mode, @@ -3020,9 +3114,14 @@ export class AddressQueryService { limitations: ["exact_payables_mode_unconfirmed_output_blocked"], reasons: [...baseReasons, "exact_payables_mode_unconfirmed_output_blocked"], capabilityAudit, - shadowRouteAudit + shadowRouteAudit, + routeExpectationAudit: finalRouteExpectationAudit }); } + const reasonsWithRouteExpectation = + finalRouteExpectationAudit.status === "mismatch" + ? [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`] + : baseReasons; return { handled: true, reply_text: factual.text, @@ -3073,10 +3172,15 @@ export class AddressQueryService { shadow_route_intent: shadowRouteAudit.intent, shadow_route_selected_recipe: shadowRouteAudit.selectedRecipe, shadow_route_status: shadowRouteAudit.status, + route_expectation_status: finalRouteExpectationAudit.status, + route_expectation_reason: finalRouteExpectationAudit.reason, + route_expectation_expected_selected_recipes: finalRouteExpectationAudit.expectedSelectedRecipes, + route_expectation_expected_requested_result_modes: finalRouteExpectationAudit.expectedRequestedResultModes, + route_expectation_expected_result_modes: finalRouteExpectationAudit.expectedResultModes, ...factualResultSemantics, limitations: filters.warnings, reasons: withConfirmedBalanceFallbackReason( - baseReasons, + reasonsWithRouteExpectation, requestedResultMode, factual.semantics, factualResultSemantics.result_mode diff --git a/llm_normalizer/backend/src/services/addressRouteExpectations.ts b/llm_normalizer/backend/src/services/addressRouteExpectations.ts new file mode 100644 index 0000000..c6562c1 --- /dev/null +++ b/llm_normalizer/backend/src/services/addressRouteExpectations.ts @@ -0,0 +1,162 @@ +import fs from "fs"; +import path from "path"; +import type { AddressIntent, AddressResultMode } from "../types/addressQuery"; + +export type AddressRouteExpectationStatus = "matched" | "mismatch" | "not_found"; + +export interface AddressRouteExpectationEntry { + intent: AddressIntent; + expected_selected_recipes: string[]; + expected_requested_result_modes?: AddressResultMode[]; + expected_result_modes?: AddressResultMode[]; +} + +export interface AddressRouteExpectationsContract { + schema_version: "address_route_expectations_v1"; + updated_at: string; + entries: AddressRouteExpectationEntry[]; +} + +export interface AddressRouteExpectationAudit { + status: AddressRouteExpectationStatus; + reason: string; + expected_selected_recipes: string[]; + expected_requested_result_modes: AddressResultMode[]; + expected_result_modes: AddressResultMode[]; +} + +const EXPECTATIONS_FILE = path.resolve(__dirname, "..", "..", "..", "..", "docs", "TECH", "address_route_expectations_v1.json"); + +function toObject(value: unknown): Record | null { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + return value as Record; +} + +function toNonEmptyString(value: unknown): string | null { + if (typeof value !== "string") { + return null; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : null; +} + +function toStringArray(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.map((item) => toNonEmptyString(item)).filter((item): item is string => Boolean(item)); +} + +function parseResultModes(value: unknown): AddressResultMode[] { + const raw = toStringArray(value); + return raw.filter((mode): mode is AddressResultMode => mode === "heuristic_candidates" || mode === "confirmed_balance"); +} + +function parseEntry(value: unknown): AddressRouteExpectationEntry | null { + const object = toObject(value); + if (!object) { + return null; + } + const intent = toNonEmptyString(object.intent) as AddressIntent | null; + const expectedSelectedRecipes = toStringArray(object.expected_selected_recipes); + if (!intent || expectedSelectedRecipes.length === 0) { + return null; + } + const expectedRequestedResultModes = parseResultModes(object.expected_requested_result_modes); + const expectedResultModes = parseResultModes(object.expected_result_modes); + return { + intent, + expected_selected_recipes: expectedSelectedRecipes, + ...(expectedRequestedResultModes.length > 0 ? { expected_requested_result_modes: expectedRequestedResultModes } : {}), + ...(expectedResultModes.length > 0 ? { expected_result_modes: expectedResultModes } : {}) + }; +} + +export function loadAddressRouteExpectationsContract(): AddressRouteExpectationsContract { + const raw = fs.readFileSync(EXPECTATIONS_FILE, "utf-8"); + const parsed = JSON.parse(raw) as unknown; + const root = toObject(parsed); + if (!root) { + throw new Error("address_route_expectations_v1: invalid root payload"); + } + const schemaVersion = toNonEmptyString(root.schema_version); + if (schemaVersion !== "address_route_expectations_v1") { + throw new Error(`address_route_expectations_v1: unexpected schema version '${schemaVersion ?? "null"}'`); + } + const updatedAt = toNonEmptyString(root.updated_at) ?? new Date().toISOString(); + const entriesRaw = Array.isArray(root.entries) ? root.entries : []; + const entries = entriesRaw.map(parseEntry).filter((entry): entry is AddressRouteExpectationEntry => entry !== null); + if (entries.length === 0) { + throw new Error("address_route_expectations_v1: no valid entries"); + } + return { + schema_version: "address_route_expectations_v1", + updated_at: updatedAt, + entries + }; +} + +export function evaluateAddressRouteExpectation(input: { + intent: AddressIntent; + selectedRecipe: string | null; + requestedResultMode?: AddressResultMode; + resultMode?: AddressResultMode; +}): AddressRouteExpectationAudit { + const contract = loadAddressRouteExpectationsContract(); + const entry = contract.entries.find((item) => item.intent === input.intent); + if (!entry) { + return { + status: "not_found", + reason: "route_expectation_not_defined_for_intent", + expected_selected_recipes: [], + expected_requested_result_modes: [], + expected_result_modes: [] + }; + } + if (input.selectedRecipe && !entry.expected_selected_recipes.includes(input.selectedRecipe)) { + return { + status: "mismatch", + reason: "selected_recipe_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes ?? [] + }; + } + if ( + input.requestedResultMode && + Array.isArray(entry.expected_requested_result_modes) && + entry.expected_requested_result_modes.length > 0 && + !entry.expected_requested_result_modes.includes(input.requestedResultMode) + ) { + return { + status: "mismatch", + reason: "requested_result_mode_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes, + expected_result_modes: entry.expected_result_modes ?? [] + }; + } + if ( + input.resultMode && + Array.isArray(entry.expected_result_modes) && + entry.expected_result_modes.length > 0 && + !entry.expected_result_modes.includes(input.resultMode) + ) { + return { + status: "mismatch", + reason: "result_mode_mismatch", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes + }; + } + return { + status: "matched", + reason: "route_expectation_matched", + expected_selected_recipes: entry.expected_selected_recipes, + expected_requested_result_modes: entry.expected_requested_result_modes ?? [], + expected_result_modes: entry.expected_result_modes ?? [] + }; +} diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index f7778da..ad025cb 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -1427,6 +1427,12 @@ function buildAddressDebugPayload(addressDebug, llmPreDecomposeMeta = null) { shadow_route_intent: addressDebug.shadow_route_intent ?? undefined, shadow_route_selected_recipe: addressDebug.shadow_route_selected_recipe ?? undefined, shadow_route_status: addressDebug.shadow_route_status ?? undefined, + route_expectation_status: addressDebug.route_expectation_status ?? undefined, + route_expectation_reason: addressDebug.route_expectation_reason ?? undefined, + route_expectation_expected_selected_recipes: addressDebug.route_expectation_expected_selected_recipes ?? undefined, + route_expectation_expected_requested_result_modes: + addressDebug.route_expectation_expected_requested_result_modes ?? undefined, + route_expectation_expected_result_modes: addressDebug.route_expectation_expected_result_modes ?? undefined, execution_lane: "address_query", llm_decomposition_applied: Boolean(llmMeta?.applied), llm_decomposition_attempted: Boolean(llmMeta?.attempted), diff --git a/llm_normalizer/backend/src/types/addressQuery.ts b/llm_normalizer/backend/src/types/addressQuery.ts index bf18521..e64e58b 100644 --- a/llm_normalizer/backend/src/types/addressQuery.ts +++ b/llm_normalizer/backend/src/types/addressQuery.ts @@ -31,6 +31,7 @@ export type AddressAsOfDateBasis = "period_end" | "explicit_as_of_date" | "perio export type AddressCapabilityLayer = "compute" | "navigation" | "conversational"; export type AddressCapabilityRouteMode = "exact" | "heuristic"; export type AddressShadowRouteStatus = "skipped" | "planned" | "unavailable"; +export type AddressRouteExpectationStatus = "matched" | "mismatch" | "not_found"; export type AddressQueryShape = | "AGGREGATE_LOOKUP" @@ -209,6 +210,11 @@ export interface AddressExecutionDebug { shadow_route_intent?: AddressIntent | null; shadow_route_selected_recipe?: string | null; shadow_route_status?: AddressShadowRouteStatus | null; + route_expectation_status?: AddressRouteExpectationStatus | null; + route_expectation_reason?: string | null; + route_expectation_expected_selected_recipes?: string[]; + route_expectation_expected_requested_result_modes?: AddressResultMode[]; + route_expectation_expected_result_modes?: AddressResultMode[]; limitations: string[]; reasons: string[]; } diff --git a/llm_normalizer/backend/src/types/assistant.ts b/llm_normalizer/backend/src/types/assistant.ts index ca62e94..8e5ae86 100644 --- a/llm_normalizer/backend/src/types/assistant.ts +++ b/llm_normalizer/backend/src/types/assistant.ts @@ -441,6 +441,11 @@ export interface AssistantDebugPayload { shadow_route_intent?: string | null; shadow_route_selected_recipe?: string | null; shadow_route_status?: "skipped" | "planned" | "unavailable" | null; + route_expectation_status?: "matched" | "mismatch" | "not_found" | null; + route_expectation_reason?: string | null; + route_expectation_expected_selected_recipes?: string[]; + route_expectation_expected_requested_result_modes?: Array<"heuristic_candidates" | "confirmed_balance">; + route_expectation_expected_result_modes?: Array<"heuristic_candidates" | "confirmed_balance">; execution_lane?: "address_query" | "deep_analysis"; llm_decomposition_applied?: boolean; llm_decomposition_attempted?: boolean; diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index b984dd9..e9f1217 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -2503,6 +2503,8 @@ describe("address query limited taxonomy and stage diagnostics", { timeout: 1500 expect(result?.debug.result_mode).toBe("confirmed_balance"); expect(result?.debug.as_of_date_basis).toBe("explicit_as_of_date"); expect(result?.debug.selected_recipe).toBe("address_payables_confirmed_as_of_date_v1"); + expect(result?.debug.route_expectation_status).toBe("matched"); + expect(result?.debug.route_expectation_reason).toBe("route_expectation_matched"); expect(Array.isArray(result?.debug.reasons)).toBe(true); expect(result?.debug.reasons).not.toContain("confirmed_balance_unavailable_fallback_to_heuristic_candidates"); expect(["FACTUAL_LIST", "FACTUAL_SUMMARY", "LIMITED_WITH_REASON"]).toContain(result?.response_type); diff --git a/llm_normalizer/backend/tests/addressRouteExpectations.test.ts b/llm_normalizer/backend/tests/addressRouteExpectations.test.ts new file mode 100644 index 0000000..ad3fe7e --- /dev/null +++ b/llm_normalizer/backend/tests/addressRouteExpectations.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "vitest"; +import { + evaluateAddressRouteExpectation, + loadAddressRouteExpectationsContract +} from "../src/services/addressRouteExpectations"; + +describe("address route expectations contract", () => { + it("loads expectations contract with entries", () => { + const contract = loadAddressRouteExpectationsContract(); + expect(contract.schema_version).toBe("address_route_expectations_v1"); + expect(Array.isArray(contract.entries)).toBe(true); + expect(contract.entries.length).toBeGreaterThan(0); + }); + + it("matches expected recipe and result mode for exact payables route", () => { + const audit = evaluateAddressRouteExpectation({ + intent: "payables_confirmed_as_of_date", + selectedRecipe: "address_payables_confirmed_as_of_date_v1", + requestedResultMode: "confirmed_balance", + resultMode: "confirmed_balance" + }); + expect(audit.status).toBe("matched"); + expect(audit.reason).toBe("route_expectation_matched"); + }); + + it("detects selected recipe mismatch", () => { + const audit = evaluateAddressRouteExpectation({ + intent: "payables_confirmed_as_of_date", + selectedRecipe: "address_movements_payables_v1", + requestedResultMode: "confirmed_balance", + resultMode: "confirmed_balance" + }); + expect(audit.status).toBe("mismatch"); + expect(audit.reason).toBe("selected_recipe_mismatch"); + expect(audit.expected_selected_recipes).toContain("address_payables_confirmed_as_of_date_v1"); + }); +});