From b3572d9d113b2ad82176c027d8c56ccad3305b56 Mon Sep 17 00:00:00 2001 From: dctouch Date: Fri, 17 Apr 2026 11:14:10 +0300 Subject: [PATCH] =?UTF-8?q?=D0=90=D0=A0=D0=A7=20=D0=90=D0=9F11=20-=20?= =?UTF-8?q?=D0=92=D1=8B=D0=BD=D0=B5=D1=81=D1=82=D0=B8=20coverage=20evidenc?= =?UTF-8?q?e=20contract=20=D0=B4=D0=BB=D1=8F=20exact=20lane=20=D0=B8=20?= =?UTF-8?q?=D0=B7=D0=B0=D0=B2=D0=B5=D1=80=D1=88=D0=B8=D1=82=D1=8C=20phase?= =?UTF-8?q?=204=20=D0=B0=D0=B3=D0=B5=D0=BD=D1=82=D0=BD=D1=8B=D0=BC=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B3=D0=BE=D0=BD=D0=BE=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ..._harness_phase4_coverage_evidence_mix.json | 114 +++++ .../services/addressCoverageEvidencePolicy.js | 299 ++++++++++++++ .../dist/services/addressQueryService.js | 231 ++++------- ...ssistantTruthAnswerPolicyRuntimeAdapter.js | 14 +- .../services/addressCoverageEvidencePolicy.ts | 391 ++++++++++++++++++ .../src/services/addressQueryService.ts | 345 ++++++---------- ...ssistantTruthAnswerPolicyRuntimeAdapter.ts | 15 +- .../backend/src/types/addressQuery.ts | 19 + ...nterpartyItemFlowAndOpenItemsRoute.test.ts | 4 + .../addressCoverageEvidencePolicy.test.ts | 90 ++++ ...essInventorySelectedObjectFollowup.test.ts | 2 + ...antTruthAnswerPolicyRuntimeAdapter.test.ts | 14 + .../data/autorun_generators/history.json | 35 +- ..._20260417080808_gen-ag04170808-1907fa.json | 83 ++++ ..._20260417080808_gen-ag04170808-1907fa.json | 40 ++ ..._saved_session_runtime_job-GwhfwhCDWz.json | 36 ++ 16 files changed, 1357 insertions(+), 375 deletions(-) create mode 100644 docs/orchestration/address_truth_harness_phase4_coverage_evidence_mix.json create mode 100644 llm_normalizer/backend/dist/services/addressCoverageEvidencePolicy.js create mode 100644 llm_normalizer/backend/src/services/addressCoverageEvidencePolicy.ts create mode 100644 llm_normalizer/backend/tests/addressCoverageEvidencePolicy.test.ts create mode 100644 llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260417080808_gen-ag04170808-1907fa.json create mode 100644 llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260417080808_gen-ag04170808-1907fa.json create mode 100644 llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-GwhfwhCDWz.json diff --git a/docs/orchestration/address_truth_harness_phase4_coverage_evidence_mix.json b/docs/orchestration/address_truth_harness_phase4_coverage_evidence_mix.json new file mode 100644 index 0000000..d15c3c9 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase4_coverage_evidence_mix.json @@ -0,0 +1,114 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase4_coverage_evidence_mix", + "domain": "address_phase4_coverage_evidence_mix", + "title": "Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "description": "Targeted mixed-domain replay for the explicit coverage/evidence contract: matched factual rows, blocked explanatory fallback, root reset, and temporal-limited selected-object provenance.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_counterparty_documents", + "title": "Counterparty documents use the normalized legal name", + "question": "покажи все документы по чепурнову", + "allowed_reply_types": [ + "factual" + ], + "expected_intents": [ + "list_documents_by_counterparty" + ], + "required_direct_answer_patterns_any": [ + "(?i)контрагент:", + "(?i)чепурнов" + ] + }, + { + "step_id": "step_02_counterparty_shipments_or_fallback", + "title": "Supplier shipment question stays human even when exact supply rows are absent", + "question": "что нам отгружал чепурнов, какой товар или услугу?", + "allowed_reply_types": [ + "factual", + "partial_coverage" + ], + "expected_intents": [ + "list_documents_by_counterparty" + ], + "required_direct_answer_patterns_any": [ + "(?i)чепурнов", + "(?i)постав", + "(?i)оплат|возврат|товар|услуг" + ], + "forbidden_direct_answer_patterns": [ + "(?i)^сейчас не дам прямой адресный ответ", + "(?i)^в текущем адресном контуре этот запрос лучше не закрывать в лоб" + ] + }, + { + "step_id": "step_03_inventory_reset_march_2021", + "title": "Inventory root resets cleanly after the counterparty branch", + "question": "какие остатки на складе на март 2021", + "allowed_reply_types": [ + "factual" + ], + "expected_intents": [ + "inventory_on_hand_as_of_date" + ], + "required_filters": { + "as_of_date": "2021-03-31", + "period_from": "2021-03-01", + "period_to": "2021-03-31" + }, + "required_direct_answer_patterns_any": [ + "31\\.03\\.2021", + "(?i)на складе" + ], + "forbidden_direct_answer_patterns": [ + "(?i)чепурнов", + "(?i)контрагент:" + ] + }, + { + "step_id": "step_04_selected_item_supplier_temporal_limit", + "title": "Selected-object supplier provenance remains direct-answer-first under temporal limit", + "question": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?", + "allowed_reply_types": [ + "factual" + ], + "expected_intents": [ + "inventory_purchase_provenance_for_item" + ], + "required_direct_answer_patterns_any": [ + "(?i)столешница 600\\*3050\\*26 альмандин", + "(?i)поставщик|поставил|куплен", + "(?i)союз|торговый дом" + ], + "forbidden_direct_answer_patterns": [ + "(?i)^на 31\\.03\\.2021 на складе", + "(?i)^сейчас не дам прямой адресный ответ" + ] + }, + { + "step_id": "step_05_inventory_same_date_restore", + "title": "Same-date restore returns to the March 2021 root snapshot", + "question": "покажи еще раз остатки на эту же дату", + "allowed_reply_types": [ + "factual" + ], + "expected_intents": [ + "inventory_on_hand_as_of_date" + ], + "required_filters": { + "as_of_date": "2021-03-31", + "period_from": "2021-03-01", + "period_to": "2021-03-31" + }, + "required_direct_answer_patterns_any": [ + "31\\.03\\.2021", + "(?i)на складе" + ], + "forbidden_direct_answer_patterns": [ + "(?i)^сейчас не дам прямой адресный ответ", + "(?i)transition_not_supported_by_capability" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/addressCoverageEvidencePolicy.js b/llm_normalizer/backend/dist/services/addressCoverageEvidencePolicy.js new file mode 100644 index 0000000..f45fc67 --- /dev/null +++ b/llm_normalizer/backend/dist/services/addressCoverageEvidencePolicy.js @@ -0,0 +1,299 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION = void 0; +exports.isHeuristicCandidatesIntent = isHeuristicCandidatesIntent; +exports.isConfirmedBalanceIntent = isConfirmedBalanceIntent; +exports.resolveAddressAsOfDateBasis = resolveAddressAsOfDateBasis; +exports.resolveAddressRequestedResultMode = resolveAddressRequestedResultMode; +exports.resolveAddressCoverageEvidence = resolveAddressCoverageEvidence; +exports.attachAddressCoverageEvidence = attachAddressCoverageEvidence; +exports.toAddressCoverageEvidenceContract = toAddressCoverageEvidenceContract; +exports.ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION = "address_coverage_evidence_v1"; +function toRecordObject(value) { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + return value; +} +function toNonEmptyString(value) { + if (value === null || value === undefined) { + return null; + } + const text = String(value).trim(); + return text.length > 0 ? text : null; +} +function normalizeIsoDateHint(value) { + if (typeof value !== "string") { + return null; + } + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + const match = trimmed.match(/^(\d{4})-(\d{2})-(\d{2})(?:T.*)?$/); + if (!match) { + return null; + } + const year = Number(match[1]); + const month = Number(match[2]); + const day = Number(match[3]); + if (!Number.isFinite(year) || !Number.isFinite(month) || !Number.isFinite(day)) { + return null; + } + const candidate = new Date(Date.UTC(year, month - 1, day)); + if (candidate.getUTCFullYear() !== year || + candidate.getUTCMonth() + 1 !== month || + candidate.getUTCDate() !== day) { + return null; + } + return `${match[1]}-${match[2]}-${match[3]}`; +} +function normalizeReasonCode(value) { + const normalized = value + .trim() + .replace(/[^\p{L}\p{N}_.:-]+/gu, "_") + .replace(/^_+|_+$/g, "") + .toLowerCase(); + return normalized.length > 0 ? normalized.slice(0, 120) : null; +} +function pushReason(target, value) { + const text = toNonEmptyString(value); + if (!text) { + return; + } + const normalized = normalizeReasonCode(text); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} +function isResultMode(value) { + return value === "heuristic_candidates" || value === "confirmed_balance"; +} +function isEvidenceStrength(value) { + return value === "weak" || value === "medium" || value === "strong"; +} +function isCoverageStatus(value) { + return value === "full" || value === "partial" || value === "blocked"; +} +function isAsOfDateBasis(value) { + return (value === "period_end" || + value === "explicit_as_of_date" || + value === "period_range" || + value === "implicit_current_snapshot"); +} +function isEvidenceBasis(value) { + return (value === "matched_rows" || + value === "exact_negative" || + value === "limited_response" || + value === "heuristic_candidates" || + value === "unknown"); +} +function isHeuristicCandidatesIntent(intent) { + return (intent === "list_receivables_counterparties" || + intent === "list_payables_counterparties" || + intent === "list_open_contracts" || + intent === "open_items_by_counterparty_or_contract"); +} +function isConfirmedBalanceIntent(intent) { + return (intent === "account_balance_snapshot" || + intent === "documents_forming_balance" || + intent === "inventory_on_hand_as_of_date" || + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "open_contracts_confirmed_as_of_date" || + intent === "payables_confirmed_as_of_date" || + intent === "receivables_confirmed_as_of_date" || + intent === "vat_payable_confirmed_as_of_date" || + intent === "vat_liability_confirmed_for_tax_period"); +} +function resolveAddressAsOfDateBasis(filters, semanticFrame) { + if (semanticFrame?.date_basis_hint) { + return semanticFrame.date_basis_hint; + } + const asOfDate = normalizeIsoDateHint(filters.as_of_date); + if (asOfDate) { + return "explicit_as_of_date"; + } + const periodFrom = normalizeIsoDateHint(filters.period_from); + const periodTo = normalizeIsoDateHint(filters.period_to); + if (periodFrom && periodTo) { + return "period_range"; + } + if (!periodFrom && periodTo) { + return "period_end"; + } + if (periodFrom) { + return "period_range"; + } + return null; +} +function deriveAddressEvidenceStrength(input) { + if (isHeuristicCandidatesIntent(input.intent)) { + if (input.rowsMatched <= 0 || input.responseType === "LIMITED_WITH_REASON") { + return "weak"; + } + if (input.selectedRecipe === "address_open_items_by_party_or_contract_v1") { + return "medium"; + } + return "weak"; + } + if (isConfirmedBalanceIntent(input.intent)) { + if (input.rowsMatched > 0) { + return "strong"; + } + return input.responseType === "LIMITED_WITH_REASON" ? "weak" : "medium"; + } + return null; +} +function resolveAddressRequestedResultMode(intent, filters, semanticFrame) { + if (isConfirmedBalanceIntent(intent)) { + return "confirmed_balance"; + } + if (intent === "list_open_contracts") { + return "heuristic_candidates"; + } + if (isHeuristicCandidatesIntent(intent)) { + const asOfDateBasis = resolveAddressAsOfDateBasis(filters, semanticFrame); + if (asOfDateBasis === "explicit_as_of_date" || + asOfDateBasis === "period_end" || + asOfDateBasis === "period_range" || + asOfDateBasis === "implicit_current_snapshot") { + return "confirmed_balance"; + } + return "heuristic_candidates"; + } + return null; +} +function balanceConfirmedFrom(input) { + if (isHeuristicCandidatesIntent(input.intent)) { + return false; + } + if (isConfirmedBalanceIntent(input.intent)) { + return input.responseType !== "LIMITED_WITH_REASON"; + } + return null; +} +function coverageStatusFrom(input) { + if (input.responseType === "LIMITED_WITH_REASON") { + return input.resultMode === "heuristic_candidates" ? "partial" : "blocked"; + } + if (input.balanceConfirmed === false) { + return "partial"; + } + if (input.rowsMatched > 0) { + return "full"; + } + if (input.resultMode === "heuristic_candidates") { + return "partial"; + } + if (input.resultMode === "confirmed_balance" && input.balanceConfirmed === true) { + return "full"; + } + return "blocked"; +} +function evidenceBasisFrom(input) { + if (input.responseType === "LIMITED_WITH_REASON") { + return "limited_response"; + } + if (input.resultMode === "heuristic_candidates" || input.balanceConfirmed === false) { + return "heuristic_candidates"; + } + if (input.rowsMatched > 0) { + return "matched_rows"; + } + if (input.resultMode === "confirmed_balance") { + return "exact_negative"; + } + return "unknown"; +} +function resolveAddressCoverageEvidence(input) { + const requestedResultMode = resolveAddressRequestedResultMode(input.intent, input.filters, input.semanticFrame); + const resultMode = input.overrideResultMode ?? requestedResultMode; + const evidenceStrength = input.overrideEvidenceStrength ?? deriveAddressEvidenceStrength(input); + const balanceConfirmed = input.overrideBalanceConfirmed ?? balanceConfirmedFrom(input); + const asOfDateBasis = resolveAddressAsOfDateBasis(input.filters, input.semanticFrame); + const coverageStatus = coverageStatusFrom({ + resultMode, + balanceConfirmed, + responseType: input.responseType, + rowsMatched: input.rowsMatched + }); + const evidenceBasis = evidenceBasisFrom({ + resultMode, + responseType: input.responseType, + rowsMatched: input.rowsMatched, + balanceConfirmed + }); + const reasonCodes = []; + pushReason(reasonCodes, `coverage_status_${coverageStatus}`); + pushReason(reasonCodes, resultMode ? `result_mode_${resultMode}` : "result_mode_unknown"); + pushReason(reasonCodes, evidenceStrength ? `evidence_strength_${evidenceStrength}` : "evidence_strength_none"); + pushReason(reasonCodes, `evidence_basis_${evidenceBasis}`); + pushReason(reasonCodes, balanceConfirmed === true ? "balance_confirmed_true" : balanceConfirmed === false ? "balance_confirmed_false" : "balance_confirmed_unknown"); + pushReason(reasonCodes, asOfDateBasis ? `as_of_date_basis_${asOfDateBasis}` : "as_of_date_basis_none"); + return { + schema_version: exports.ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION, + policy_owner: "addressCoverageEvidencePolicy", + requested_result_mode: requestedResultMode, + result_mode: resultMode, + evidence_strength: evidenceStrength, + balance_confirmed: balanceConfirmed, + as_of_date_basis: asOfDateBasis, + coverage_status: coverageStatus, + evidence_basis: evidenceBasis, + reason_codes: reasonCodes.slice(0, 24) + }; +} +function attachAddressCoverageEvidence(debugPayload, input) { + return { + ...debugPayload, + address_coverage_evidence_v1: resolveAddressCoverageEvidence(input) + }; +} +function toAddressCoverageEvidenceContract(value) { + const record = toRecordObject(value); + if (!record) { + return null; + } + const requestedResultMode = toNonEmptyString(record.requested_result_mode); + const resultMode = toNonEmptyString(record.result_mode); + const evidenceStrength = toNonEmptyString(record.evidence_strength); + const asOfDateBasis = toNonEmptyString(record.as_of_date_basis); + const coverageStatus = toNonEmptyString(record.coverage_status); + const evidenceBasis = toNonEmptyString(record.evidence_basis); + const balanceConfirmed = typeof record.balance_confirmed === "boolean" ? record.balance_confirmed : null; + if (!isCoverageStatus(coverageStatus) || !isEvidenceBasis(evidenceBasis)) { + return null; + } + if (requestedResultMode !== null && !isResultMode(requestedResultMode)) { + return null; + } + if (resultMode !== null && !isResultMode(resultMode)) { + return null; + } + if (evidenceStrength !== null && !isEvidenceStrength(evidenceStrength)) { + return null; + } + if (asOfDateBasis !== null && !isAsOfDateBasis(asOfDateBasis)) { + return null; + } + return { + schema_version: exports.ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION, + policy_owner: "addressCoverageEvidencePolicy", + requested_result_mode: requestedResultMode, + result_mode: resultMode, + evidence_strength: evidenceStrength, + balance_confirmed: balanceConfirmed, + as_of_date_basis: asOfDateBasis, + coverage_status: coverageStatus, + evidence_basis: evidenceBasis, + reason_codes: Array.isArray(record.reason_codes) + ? record.reason_codes + .map((item) => toNonEmptyString(item)) + .filter((item) => Boolean(item)) + .slice(0, 24) + : [] + }; +} diff --git a/llm_normalizer/backend/dist/services/addressQueryService.js b/llm_normalizer/backend/dist/services/addressQueryService.js index 6d62e6e..7a3c01d 100644 --- a/llm_normalizer/backend/dist/services/addressQueryService.js +++ b/llm_normalizer/backend/dist/services/addressQueryService.js @@ -10,6 +10,7 @@ const composeStage_1 = require("./address_runtime/composeStage"); const addressCapabilityPolicy_1 = require("./addressCapabilityPolicy"); const addressRouteExpectations_1 = require("./addressRouteExpectations"); const assistantOrganizationMatcher_1 = require("./assistantOrganizationMatcher"); +const addressCoverageEvidencePolicy_1 = require("./addressCoverageEvidencePolicy"); const addressTruthGatePolicy_1 = require("./addressTruthGatePolicy"); const openaiResponsesClient_1 = require("./openaiResponsesClient"); const files_1 = require("../utils/files"); @@ -1452,124 +1453,6 @@ function isOrganizationScopedInventoryIntent(intent) { function collectOrganizationCandidatesFromRows(rows) { return (0, assistantOrganizationMatcher_1.mergeKnownOrganizations)(rows.map((row) => row.organization).filter((value) => Boolean(value))); } -function isHeuristicCandidatesIntent(intent) { - return (intent === "list_receivables_counterparties" || - intent === "list_payables_counterparties" || - intent === "list_open_contracts" || - intent === "open_items_by_counterparty_or_contract"); -} -function isConfirmedBalanceIntent(intent) { - return (intent === "account_balance_snapshot" || - intent === "documents_forming_balance" || - intent === "inventory_on_hand_as_of_date" || - intent === "inventory_purchase_provenance_for_item" || - intent === "inventory_purchase_documents_for_item" || - intent === "inventory_sale_trace_for_item" || - intent === "inventory_purchase_to_sale_chain" || - intent === "open_contracts_confirmed_as_of_date" || - intent === "payables_confirmed_as_of_date" || - intent === "receivables_confirmed_as_of_date" || - intent === "vat_payable_confirmed_as_of_date" || - intent === "vat_liability_confirmed_for_tax_period"); -} -function resolveAsOfDateBasis(filters, semanticFrame) { - if (semanticFrame?.date_basis_hint) { - return semanticFrame.date_basis_hint; - } - const asOfDate = normalizeAnalysisDateHint(filters.as_of_date); - if (asOfDate) { - return "explicit_as_of_date"; - } - const periodFrom = normalizeAnalysisDateHint(filters.period_from); - const periodTo = normalizeAnalysisDateHint(filters.period_to); - if (periodFrom && periodTo) { - return "period_range"; - } - if (!periodFrom && periodTo) { - return "period_end"; - } - if (periodFrom) { - return "period_range"; - } - return null; -} -function deriveAddressEvidenceStrength(input) { - if (isHeuristicCandidatesIntent(input.intent)) { - if (input.rowsMatched <= 0 || input.responseType === "LIMITED_WITH_REASON") { - return "weak"; - } - if (input.selectedRecipe === "address_open_items_by_party_or_contract_v1") { - return "medium"; - } - return "weak"; - } - if (isConfirmedBalanceIntent(input.intent)) { - if (input.rowsMatched > 0) { - return "strong"; - } - return input.responseType === "LIMITED_WITH_REASON" ? "weak" : "medium"; - } - return undefined; -} -function resolveRequestedResultMode(intent, filters, semanticFrame) { - if (isConfirmedBalanceIntent(intent)) { - return "confirmed_balance"; - } - if (intent === "list_open_contracts") { - return "heuristic_candidates"; - } - if (isHeuristicCandidatesIntent(intent)) { - const asOfDateBasis = resolveAsOfDateBasis(filters, semanticFrame); - if (asOfDateBasis === "explicit_as_of_date" || - asOfDateBasis === "period_end" || - asOfDateBasis === "period_range" || - asOfDateBasis === "implicit_current_snapshot") { - return "confirmed_balance"; - } - return "heuristic_candidates"; - } - return undefined; -} -function deriveAddressResultSemantics(input) { - const asOfDateBasis = resolveAsOfDateBasis(input.filters, input.semanticFrame); - const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters, input.semanticFrame); - if (isHeuristicCandidatesIntent(input.intent)) { - return { - requested_result_mode: requestedResultMode, - result_mode: "heuristic_candidates", - evidence_strength: deriveAddressEvidenceStrength(input), - balance_confirmed: false, - as_of_date_basis: asOfDateBasis - }; - } - if (isConfirmedBalanceIntent(input.intent)) { - const balanceConfirmed = input.responseType !== "LIMITED_WITH_REASON"; - return { - requested_result_mode: requestedResultMode, - result_mode: "confirmed_balance", - evidence_strength: deriveAddressEvidenceStrength(input), - balance_confirmed: balanceConfirmed, - as_of_date_basis: asOfDateBasis ?? "period_end" - }; - } - if (requestedResultMode) { - return { - requested_result_mode: requestedResultMode - }; - } - return {}; -} -function mergeAddressResultSemantics(base, override) { - if (!override) { - return base; - } - return { - ...base, - ...(override.result_mode ? { result_mode: override.result_mode } : {}), - ...(override.evidence_strength ? { evidence_strength: override.evidence_strength } : {}), - ...(typeof override.balance_confirmed === "boolean" ? { balance_confirmed: override.balance_confirmed } : {}) - }; -} function withConfirmedBalanceFallbackReason(reasons, requestedResultMode, semantics, baseResultMode) { if (requestedResultMode !== "confirmed_balance") { return reasons; @@ -2479,7 +2362,7 @@ function composeLimitedReply(input) { } function buildLimitedExecutionResult(input) { const accountScopeAudit = input.accountScopeAudit ?? buildDefaultAccountScopeAudit(input.filters); - const resultSemantics = deriveAddressResultSemantics({ + const coverageEvidence = (0, addressCoverageEvidencePolicy_1.resolveAddressCoverageEvidence)({ intent: input.intent.intent, selectedRecipe: input.selectedRecipe, filters: input.filters, @@ -2487,8 +2370,7 @@ function buildLimitedExecutionResult(input) { responseType: "LIMITED_WITH_REASON", rowsMatched: input.rowsMatched }); - const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters, input.semanticFrame); - const reasonsWithConfirmedFallback = withConfirmedBalanceFallbackReason(input.reasons, requestedResultMode, undefined, resultSemantics.result_mode); + const reasonsWithConfirmedFallback = withConfirmedBalanceFallbackReason(input.reasons, coverageEvidence.requested_result_mode ?? undefined, undefined, coverageEvidence.result_mode ?? undefined); const exactLimitedReason = input.intent.intent === "inventory_on_hand_as_of_date" ? "exact_inventory_mode_limited_response" : input.intent.intent === "payables_confirmed_as_of_date" @@ -2507,11 +2389,11 @@ function buildLimitedExecutionResult(input) { buildRouteExpectationAudit({ intent: input.intent.intent, selectedRecipe: input.selectedRecipe, - requestedResultMode: requestedResultMode, - resultMode: resultSemantics.result_mode + requestedResultMode: coverageEvidence.requested_result_mode ?? undefined, + resultMode: coverageEvidence.result_mode ?? undefined }); const runtimeReadiness = runtimeReadinessForLimitedCategory(input.category); - const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)({ + const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)((0, addressCoverageEvidencePolicy_1.attachAddressCoverageEvidence)({ detected_mode: input.mode.mode, detected_mode_confidence: input.mode.confidence, query_shape: input.shape.shape, @@ -2562,10 +2444,21 @@ function buildLimitedExecutionResult(input) { route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, - ...resultSemantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: input.limitations, reasons }, { + intent: input.intent.intent, + selectedRecipe: input.selectedRecipe, + filters: input.filters, + semanticFrame: input.semanticFrame ?? null, + responseType: "LIMITED_WITH_REASON", + rowsMatched: input.rowsMatched + }), { intent: input.intent.intent, filters: input.filters, semanticFrame: input.semanticFrame ?? null, @@ -2692,12 +2585,12 @@ class AddressQueryService { capabilityAudit: buildCapabilityAudit(intent.intent), shadowRouteAudit: buildShadowRouteAudit({ intent: intent.intent, - requestedResultMode: resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame), + requestedResultMode: (0, addressCoverageEvidencePolicy_1.resolveAddressRequestedResultMode)(intent.intent, filters.extracted_filters, semanticFrame) ?? undefined, filters: filters.extracted_filters }) }); } - const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame); + const requestedResultMode = (0, addressCoverageEvidencePolicy_1.resolveAddressRequestedResultMode)(intent.intent, filters.extracted_filters, semanticFrame) ?? undefined; const confirmedBalancePayablesIntent = (intent.intent === "list_payables_counterparties" || intent.intent === "payables_confirmed_as_of_date") && requestedResultMode === "confirmed_balance"; const confirmedBalanceReceivablesIntent = intent.intent === "receivables_confirmed_as_of_date" && requestedResultMode === "confirmed_balance"; @@ -3331,23 +3224,23 @@ class AddressQueryService { : null; const buildFactualNoRowsResult = (replyText, noRowsReason, extraLimitations = []) => { const responseType = "FACTUAL_SUMMARY"; - const semantics = mergeAddressResultSemantics(deriveAddressResultSemantics({ + const coverageEvidence = (0, addressCoverageEvidencePolicy_1.resolveAddressCoverageEvidence)({ intent: intent.intent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, semanticFrame, responseType, rowsMatched: 0 - }), undefined); + }); const factualNoRowsLimitations = [...filters.warnings, ...extraLimitations]; - const factualNoRowsReasons = withConfirmedBalanceFallbackReason([...baseReasons, noRowsReason], requestedResultMode, undefined, semantics.result_mode); + const factualNoRowsReasons = withConfirmedBalanceFallbackReason([...baseReasons, noRowsReason], requestedResultMode, undefined, coverageEvidence.result_mode ?? undefined); const routeExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: effectiveRecipeId, requestedResultMode, - resultMode: semantics.result_mode + resultMode: coverageEvidence.result_mode ?? undefined }); - const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)({ + const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)((0, addressCoverageEvidencePolicy_1.attachAddressCoverageEvidence)({ detected_mode: mode.mode, detected_mode_confidence: mode.confidence, query_shape: shape.shape, @@ -3389,7 +3282,11 @@ class AddressQueryService { route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, - ...semantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: factualNoRowsLimitations, reasons: factualNoRowsReasons, ...(capabilityAudit @@ -3409,6 +3306,13 @@ class AddressQueryService { } : {}) }, { + intent: intent.intent, + selectedRecipe: effectiveRecipeId, + filters: filters.extracted_filters, + semanticFrame, + responseType, + rowsMatched: 0 + }), { intent: intent.intent, filters: filters.extracted_filters, semanticFrame, @@ -3431,22 +3335,27 @@ class AddressQueryService { }; }; const buildFactualExecutionResult = (input) => { - const resultSemantics = mergeAddressResultSemantics(deriveAddressResultSemantics({ + const coverageEvidence = (0, addressCoverageEvidencePolicy_1.resolveAddressCoverageEvidence)({ intent: intent.intent, selectedRecipe: input.selectedRecipe, filters: input.extractedFilters ?? filters.extracted_filters, semanticFrame: input.semanticFrame ?? semanticFrame, responseType: input.responseType, - rowsMatched: input.rowsMatched - }), input.responseSemantics); + rowsMatched: input.rowsMatched, + overrideResultMode: input.responseSemantics?.result_mode ?? null, + overrideEvidenceStrength: input.responseSemantics?.evidence_strength ?? null, + overrideBalanceConfirmed: typeof input.responseSemantics?.balance_confirmed === "boolean" + ? input.responseSemantics.balance_confirmed + : null + }); const routeExpectationAudit = input.routeExpectationAudit ?? buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: input.selectedRecipe, requestedResultMode, - resultMode: resultSemantics.result_mode + resultMode: coverageEvidence.result_mode ?? undefined }); - const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)({ + const debugPayload = (0, addressTruthGatePolicy_1.attachAddressTruthGate)((0, addressCoverageEvidencePolicy_1.attachAddressCoverageEvidence)({ detected_mode: mode.mode, detected_mode_confidence: mode.confidence, query_shape: shape.shape, @@ -3489,7 +3398,11 @@ class AddressQueryService { route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, semantic_frame: input.semanticFrame ?? semanticFrame, - ...resultSemantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: input.limitations, reasons: input.reasons, ...(input.capabilityAudit @@ -3509,6 +3422,18 @@ class AddressQueryService { } : {}) }, { + intent: intent.intent, + selectedRecipe: input.selectedRecipe, + filters: input.extractedFilters ?? filters.extracted_filters, + semanticFrame: input.semanticFrame ?? semanticFrame, + responseType: input.responseType, + rowsMatched: input.rowsMatched, + overrideResultMode: input.responseSemantics?.result_mode ?? null, + overrideEvidenceStrength: input.responseSemantics?.evidence_strength ?? null, + overrideBalanceConfirmed: typeof input.responseSemantics?.balance_confirmed === "boolean" + ? input.responseSemantics.balance_confirmed + : null + }), { intent: intent.intent, filters: input.extractedFilters ?? filters.extracted_filters, semanticFrame: input.semanticFrame ?? semanticFrame, @@ -3784,19 +3709,24 @@ class AddressQueryService { "period_window_auto_broadened_to_available_data" ]; const broadenedReasons = [...baseReasons, ...broadenedAdjustments, "period_window_auto_broadened_to_available_data"]; - const broadenedResultSemantics = mergeAddressResultSemantics(deriveAddressResultSemantics({ + const broadenedCoverageEvidence = (0, addressCoverageEvidencePolicy_1.resolveAddressCoverageEvidence)({ intent: intent.intent, selectedRecipe: broadenedSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, semanticFrame, responseType: broadenedFactual.responseType, - rowsMatched: broadenedFilteredRows.length - }), broadenedFactual.semantics); + rowsMatched: broadenedFilteredRows.length, + overrideResultMode: broadenedFactual.semantics?.result_mode ?? null, + overrideEvidenceStrength: broadenedFactual.semantics?.evidence_strength ?? null, + overrideBalanceConfirmed: typeof broadenedFactual.semantics?.balance_confirmed === "boolean" + ? broadenedFactual.semantics.balance_confirmed + : null + }); const broadenedRouteExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: broadenedSelection.selected_recipe.recipe_id, requestedResultMode, - resultMode: broadenedResultSemantics.result_mode + resultMode: broadenedCoverageEvidence.result_mode ?? undefined }); return buildFactualExecutionResult({ replyText: injectNoticeAfterLeadLine(broadenedFactual.text, broadenedPrefix), @@ -4155,19 +4085,22 @@ class AddressQueryService { : [] : []; const factualLimitations = [...filters.warnings, ...vatProbeLimitations]; - const factualResultSemantics = mergeAddressResultSemantics(deriveAddressResultSemantics({ + const factualCoverageEvidence = (0, addressCoverageEvidencePolicy_1.resolveAddressCoverageEvidence)({ intent: composeIntent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, semanticFrame, responseType: factual.responseType, - rowsMatched: filteredRows.length - }), factual.semantics); + rowsMatched: filteredRows.length, + overrideResultMode: factual.semantics?.result_mode ?? null, + overrideEvidenceStrength: factual.semantics?.evidence_strength ?? null, + overrideBalanceConfirmed: typeof factual.semantics?.balance_confirmed === "boolean" ? factual.semantics.balance_confirmed : null + }); const finalRouteExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: effectiveRecipeId, requestedResultMode, - resultMode: factualResultSemantics.result_mode + resultMode: factualCoverageEvidence.result_mode ?? undefined }); if (finalRouteExpectationAudit.status === "mismatch" && config_1.FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1) { return finalizeLimitedResult({ @@ -4208,7 +4141,7 @@ class AddressQueryService { (intent.intent === "vat_payable_confirmed_as_of_date" && composeIntent === "vat_payable_confirmed_as_of_date") || (intent.intent === "vat_liability_confirmed_for_tax_period" && composeIntent === "vat_liability_confirmed_for_tax_period"); - if (exactConfirmedIntent && factualResultSemantics.balance_confirmed !== true) { + if (exactConfirmedIntent && factualCoverageEvidence.balance_confirmed !== true) { const exactModeName = intent.intent === "payables_confirmed_as_of_date" ? "payables" : intent.intent === "receivables_confirmed_as_of_date" @@ -4277,7 +4210,7 @@ class AddressQueryService { matchFailureStage: "none", matchFailureReason: null, limitations: factualLimitations, - reasons: withConfirmedBalanceFallbackReason(reasonsWithRouteExpectation, requestedResultMode, factual.semantics, factualResultSemantics.result_mode), + reasons: withConfirmedBalanceFallbackReason(reasonsWithRouteExpectation, requestedResultMode, factual.semantics, factualCoverageEvidence.result_mode ?? undefined), routeExpectationAudit: finalRouteExpectationAudit, capabilityAudit, shadowRouteAudit, diff --git a/llm_normalizer/backend/dist/services/assistantTruthAnswerPolicyRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantTruthAnswerPolicyRuntimeAdapter.js index 60f57e1..d7af741 100644 --- a/llm_normalizer/backend/dist/services/assistantTruthAnswerPolicyRuntimeAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantTruthAnswerPolicyRuntimeAdapter.js @@ -4,6 +4,7 @@ exports.resolveAssistantTruthAnswerPolicyRuntime = resolveAssistantTruthAnswerPo exports.buildAssistantTruthAnswerPolicyRuntimeFields = buildAssistantTruthAnswerPolicyRuntimeFields; exports.attachAssistantTruthAnswerPolicy = attachAssistantTruthAnswerPolicy; const assistantRuntimeContracts_1 = require("../types/assistantRuntimeContracts"); +const addressCoverageEvidencePolicy_1 = require("./addressCoverageEvidencePolicy"); const addressTruthGatePolicy_1 = require("./addressTruthGatePolicy"); const assistantRuntimeContractResolver_1 = require("./assistantRuntimeContractResolver"); function toRecordObject(value) { @@ -78,6 +79,7 @@ function groundingStatusFrom(debug, input, truthGateStatus) { return "unsupported"; } function coverageStatusFrom(debug, input, truthGateStatus, groundingStatus) { + const explicitCoverageEvidence = (0, addressCoverageEvidencePolicy_1.toAddressCoverageEvidenceContract)(debug.address_coverage_evidence_v1); if (truthGateStatus === "full_confirmed") { return "full"; } @@ -90,6 +92,9 @@ function coverageStatusFrom(debug, input, truthGateStatus, groundingStatus) { if (toStringList(debug.missing_required_filters).length > 0 || groundingStatus === "route_mismatch_blocked" || groundingStatus === "no_grounded_answer") { return "blocked"; } + if (explicitCoverageEvidence) { + return explicitCoverageEvidence.coverage_status; + } const coverageReport = toRecordObject(input.coverageReport) ?? toRecordObject(debug.coverage_report); if (coverageReport) { const total = asNumber(coverageReport.requirements_total); @@ -127,6 +132,10 @@ function truthModeFrom(input) { return "unsupported"; } function evidenceGradeFrom(debug, coverageStatus, groundingStatus, truthGateStatus) { + const explicitCoverageEvidence = (0, addressCoverageEvidencePolicy_1.toAddressCoverageEvidenceContract)(debug.address_coverage_evidence_v1); + if (explicitCoverageEvidence?.evidence_strength && isEvidenceGrade(explicitCoverageEvidence.evidence_strength)) { + return explicitCoverageEvidence.evidence_strength; + } const explicit = toNonEmptyString(debug.evidence_strength); if (isEvidenceGrade(explicit)) { return explicit; @@ -166,6 +175,7 @@ function collectReasonCodes(input) { pushReason(reasons, `truth_gate_${input.truthGateStatus}`); pushReason(reasons, `truth_mode_${input.truthMode}`); input.explicitGateReasonCodes.forEach((item) => pushReason(reasons, item)); + input.explicitCoverageReasonCodes.forEach((item) => pushReason(reasons, item)); input.shadow.transition_contract_reason.forEach((item) => pushReason(reasons, item)); input.shadow.capability_contract_reason.forEach((item) => pushReason(reasons, item)); toStringList(input.debug.missing_required_filters).forEach((item) => pushReason(reasons, `missing_filter_${item}`)); @@ -235,6 +245,7 @@ function requiredSectionsFor(shape) { function resolveAssistantTruthAnswerPolicyRuntime(input) { const debug = toRecordObject(input.addressDebug) ?? {}; const explicitAddressTruthGate = (0, addressTruthGatePolicy_1.toAddressTruthGateContract)(debug.address_truth_gate_v1); + const explicitCoverageEvidence = (0, addressCoverageEvidencePolicy_1.toAddressCoverageEvidenceContract)(debug.address_coverage_evidence_v1); const shadow = (0, assistantRuntimeContractResolver_1.resolveAssistantRuntimeContractShadow)({ addressDebug: debug, addressRuntimeMeta: input.addressRuntimeMeta, @@ -257,7 +268,8 @@ function resolveAssistantTruthAnswerPolicyRuntime(input) { shadow, truthMode, truthGateStatus, - explicitGateReasonCodes: explicitAddressTruthGate?.reason_codes ?? [] + explicitGateReasonCodes: explicitAddressTruthGate?.reason_codes ?? [], + explicitCoverageReasonCodes: explicitCoverageEvidence?.reason_codes ?? [] }); const shape = answerShapeFrom({ coverageStatus, diff --git a/llm_normalizer/backend/src/services/addressCoverageEvidencePolicy.ts b/llm_normalizer/backend/src/services/addressCoverageEvidencePolicy.ts new file mode 100644 index 0000000..18bdbce --- /dev/null +++ b/llm_normalizer/backend/src/services/addressCoverageEvidencePolicy.ts @@ -0,0 +1,391 @@ +import type { AssistantCoverageStatus } from "../types/assistantRuntimeContracts"; +import type { + AddressAsOfDateBasis, + AddressCoverageEvidenceBasis, + AddressEvidenceStrength, + AddressFilterSet, + AddressIntent, + AddressResponseType, + AddressResultMode, + AddressSemanticFrame +} from "../types/addressQuery"; + +export const ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION = "address_coverage_evidence_v1" as const; + +export interface AddressCoverageEvidenceContract { + schema_version: typeof ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION; + policy_owner: "addressCoverageEvidencePolicy"; + requested_result_mode: AddressResultMode | null; + result_mode: AddressResultMode | null; + evidence_strength: AddressEvidenceStrength | null; + balance_confirmed: boolean | null; + as_of_date_basis: AddressAsOfDateBasis | null; + coverage_status: AssistantCoverageStatus; + evidence_basis: AddressCoverageEvidenceBasis; + reason_codes: string[]; +} + +export interface ResolveAddressCoverageEvidenceInput { + intent: AddressIntent; + selectedRecipe: string | null; + filters: AddressFilterSet; + semanticFrame?: AddressSemanticFrame | null; + responseType: AddressResponseType; + rowsMatched: number; + overrideResultMode?: AddressResultMode | null; + overrideEvidenceStrength?: AddressEvidenceStrength | null; + overrideBalanceConfirmed?: boolean | null; +} + +function toRecordObject(value: unknown): Record | null { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + return value as Record; +} + +function toNonEmptyString(value: unknown): string | null { + if (value === null || value === undefined) { + return null; + } + const text = String(value).trim(); + return text.length > 0 ? text : null; +} + +function normalizeIsoDateHint(value: unknown): string | null { + if (typeof value !== "string") { + return null; + } + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + const match = trimmed.match(/^(\d{4})-(\d{2})-(\d{2})(?:T.*)?$/); + if (!match) { + return null; + } + const year = Number(match[1]); + const month = Number(match[2]); + const day = Number(match[3]); + if (!Number.isFinite(year) || !Number.isFinite(month) || !Number.isFinite(day)) { + return null; + } + const candidate = new Date(Date.UTC(year, month - 1, day)); + if ( + candidate.getUTCFullYear() !== year || + candidate.getUTCMonth() + 1 !== month || + candidate.getUTCDate() !== day + ) { + return null; + } + return `${match[1]}-${match[2]}-${match[3]}`; +} + +function normalizeReasonCode(value: string): string | null { + const normalized = value + .trim() + .replace(/[^\p{L}\p{N}_.:-]+/gu, "_") + .replace(/^_+|_+$/g, "") + .toLowerCase(); + return normalized.length > 0 ? normalized.slice(0, 120) : null; +} + +function pushReason(target: string[], value: unknown): void { + const text = toNonEmptyString(value); + if (!text) { + return; + } + const normalized = normalizeReasonCode(text); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} + +function isResultMode(value: string | null): value is AddressResultMode { + return value === "heuristic_candidates" || value === "confirmed_balance"; +} + +function isEvidenceStrength(value: string | null): value is AddressEvidenceStrength { + return value === "weak" || value === "medium" || value === "strong"; +} + +function isCoverageStatus(value: string | null): value is AssistantCoverageStatus { + return value === "full" || value === "partial" || value === "blocked"; +} + +function isAsOfDateBasis(value: string | null): value is AddressAsOfDateBasis { + return ( + value === "period_end" || + value === "explicit_as_of_date" || + value === "period_range" || + value === "implicit_current_snapshot" + ); +} + +function isEvidenceBasis(value: string | null): value is AddressCoverageEvidenceBasis { + return ( + value === "matched_rows" || + value === "exact_negative" || + value === "limited_response" || + value === "heuristic_candidates" || + value === "unknown" + ); +} + +export function isHeuristicCandidatesIntent(intent: AddressIntent): boolean { + return ( + intent === "list_receivables_counterparties" || + intent === "list_payables_counterparties" || + intent === "list_open_contracts" || + intent === "open_items_by_counterparty_or_contract" + ); +} + +export function isConfirmedBalanceIntent(intent: AddressIntent): boolean { + return ( + intent === "account_balance_snapshot" || + intent === "documents_forming_balance" || + intent === "inventory_on_hand_as_of_date" || + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "open_contracts_confirmed_as_of_date" || + intent === "payables_confirmed_as_of_date" || + intent === "receivables_confirmed_as_of_date" || + intent === "vat_payable_confirmed_as_of_date" || + intent === "vat_liability_confirmed_for_tax_period" + ); +} + +export function resolveAddressAsOfDateBasis( + filters: AddressFilterSet, + semanticFrame?: AddressSemanticFrame | null +): AddressAsOfDateBasis | null { + if (semanticFrame?.date_basis_hint) { + return semanticFrame.date_basis_hint; + } + const asOfDate = normalizeIsoDateHint(filters.as_of_date); + if (asOfDate) { + return "explicit_as_of_date"; + } + const periodFrom = normalizeIsoDateHint(filters.period_from); + const periodTo = normalizeIsoDateHint(filters.period_to); + if (periodFrom && periodTo) { + return "period_range"; + } + if (!periodFrom && periodTo) { + return "period_end"; + } + if (periodFrom) { + return "period_range"; + } + return null; +} + +function deriveAddressEvidenceStrength(input: { + intent: AddressIntent; + selectedRecipe: string | null; + responseType: AddressResponseType; + rowsMatched: number; +}): AddressEvidenceStrength | null { + if (isHeuristicCandidatesIntent(input.intent)) { + if (input.rowsMatched <= 0 || input.responseType === "LIMITED_WITH_REASON") { + return "weak"; + } + if (input.selectedRecipe === "address_open_items_by_party_or_contract_v1") { + return "medium"; + } + return "weak"; + } + if (isConfirmedBalanceIntent(input.intent)) { + if (input.rowsMatched > 0) { + return "strong"; + } + return input.responseType === "LIMITED_WITH_REASON" ? "weak" : "medium"; + } + return null; +} + +export function resolveAddressRequestedResultMode( + intent: AddressIntent, + filters: AddressFilterSet, + semanticFrame?: AddressSemanticFrame | null +): AddressResultMode | null { + if (isConfirmedBalanceIntent(intent)) { + return "confirmed_balance"; + } + if (intent === "list_open_contracts") { + return "heuristic_candidates"; + } + if (isHeuristicCandidatesIntent(intent)) { + const asOfDateBasis = resolveAddressAsOfDateBasis(filters, semanticFrame); + if ( + asOfDateBasis === "explicit_as_of_date" || + asOfDateBasis === "period_end" || + asOfDateBasis === "period_range" || + asOfDateBasis === "implicit_current_snapshot" + ) { + return "confirmed_balance"; + } + return "heuristic_candidates"; + } + return null; +} + +function balanceConfirmedFrom(input: { + intent: AddressIntent; + responseType: AddressResponseType; +}): boolean | null { + if (isHeuristicCandidatesIntent(input.intent)) { + return false; + } + if (isConfirmedBalanceIntent(input.intent)) { + return input.responseType !== "LIMITED_WITH_REASON"; + } + return null; +} + +function coverageStatusFrom(input: { + resultMode: AddressResultMode | null; + balanceConfirmed: boolean | null; + responseType: AddressResponseType; + rowsMatched: number; +}): AssistantCoverageStatus { + if (input.responseType === "LIMITED_WITH_REASON") { + return input.resultMode === "heuristic_candidates" ? "partial" : "blocked"; + } + if (input.balanceConfirmed === false) { + return "partial"; + } + if (input.rowsMatched > 0) { + return "full"; + } + if (input.resultMode === "heuristic_candidates") { + return "partial"; + } + if (input.resultMode === "confirmed_balance" && input.balanceConfirmed === true) { + return "full"; + } + return "blocked"; +} + +function evidenceBasisFrom(input: { + resultMode: AddressResultMode | null; + responseType: AddressResponseType; + rowsMatched: number; + balanceConfirmed: boolean | null; +}): AddressCoverageEvidenceBasis { + if (input.responseType === "LIMITED_WITH_REASON") { + return "limited_response"; + } + if (input.resultMode === "heuristic_candidates" || input.balanceConfirmed === false) { + return "heuristic_candidates"; + } + if (input.rowsMatched > 0) { + return "matched_rows"; + } + if (input.resultMode === "confirmed_balance") { + return "exact_negative"; + } + return "unknown"; +} + +export function resolveAddressCoverageEvidence( + input: ResolveAddressCoverageEvidenceInput +): AddressCoverageEvidenceContract { + const requestedResultMode = resolveAddressRequestedResultMode(input.intent, input.filters, input.semanticFrame); + const resultMode = input.overrideResultMode ?? requestedResultMode; + const evidenceStrength = + input.overrideEvidenceStrength ?? deriveAddressEvidenceStrength(input); + const balanceConfirmed = + input.overrideBalanceConfirmed ?? balanceConfirmedFrom(input); + const asOfDateBasis = resolveAddressAsOfDateBasis(input.filters, input.semanticFrame); + const coverageStatus = coverageStatusFrom({ + resultMode, + balanceConfirmed, + responseType: input.responseType, + rowsMatched: input.rowsMatched + }); + const evidenceBasis = evidenceBasisFrom({ + resultMode, + responseType: input.responseType, + rowsMatched: input.rowsMatched, + balanceConfirmed + }); + const reasonCodes: string[] = []; + pushReason(reasonCodes, `coverage_status_${coverageStatus}`); + pushReason(reasonCodes, resultMode ? `result_mode_${resultMode}` : "result_mode_unknown"); + pushReason(reasonCodes, evidenceStrength ? `evidence_strength_${evidenceStrength}` : "evidence_strength_none"); + pushReason(reasonCodes, `evidence_basis_${evidenceBasis}`); + pushReason(reasonCodes, balanceConfirmed === true ? "balance_confirmed_true" : balanceConfirmed === false ? "balance_confirmed_false" : "balance_confirmed_unknown"); + pushReason(reasonCodes, asOfDateBasis ? `as_of_date_basis_${asOfDateBasis}` : "as_of_date_basis_none"); + return { + schema_version: ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION, + policy_owner: "addressCoverageEvidencePolicy", + requested_result_mode: requestedResultMode, + result_mode: resultMode, + evidence_strength: evidenceStrength, + balance_confirmed: balanceConfirmed, + as_of_date_basis: asOfDateBasis, + coverage_status: coverageStatus, + evidence_basis: evidenceBasis, + reason_codes: reasonCodes.slice(0, 24) + }; +} + +export function attachAddressCoverageEvidence>( + debugPayload: T, + input: ResolveAddressCoverageEvidenceInput +): T & { address_coverage_evidence_v1: AddressCoverageEvidenceContract } { + return { + ...debugPayload, + address_coverage_evidence_v1: resolveAddressCoverageEvidence(input) + }; +} + +export function toAddressCoverageEvidenceContract(value: unknown): AddressCoverageEvidenceContract | null { + const record = toRecordObject(value); + if (!record) { + return null; + } + const requestedResultMode = toNonEmptyString(record.requested_result_mode); + const resultMode = toNonEmptyString(record.result_mode); + const evidenceStrength = toNonEmptyString(record.evidence_strength); + const asOfDateBasis = toNonEmptyString(record.as_of_date_basis); + const coverageStatus = toNonEmptyString(record.coverage_status); + const evidenceBasis = toNonEmptyString(record.evidence_basis); + const balanceConfirmed = typeof record.balance_confirmed === "boolean" ? record.balance_confirmed : null; + if (!isCoverageStatus(coverageStatus) || !isEvidenceBasis(evidenceBasis)) { + return null; + } + if (requestedResultMode !== null && !isResultMode(requestedResultMode)) { + return null; + } + if (resultMode !== null && !isResultMode(resultMode)) { + return null; + } + if (evidenceStrength !== null && !isEvidenceStrength(evidenceStrength)) { + return null; + } + if (asOfDateBasis !== null && !isAsOfDateBasis(asOfDateBasis)) { + return null; + } + return { + schema_version: ADDRESS_COVERAGE_EVIDENCE_SCHEMA_VERSION, + policy_owner: "addressCoverageEvidencePolicy", + requested_result_mode: requestedResultMode, + result_mode: resultMode, + evidence_strength: evidenceStrength, + balance_confirmed: balanceConfirmed, + as_of_date_basis: asOfDateBasis, + coverage_status: coverageStatus, + evidence_basis: evidenceBasis, + reason_codes: Array.isArray(record.reason_codes) + ? record.reason_codes + .map((item) => toNonEmptyString(item)) + .filter((item): item is string => Boolean(item)) + .slice(0, 24) + : [] + }; +} diff --git a/llm_normalizer/backend/src/services/addressQueryService.ts b/llm_normalizer/backend/src/services/addressQueryService.ts index 913de9a..66e953e 100644 --- a/llm_normalizer/backend/src/services/addressQueryService.ts +++ b/llm_normalizer/backend/src/services/addressQueryService.ts @@ -14,8 +14,6 @@ import type { AddressCapabilityLayer, AddressCapabilityRouteMode, AddressShadowRouteStatus, - AddressAsOfDateBasis, - AddressEvidenceStrength, AddressExecutionResult, AddressFilterSet, AddressIntent, @@ -64,6 +62,13 @@ import { normalizeOrganizationScopeValue, resolveOrganizationSelectionFromMessage } from "./assistantOrganizationMatcher"; +import { + attachAddressCoverageEvidence, + isConfirmedBalanceIntent, + isHeuristicCandidatesIntent, + resolveAddressRequestedResultMode, + resolveAddressCoverageEvidence +} from "./addressCoverageEvidencePolicy"; import { attachAddressTruthGate } from "./addressTruthGatePolicy"; import { OpenAIResponsesClient, type OpenAIRequestConfig } from "./openaiResponsesClient"; import { readJsonFile } from "../utils/files"; @@ -1802,168 +1807,10 @@ function collectOrganizationCandidatesFromRows(rows: NormalizedAddressRow[]): st return mergeKnownOrganizations(rows.map((row) => row.organization).filter((value): value is string => Boolean(value))); } -function isHeuristicCandidatesIntent(intent: AddressIntent): boolean { - return ( - intent === "list_receivables_counterparties" || - intent === "list_payables_counterparties" || - intent === "list_open_contracts" || - intent === "open_items_by_counterparty_or_contract" - ); -} - -function isConfirmedBalanceIntent(intent: AddressIntent): boolean { - return ( - intent === "account_balance_snapshot" || - intent === "documents_forming_balance" || - intent === "inventory_on_hand_as_of_date" || - intent === "inventory_purchase_provenance_for_item" || - intent === "inventory_purchase_documents_for_item" || - intent === "inventory_sale_trace_for_item" || - intent === "inventory_purchase_to_sale_chain" || - intent === "open_contracts_confirmed_as_of_date" || - intent === "payables_confirmed_as_of_date" || - intent === "receivables_confirmed_as_of_date" || - intent === "vat_payable_confirmed_as_of_date" || - intent === "vat_liability_confirmed_for_tax_period" - ); -} - -function resolveAsOfDateBasis(filters: AddressFilterSet, semanticFrame?: AddressSemanticFrame | null): AddressAsOfDateBasis | null { - if (semanticFrame?.date_basis_hint) { - return semanticFrame.date_basis_hint; - } - const asOfDate = normalizeAnalysisDateHint(filters.as_of_date); - if (asOfDate) { - return "explicit_as_of_date"; - } - const periodFrom = normalizeAnalysisDateHint(filters.period_from); - const periodTo = normalizeAnalysisDateHint(filters.period_to); - if (periodFrom && periodTo) { - return "period_range"; - } - if (!periodFrom && periodTo) { - return "period_end"; - } - if (periodFrom) { - return "period_range"; - } - return null; -} - -function deriveAddressEvidenceStrength(input: { - intent: AddressIntent; - selectedRecipe: string | null; - responseType: AddressResponseType; - rowsMatched: number; -}): AddressEvidenceStrength | undefined { - if (isHeuristicCandidatesIntent(input.intent)) { - if (input.rowsMatched <= 0 || input.responseType === "LIMITED_WITH_REASON") { - return "weak"; - } - if (input.selectedRecipe === "address_open_items_by_party_or_contract_v1") { - return "medium"; - } - return "weak"; - } - if (isConfirmedBalanceIntent(input.intent)) { - if (input.rowsMatched > 0) { - return "strong"; - } - return input.responseType === "LIMITED_WITH_REASON" ? "weak" : "medium"; - } - return undefined; -} - -function resolveRequestedResultMode( - intent: AddressIntent, - filters: AddressFilterSet, - semanticFrame?: AddressSemanticFrame | null -): AddressResultMode | undefined { - if (isConfirmedBalanceIntent(intent)) { - return "confirmed_balance"; - } - if (intent === "list_open_contracts") { - return "heuristic_candidates"; - } - if (isHeuristicCandidatesIntent(intent)) { - const asOfDateBasis = resolveAsOfDateBasis(filters, semanticFrame); - if ( - asOfDateBasis === "explicit_as_of_date" || - asOfDateBasis === "period_end" || - asOfDateBasis === "period_range" || - asOfDateBasis === "implicit_current_snapshot" - ) { - return "confirmed_balance"; - } - return "heuristic_candidates"; - } - return undefined; -} - -function deriveAddressResultSemantics(input: { - intent: AddressIntent; - selectedRecipe: string | null; - filters: AddressFilterSet; - semanticFrame?: AddressSemanticFrame | null; - responseType: AddressResponseType; - rowsMatched: number; -}): { - requested_result_mode?: AddressResultMode; - result_mode?: AddressResultMode; - evidence_strength?: AddressEvidenceStrength; - balance_confirmed?: boolean; - as_of_date_basis?: AddressAsOfDateBasis | null; -} { - const asOfDateBasis = resolveAsOfDateBasis(input.filters, input.semanticFrame); - const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters, input.semanticFrame); - if (isHeuristicCandidatesIntent(input.intent)) { - return { - requested_result_mode: requestedResultMode, - result_mode: "heuristic_candidates", - evidence_strength: deriveAddressEvidenceStrength(input), - balance_confirmed: false, - as_of_date_basis: asOfDateBasis - }; - } - if (isConfirmedBalanceIntent(input.intent)) { - const balanceConfirmed = input.responseType !== "LIMITED_WITH_REASON"; - return { - requested_result_mode: requestedResultMode, - result_mode: "confirmed_balance", - evidence_strength: deriveAddressEvidenceStrength(input), - balance_confirmed: balanceConfirmed, - as_of_date_basis: asOfDateBasis ?? "period_end" - }; - } - if (requestedResultMode) { - return { - requested_result_mode: requestedResultMode - }; - } - return {}; -} - -type AddressResultSemantics = ReturnType; - -function mergeAddressResultSemantics( - base: AddressResultSemantics, - override: ComposeReplySemantics | undefined -): AddressResultSemantics { - if (!override) { - return base; - } - return { - ...base, - ...(override.result_mode ? { result_mode: override.result_mode } : {}), - ...(override.evidence_strength ? { evidence_strength: override.evidence_strength } : {}), - ...(typeof override.balance_confirmed === "boolean" ? { balance_confirmed: override.balance_confirmed } : {}) - }; -} - function withConfirmedBalanceFallbackReason( reasons: string[], requestedResultMode: AddressResultMode | undefined, - semantics: ComposeReplySemantics | undefined, + semantics: { result_mode?: AddressResultMode | null } | undefined, baseResultMode?: AddressResultMode ): string[] { if (requestedResultMode !== "confirmed_balance") { @@ -3120,7 +2967,7 @@ function buildLimitedExecutionResult(input: { semanticFrame?: AddressSemanticFrame | null; }): AddressExecutionResult { const accountScopeAudit = input.accountScopeAudit ?? buildDefaultAccountScopeAudit(input.filters); - const resultSemantics = deriveAddressResultSemantics({ + const coverageEvidence = resolveAddressCoverageEvidence({ intent: input.intent.intent, selectedRecipe: input.selectedRecipe, filters: input.filters, @@ -3128,12 +2975,11 @@ function buildLimitedExecutionResult(input: { responseType: "LIMITED_WITH_REASON", rowsMatched: input.rowsMatched }); - const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters, input.semanticFrame); const reasonsWithConfirmedFallback = withConfirmedBalanceFallbackReason( input.reasons, - requestedResultMode, + coverageEvidence.requested_result_mode ?? undefined, undefined, - resultSemantics.result_mode + coverageEvidence.result_mode ?? undefined ); const exactLimitedReason = input.intent.intent === "inventory_on_hand_as_of_date" @@ -3156,11 +3002,12 @@ function buildLimitedExecutionResult(input: { buildRouteExpectationAudit({ intent: input.intent.intent, selectedRecipe: input.selectedRecipe, - requestedResultMode: requestedResultMode, - resultMode: resultSemantics.result_mode + requestedResultMode: coverageEvidence.requested_result_mode ?? undefined, + resultMode: coverageEvidence.result_mode ?? undefined }); const runtimeReadiness = runtimeReadinessForLimitedCategory(input.category); const debugPayload = attachAddressTruthGate( + attachAddressCoverageEvidence( { detected_mode: input.mode.mode, detected_mode_confidence: input.mode.confidence, @@ -3212,10 +3059,23 @@ function buildLimitedExecutionResult(input: { route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, - ...resultSemantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: + typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: input.limitations, reasons }, + { + intent: input.intent.intent, + selectedRecipe: input.selectedRecipe, + filters: input.filters, + semanticFrame: input.semanticFrame ?? null, + responseType: "LIMITED_WITH_REASON", + rowsMatched: input.rowsMatched + }), { intent: input.intent.intent, filters: input.filters, @@ -3364,12 +3224,13 @@ export class AddressQueryService { capabilityAudit: buildCapabilityAudit(intent.intent), shadowRouteAudit: buildShadowRouteAudit({ intent: intent.intent, - requestedResultMode: resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame), + requestedResultMode: resolveAddressRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame) ?? undefined, filters: filters.extracted_filters }) }); } - const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame); + const requestedResultMode = + resolveAddressRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame) ?? undefined; const confirmedBalancePayablesIntent = (intent.intent === "list_payables_counterparties" || intent.intent === "payables_confirmed_as_of_date") && requestedResultMode === "confirmed_balance"; @@ -4098,31 +3959,29 @@ export class AddressQueryService { extraLimitations: string[] = [] ): AddressExecutionResult => { const responseType: AddressResponseType = "FACTUAL_SUMMARY"; - const semantics = mergeAddressResultSemantics( - deriveAddressResultSemantics({ - intent: intent.intent, - selectedRecipe: effectiveRecipeId, - filters: filters.extracted_filters, - semanticFrame, - responseType, - rowsMatched: 0 - }), - undefined - ); + const coverageEvidence = resolveAddressCoverageEvidence({ + intent: intent.intent, + selectedRecipe: effectiveRecipeId, + filters: filters.extracted_filters, + semanticFrame, + responseType, + rowsMatched: 0 + }); const factualNoRowsLimitations = [...filters.warnings, ...extraLimitations]; const factualNoRowsReasons = withConfirmedBalanceFallbackReason( [...baseReasons, noRowsReason], requestedResultMode, undefined, - semantics.result_mode + coverageEvidence.result_mode ?? undefined ); const routeExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: effectiveRecipeId, requestedResultMode, - resultMode: semantics.result_mode + resultMode: coverageEvidence.result_mode ?? undefined }); const debugPayload = attachAddressTruthGate( + attachAddressCoverageEvidence( { detected_mode: mode.mode, detected_mode_confidence: mode.confidence, @@ -4165,7 +4024,12 @@ export class AddressQueryService { route_expectation_expected_selected_recipes: routeExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, - ...semantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: + typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: factualNoRowsLimitations, reasons: factualNoRowsReasons, ...(capabilityAudit @@ -4185,6 +4049,14 @@ export class AddressQueryService { } : {}) }, + { + intent: intent.intent, + selectedRecipe: effectiveRecipeId, + filters: filters.extracted_filters, + semanticFrame, + responseType, + rowsMatched: 0 + }), { intent: intent.intent, filters: filters.extracted_filters, @@ -4245,26 +4117,30 @@ export class AddressQueryService { truthGateStatusHint?: AssistantTruthGateContractStatus | null; extractedFilters?: AddressFilterSet; }): AddressExecutionResult => { - const resultSemantics = mergeAddressResultSemantics( - deriveAddressResultSemantics({ - intent: intent.intent, - selectedRecipe: input.selectedRecipe, - filters: input.extractedFilters ?? filters.extracted_filters, - semanticFrame: input.semanticFrame ?? semanticFrame, - responseType: input.responseType, - rowsMatched: input.rowsMatched - }), - input.responseSemantics - ); + const coverageEvidence = resolveAddressCoverageEvidence({ + intent: intent.intent, + selectedRecipe: input.selectedRecipe, + filters: input.extractedFilters ?? filters.extracted_filters, + semanticFrame: input.semanticFrame ?? semanticFrame, + responseType: input.responseType, + rowsMatched: input.rowsMatched, + overrideResultMode: input.responseSemantics?.result_mode ?? null, + overrideEvidenceStrength: input.responseSemantics?.evidence_strength ?? null, + overrideBalanceConfirmed: + typeof input.responseSemantics?.balance_confirmed === "boolean" + ? input.responseSemantics.balance_confirmed + : null + }); const routeExpectationAudit = input.routeExpectationAudit ?? buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: input.selectedRecipe, requestedResultMode, - resultMode: resultSemantics.result_mode + resultMode: coverageEvidence.result_mode ?? undefined }); const debugPayload = attachAddressTruthGate( + attachAddressCoverageEvidence( { detected_mode: mode.mode, detected_mode_confidence: mode.confidence, @@ -4308,7 +4184,12 @@ export class AddressQueryService { route_expectation_expected_requested_result_modes: routeExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: routeExpectationAudit.expectedResultModes, semantic_frame: input.semanticFrame ?? semanticFrame, - ...resultSemantics, + requested_result_mode: coverageEvidence.requested_result_mode ?? undefined, + result_mode: coverageEvidence.result_mode ?? undefined, + evidence_strength: coverageEvidence.evidence_strength ?? undefined, + balance_confirmed: + typeof coverageEvidence.balance_confirmed === "boolean" ? coverageEvidence.balance_confirmed : undefined, + as_of_date_basis: coverageEvidence.as_of_date_basis ?? undefined, limitations: input.limitations, reasons: input.reasons, ...(input.capabilityAudit @@ -4328,6 +4209,20 @@ export class AddressQueryService { } : {}) }, + { + intent: intent.intent, + selectedRecipe: input.selectedRecipe, + filters: input.extractedFilters ?? filters.extracted_filters, + semanticFrame: input.semanticFrame ?? semanticFrame, + responseType: input.responseType, + rowsMatched: input.rowsMatched, + overrideResultMode: input.responseSemantics?.result_mode ?? null, + overrideEvidenceStrength: input.responseSemantics?.evidence_strength ?? null, + overrideBalanceConfirmed: + typeof input.responseSemantics?.balance_confirmed === "boolean" + ? input.responseSemantics.balance_confirmed + : null + }), { intent: intent.intent, filters: input.extractedFilters ?? filters.extracted_filters, @@ -4645,22 +4540,25 @@ export class AddressQueryService { "period_window_auto_broadened_to_available_data" ]; const broadenedReasons = [...baseReasons, ...broadenedAdjustments, "period_window_auto_broadened_to_available_data"]; - const broadenedResultSemantics = mergeAddressResultSemantics( - deriveAddressResultSemantics({ - intent: intent.intent, - selectedRecipe: broadenedSelection.selected_recipe.recipe_id, - filters: filters.extracted_filters, - semanticFrame, - responseType: broadenedFactual.responseType, - rowsMatched: broadenedFilteredRows.length - }), - broadenedFactual.semantics - ); + const broadenedCoverageEvidence = resolveAddressCoverageEvidence({ + intent: intent.intent, + selectedRecipe: broadenedSelection.selected_recipe.recipe_id, + filters: filters.extracted_filters, + semanticFrame, + responseType: broadenedFactual.responseType, + rowsMatched: broadenedFilteredRows.length, + overrideResultMode: broadenedFactual.semantics?.result_mode ?? null, + overrideEvidenceStrength: broadenedFactual.semantics?.evidence_strength ?? null, + overrideBalanceConfirmed: + typeof broadenedFactual.semantics?.balance_confirmed === "boolean" + ? broadenedFactual.semantics.balance_confirmed + : null + }); const broadenedRouteExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: broadenedSelection.selected_recipe.recipe_id, requestedResultMode, - resultMode: broadenedResultSemantics.result_mode + resultMode: broadenedCoverageEvidence.result_mode ?? undefined }); return buildFactualExecutionResult({ replyText: injectNoticeAfterLeadLine(broadenedFactual.text, broadenedPrefix), @@ -5085,22 +4983,23 @@ export class AddressQueryService { : [] : []; const factualLimitations = [...filters.warnings, ...vatProbeLimitations]; - const factualResultSemantics = mergeAddressResultSemantics( - deriveAddressResultSemantics({ - intent: composeIntent, - selectedRecipe: effectiveRecipeId, - filters: filters.extracted_filters, - semanticFrame, - responseType: factual.responseType, - rowsMatched: filteredRows.length - }), - factual.semantics - ); + const factualCoverageEvidence = resolveAddressCoverageEvidence({ + intent: composeIntent, + selectedRecipe: effectiveRecipeId, + filters: filters.extracted_filters, + semanticFrame, + responseType: factual.responseType, + rowsMatched: filteredRows.length, + overrideResultMode: factual.semantics?.result_mode ?? null, + overrideEvidenceStrength: factual.semantics?.evidence_strength ?? null, + overrideBalanceConfirmed: + typeof factual.semantics?.balance_confirmed === "boolean" ? factual.semantics.balance_confirmed : null + }); const finalRouteExpectationAudit = buildRouteExpectationAudit({ intent: routeExpectationIntent, selectedRecipe: effectiveRecipeId, requestedResultMode, - resultMode: factualResultSemantics.result_mode + resultMode: factualCoverageEvidence.result_mode ?? undefined }); if (finalRouteExpectationAudit.status === "mismatch" && FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1) { return finalizeLimitedResult({ @@ -5142,7 +5041,7 @@ export class AddressQueryService { (intent.intent === "vat_payable_confirmed_as_of_date" && composeIntent === "vat_payable_confirmed_as_of_date") || (intent.intent === "vat_liability_confirmed_for_tax_period" && composeIntent === "vat_liability_confirmed_for_tax_period"); - if (exactConfirmedIntent && factualResultSemantics.balance_confirmed !== true) { + if (exactConfirmedIntent && factualCoverageEvidence.balance_confirmed !== true) { const exactModeName = intent.intent === "payables_confirmed_as_of_date" ? "payables" @@ -5218,7 +5117,7 @@ export class AddressQueryService { reasonsWithRouteExpectation, requestedResultMode, factual.semantics, - factualResultSemantics.result_mode + factualCoverageEvidence.result_mode ?? undefined ), routeExpectationAudit: finalRouteExpectationAudit, capabilityAudit, diff --git a/llm_normalizer/backend/src/services/assistantTruthAnswerPolicyRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantTruthAnswerPolicyRuntimeAdapter.ts index d83e7f8..8ae508e 100644 --- a/llm_normalizer/backend/src/services/assistantTruthAnswerPolicyRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantTruthAnswerPolicyRuntimeAdapter.ts @@ -11,6 +11,7 @@ import { type AssistantTruthGateContractStatus, type AssistantTruthMode } from "../types/assistantRuntimeContracts"; +import { toAddressCoverageEvidenceContract } from "./addressCoverageEvidencePolicy"; import { toAddressTruthGateContract } from "./addressTruthGatePolicy"; import { resolveAssistantRuntimeContractShadow } from "./assistantRuntimeContractResolver"; @@ -123,6 +124,7 @@ function coverageStatusFrom( truthGateStatus: AssistantTruthGateContractStatus, groundingStatus: AssistantGroundingStatus ): AssistantCoverageStatus { + const explicitCoverageEvidence = toAddressCoverageEvidenceContract(debug.address_coverage_evidence_v1); if (truthGateStatus === "full_confirmed") { return "full"; } @@ -135,6 +137,9 @@ function coverageStatusFrom( if (toStringList(debug.missing_required_filters).length > 0 || groundingStatus === "route_mismatch_blocked" || groundingStatus === "no_grounded_answer") { return "blocked"; } + if (explicitCoverageEvidence) { + return explicitCoverageEvidence.coverage_status; + } const coverageReport = toRecordObject(input.coverageReport) ?? toRecordObject(debug.coverage_report); if (coverageReport) { @@ -186,6 +191,10 @@ function evidenceGradeFrom( groundingStatus: AssistantGroundingStatus, truthGateStatus: AssistantTruthGateContractStatus ): AssistantEvidenceGrade { + const explicitCoverageEvidence = toAddressCoverageEvidenceContract(debug.address_coverage_evidence_v1); + if (explicitCoverageEvidence?.evidence_strength && isEvidenceGrade(explicitCoverageEvidence.evidence_strength)) { + return explicitCoverageEvidence.evidence_strength; + } const explicit = toNonEmptyString(debug.evidence_strength); if (isEvidenceGrade(explicit)) { return explicit; @@ -230,11 +239,13 @@ function collectReasonCodes(input: { truthMode: AssistantTruthMode; truthGateStatus: AssistantTruthGateContractStatus; explicitGateReasonCodes: string[]; + explicitCoverageReasonCodes: string[]; }): string[] { const reasons: string[] = []; pushReason(reasons, `truth_gate_${input.truthGateStatus}`); pushReason(reasons, `truth_mode_${input.truthMode}`); input.explicitGateReasonCodes.forEach((item) => pushReason(reasons, item)); + input.explicitCoverageReasonCodes.forEach((item) => pushReason(reasons, item)); input.shadow.transition_contract_reason.forEach((item) => pushReason(reasons, item)); input.shadow.capability_contract_reason.forEach((item) => pushReason(reasons, item)); toStringList(input.debug.missing_required_filters).forEach((item) => pushReason(reasons, `missing_filter_${item}`)); @@ -318,6 +329,7 @@ export function resolveAssistantTruthAnswerPolicyRuntime( ): AssistantTruthAnswerPolicyRuntimeContract { const debug = toRecordObject(input.addressDebug) ?? {}; const explicitAddressTruthGate = toAddressTruthGateContract(debug.address_truth_gate_v1); + const explicitCoverageEvidence = toAddressCoverageEvidenceContract(debug.address_coverage_evidence_v1); const shadow = resolveAssistantRuntimeContractShadow({ addressDebug: debug, addressRuntimeMeta: input.addressRuntimeMeta, @@ -340,7 +352,8 @@ export function resolveAssistantTruthAnswerPolicyRuntime( shadow, truthMode, truthGateStatus, - explicitGateReasonCodes: explicitAddressTruthGate?.reason_codes ?? [] + explicitGateReasonCodes: explicitAddressTruthGate?.reason_codes ?? [], + explicitCoverageReasonCodes: explicitCoverageEvidence?.reason_codes ?? [] }); const shape = answerShapeFrom({ coverageStatus, diff --git a/llm_normalizer/backend/src/types/addressQuery.ts b/llm_normalizer/backend/src/types/addressQuery.ts index b68461a..1890d94 100644 --- a/llm_normalizer/backend/src/types/addressQuery.ts +++ b/llm_normalizer/backend/src/types/addressQuery.ts @@ -1,6 +1,7 @@ export type AddressQuestionMode = "address_query" | "deep_analysis" | "unsupported"; import type { + AssistantCoverageStatus, AssistantCarryoverDepth, AssistantTruthGateContractStatus } from "./assistantRuntimeContracts"; @@ -102,6 +103,12 @@ export type AddressRuntimeReadiness = | "REQUIRES_SPECIALIZED_RECIPE" | "DEEP_ONLY" | "UNKNOWN"; +export type AddressCoverageEvidenceBasis = + | "matched_rows" + | "exact_negative" + | "limited_response" + | "heuristic_candidates" + | "unknown"; export type AddressMcpCallStatus = | "skipped" @@ -266,6 +273,18 @@ export interface AddressExecutionDebug { | "rows_remaining_after_scope_filter"; runtime_readiness: AddressRuntimeReadiness; limited_reason_category: AddressLimitedReasonCategory | null; + address_coverage_evidence_v1?: { + schema_version: "address_coverage_evidence_v1"; + policy_owner: "addressCoverageEvidencePolicy"; + requested_result_mode: AddressResultMode | null; + result_mode: AddressResultMode | null; + evidence_strength: AddressEvidenceStrength | null; + balance_confirmed: boolean | null; + as_of_date_basis: AddressAsOfDateBasis | null; + coverage_status: AssistantCoverageStatus; + evidence_basis: AddressCoverageEvidenceBasis; + reason_codes: string[]; + } | null; address_truth_gate_v1?: { schema_version: "address_truth_gate_v1"; policy_owner: "addressTruthGatePolicy"; diff --git a/llm_normalizer/backend/tests/addressCounterpartyItemFlowAndOpenItemsRoute.test.ts b/llm_normalizer/backend/tests/addressCounterpartyItemFlowAndOpenItemsRoute.test.ts index 6026356..bc9a7c3 100644 --- a/llm_normalizer/backend/tests/addressCounterpartyItemFlowAndOpenItemsRoute.test.ts +++ b/llm_normalizer/backend/tests/addressCounterpartyItemFlowAndOpenItemsRoute.test.ts @@ -114,6 +114,8 @@ describe("counterparty shipment item flow and open-items routing", () => { expect(result?.handled).toBe(true); expect(result?.response_type).toBe("FACTUAL_LIST"); expect(result?.debug.detected_intent).toBe("list_documents_by_counterparty"); + expect(result?.debug.address_coverage_evidence_v1?.coverage_status).toBe("full"); + expect(result?.debug.address_coverage_evidence_v1?.evidence_basis).toBe("matched_rows"); expect(result?.debug.address_truth_gate_v1?.truth_gate_status).toBe("full_confirmed"); expect(String(result?.reply_text ?? "")).toContain("Контрагент: Чепурнов П.Д."); expect(String(result?.reply_text ?? "")).toContain("Позиции:"); @@ -204,6 +206,8 @@ describe("counterparty shipment item flow and open-items routing", () => { expect(String(result?.reply_text ?? "")).toContain("исходящих оплат поставщику"); expect(String(result?.reply_text ?? "")).toContain("возвратов от поставщика"); expect(String(result?.reply_text ?? "")).toContain("Договор:"); + expect(result?.debug.address_coverage_evidence_v1?.coverage_status).toBe("blocked"); + expect(result?.debug.address_coverage_evidence_v1?.evidence_basis).toBe("unknown"); expect(result?.debug.reasons).toContain("counterparty_item_flow_no_supply_but_bank_activity_explained"); expect(executeAddressMcpQueryMock.mock.calls.length).toBeGreaterThanOrEqual(2); diff --git a/llm_normalizer/backend/tests/addressCoverageEvidencePolicy.test.ts b/llm_normalizer/backend/tests/addressCoverageEvidencePolicy.test.ts new file mode 100644 index 0000000..cad9b40 --- /dev/null +++ b/llm_normalizer/backend/tests/addressCoverageEvidencePolicy.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, it } from "vitest"; +import { resolveAddressCoverageEvidence } from "../src/services/addressCoverageEvidencePolicy"; + +describe("address coverage evidence policy", () => { + it("marks matched inventory balance replies as full confirmed-balance evidence", () => { + const contract = resolveAddressCoverageEvidence({ + intent: "inventory_on_hand_as_of_date", + selectedRecipe: "address_inventory_on_hand_as_of_date_v1", + filters: { + as_of_date: "2021-03-31", + period_from: "2021-03-01", + period_to: "2021-03-31" + }, + responseType: "FACTUAL_SUMMARY", + rowsMatched: 3 + }); + + expect(contract.requested_result_mode).toBe("confirmed_balance"); + expect(contract.result_mode).toBe("confirmed_balance"); + expect(contract.coverage_status).toBe("full"); + expect(contract.evidence_strength).toBe("strong"); + expect(contract.balance_confirmed).toBe(true); + expect(contract.evidence_basis).toBe("matched_rows"); + expect(contract.as_of_date_basis).toBe("explicit_as_of_date"); + }); + + it("treats factual exact negatives as full confirmed-balance evidence instead of partial noise", () => { + const contract = resolveAddressCoverageEvidence({ + intent: "payables_confirmed_as_of_date", + selectedRecipe: "address_payables_confirmed_as_of_date_v1", + filters: { + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31" + }, + responseType: "FACTUAL_SUMMARY", + rowsMatched: 0 + }); + + expect(contract.result_mode).toBe("confirmed_balance"); + expect(contract.coverage_status).toBe("full"); + expect(contract.balance_confirmed).toBe(true); + expect(contract.evidence_basis).toBe("exact_negative"); + expect(contract.reason_codes).toContain("evidence_basis_exact_negative"); + }); + + it("keeps heuristic candidate answers partial even when rows exist", () => { + const contract = resolveAddressCoverageEvidence({ + intent: "open_items_by_counterparty_or_contract", + selectedRecipe: "address_open_items_by_party_or_contract_v1", + filters: { + account: "60", + period_from: "2022-08-01", + period_to: "2022-08-31" + }, + responseType: "FACTUAL_LIST", + rowsMatched: 4 + }); + + expect(contract.requested_result_mode).toBe("confirmed_balance"); + expect(contract.result_mode).toBe("confirmed_balance"); + expect(contract.coverage_status).toBe("partial"); + expect(contract.evidence_strength).toBe("medium"); + expect(contract.balance_confirmed).toBe(false); + }); + + it("marks limited replies as blocked or partial according to the effective result mode", () => { + const confirmed = resolveAddressCoverageEvidence({ + intent: "inventory_on_hand_as_of_date", + selectedRecipe: "address_inventory_on_hand_as_of_date_v1", + filters: { + as_of_date: "2021-03-31" + }, + responseType: "LIMITED_WITH_REASON", + rowsMatched: 0 + }); + const heuristic = resolveAddressCoverageEvidence({ + intent: "list_open_contracts", + selectedRecipe: "address_list_open_contracts_v1", + filters: {}, + responseType: "LIMITED_WITH_REASON", + rowsMatched: 0 + }); + + expect(confirmed.coverage_status).toBe("blocked"); + expect(confirmed.evidence_basis).toBe("limited_response"); + expect(heuristic.coverage_status).toBe("partial"); + expect(heuristic.result_mode).toBe("heuristic_candidates"); + }); +}); diff --git a/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts b/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts index 60257c6..9591b16 100644 --- a/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts +++ b/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts @@ -107,6 +107,8 @@ describe("inventory selected-object follow-up", () => { expect(result?.debug.capability_route_mode).toBe("exact"); expect(result?.debug.reasons).toContain("period_window_auto_broadened_to_available_data"); expect(result?.debug.limitations).toContain("period_window_auto_broadened_to_available_data"); + expect(result?.debug.address_coverage_evidence_v1?.coverage_status).toBe("full"); + expect(result?.debug.address_coverage_evidence_v1?.evidence_basis).toBe("matched_rows"); expect(result?.debug.address_truth_gate_v1?.truth_gate_status).toBe("limited_temporal_or_contextual"); expect(result?.debug.address_truth_gate_v1?.carryover_eligibility).toBe("object_only"); const replyLines = String(result?.reply_text ?? "").split("\n"); diff --git a/llm_normalizer/backend/tests/assistantTruthAnswerPolicyRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantTruthAnswerPolicyRuntimeAdapter.test.ts index bfa5e8d..9c2f3ee 100644 --- a/llm_normalizer/backend/tests/assistantTruthAnswerPolicyRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantTruthAnswerPolicyRuntimeAdapter.test.ts @@ -94,6 +94,18 @@ describe("assistant truth answer policy runtime adapter", () => { addressDebug: { capability_id: "inventory_counterparty_item_flow", rows_matched: 0, + address_coverage_evidence_v1: { + schema_version: "address_coverage_evidence_v1", + policy_owner: "addressCoverageEvidencePolicy", + requested_result_mode: "confirmed_balance", + result_mode: "confirmed_balance", + evidence_strength: "medium", + balance_confirmed: true, + as_of_date_basis: "period_end", + coverage_status: "full", + evidence_basis: "exact_negative", + reason_codes: ["evidence_basis_exact_negative"] + }, address_truth_gate_v1: { schema_version: "address_truth_gate_v1", policy_owner: "addressTruthGatePolicy", @@ -111,8 +123,10 @@ describe("assistant truth answer policy runtime adapter", () => { expect(policy.truth_gate.coverage_status).toBe("full"); expect(policy.truth_gate.grounding_status).toBe("grounded"); expect(policy.truth_gate.truth_mode).toBe("confirmed"); + expect(policy.truth_gate.evidence_grade).toBe("medium"); expect(policy.truth_gate.carryover_eligibility).toBe("root_only"); expect(policy.truth_gate.reason_codes).toContain("counterparty_item_flow_exact_negative_response"); + expect(policy.truth_gate.reason_codes).toContain("evidence_basis_exact_negative"); expect(policy.answer_shape.answer_shape).toBe("confirmed_factual"); expect(policy.answer_shape.may_power_followup).toBe(true); }); diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index 2b5aaa9..bf5a448 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,37 @@ [ + { + "generation_id": "gen-ag04170808-1907fa", + "created_at": "2026-04-17T08:08:08+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "count": 5, + "domain": "address_phase4_coverage_evidence_mix", + "questions": [ + "покажи все документы по чепурнову", + "что нам отгружал чепурнов, какой товар или услугу?", + "какие остатки на складе на март 2021", + "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?", + "покажи еще раз остатки на эту же дату" + ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260417080808_gen-ag04170808-1907fa.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260417080808_gen-ag04170808-1907fa.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "coverage/evidence contract on factual, fallback, and root-reset branches", + "architecture_phase": "turnaround_11_phase4", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_coverage_evidence_mix.json" + } + }, { "generation_id": "gen-mo2kcds2-tlqmvng", "created_at": "2026-04-17T07:04:48.581Z", @@ -546,4 +579,4 @@ "saved_case_set_kind": null } } -] \ No newline at end of file +] diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260417080808_gen-ag04170808-1907fa.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260417080808_gen-ag04170808-1907fa.json new file mode 100644 index 0000000..bfa7b27 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260417080808_gen-ag04170808-1907fa.json @@ -0,0 +1,83 @@ +{ + "saved_at": "2026-04-17T08:08:08+00:00", + "generation_id": "gen-ag04170808-1907fa", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "agent_run": true, + "questions": [ + "покажи все документы по чепурнову", + "что нам отгружал чепурнов, какой товар или услугу?", + "какие остатки на складе на март 2021", + "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?", + "покажи еще раз остатки на эту же дату" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "coverage/evidence contract on factual, fallback, and root-reset branches", + "architecture_phase": "turnaround_11_phase4", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_coverage_evidence_mix.json" + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "покажи все документы по чепурнову", + "created_at": "2026-04-17T08:08:08+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "что нам отгружал чепурнов, какой товар или услугу?", + "created_at": "2026-04-17T08:08:08+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "какие остатки на складе на март 2021", + "created_at": "2026-04-17T08:08:08+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?", + "created_at": "2026-04-17T08:08:08+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "покажи еще раз остатки на эту же дату", + "created_at": "2026-04-17T08:08:08+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "coverage/evidence contract on factual, fallback, and root-reset branches", + "architecture_phase": "turnaround_11_phase4", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_coverage_evidence_mix.json" + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260417080808_gen-ag04170808-1907fa.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260417080808_gen-ag04170808-1907fa.json new file mode 100644 index 0000000..4ebc53a --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260417080808_gen-ag04170808-1907fa.json @@ -0,0 +1,40 @@ +{ + "suite_id": "assistant_saved_session_gen-ag04170808-1907fa", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-04-17T08:08:08+00:00", + "generation_id": "gen-ag04170808-1907fa", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "domain": "address_phase4_coverage_evidence_mix", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "покажи все документы по чепурнову" + }, + { + "user_message": "что нам отгружал чепурнов, какой товар или услугу?" + }, + { + "user_message": "какие остатки на складе на март 2021" + }, + { + "user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?" + }, + { + "user_message": "покажи еще раз остатки на эту же дату" + } + ] + } + ] +} diff --git a/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-GwhfwhCDWz.json b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-GwhfwhCDWz.json new file mode 100644 index 0000000..4eceaf8 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-GwhfwhCDWz.json @@ -0,0 +1,36 @@ +{ + "suite_id": "assistant_saved_session_runtime_job-GwhfwhCDWz", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_runtime_v0_1", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "saved_user_sessions_runtime", + "title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "покажи все документы по чепурнову" + }, + { + "user_message": "что нам отгружал чепурнов, какой товар или услугу?" + }, + { + "user_message": "какие остатки на складе на март 2021" + }, + { + "user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?" + }, + { + "user_message": "покажи еще раз остатки на эту же дату" + } + ] + } + ] +} \ No newline at end of file