diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index e41b627..73831e4 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -952,6 +952,23 @@ function isTemporalWarehousePhrase(candidate) { .trim(); return /^(?:в|на)\s+(?:январ(?:е|ь)|феврал(?:е|ь)|март(?:е)?|апрел(?:е|ь)|ма(?:й|е)|июн(?:е|ь)|июл(?:е|ь)|август(?:е)?|сентябр(?:е|ь)|октябр(?:е|ь)|ноябр(?:е|ь)|декабр(?:е|ь))(?:\s+\d{4}(?:\s+г(?:\.|ода)?)?)?$/iu.test(normalized); } +function normalizeSemanticAnchorCandidate(value) { + return cleanupAnchorValue(value) + .toLowerCase() + .replace(/С‘/g, "Рµ") + .replace(/\s+/g, " ") + .trim(); +} +function hasImplicitSelfScopeSignal(text) { + return /(?:^|[\s,.;:!?()\-])(?:у\s+нас|у\s+себя|у\s+меня|наш(?:ем|ей|его|их|а|е)?|сво(?:ем|ей|его|их|я|е)?)(?=$|[\s,.;:!?()\-])/iu.test(String(text ?? "")); +} +function isImplicitSelfScopeWarehouseAnchor(candidate) { + const normalized = normalizeSemanticAnchorCandidate(candidate); + return /^(?:у\s+нас|у\s+себя|у\s+меня|наш(?:ем|ей|его|их|а|е)?|сво(?:ем|ей|его|их|я|е)?)$/iu.test(normalized); +} +function hasSelectedObjectScopeSignal(text) { + return /(?:по\s+выбранному\s+объекту|selected\s+object)/iu.test(String(text ?? 
"")); +} function extractInventoryWarehouseAnchor(text) { const patterns = [ /(?:на|по)\s+склад(?:е|у|ом)?\s+[«"']?([^\r\n,.;:!?]+?)(?:[»"']|(?=\s+(?:на|по|за|с|в)\b|[?]|$))/iu, @@ -967,6 +984,7 @@ function extractInventoryWarehouseAnchor(text) { if (!candidate || candidate.includes("->") || candidate.includes("=>") || + isImplicitSelfScopeWarehouseAnchor(candidate) || normalizedCandidate.startsWith("по состоянию") || isTemporalWarehousePhrase(candidate) || /^(?:сейчас|на|дату|дате|остаток|остатки)$/iu.test(candidate)) { @@ -1076,6 +1094,95 @@ function shouldDefaultAsOfDateToToday(intent) { intent === "receivables_confirmed_as_of_date" || intent === "vat_payable_confirmed_as_of_date"); } +function resolveSemanticDateScopeKind(filters, warnings) { + if (warnings.includes("as_of_date_defaulted_today")) { + return "implicit_current"; + } + if ((typeof filters.as_of_date === "string" && filters.as_of_date.trim().length > 0) || + (typeof filters.period_from === "string" && filters.period_from.trim().length > 0) || + (typeof filters.period_to === "string" && filters.period_to.trim().length > 0)) { + return "explicit"; + } + return "none"; +} +function resolveSemanticDateBasisHint(filters, warnings) { + if (warnings.includes("as_of_date_defaulted_today")) { + return "implicit_current_snapshot"; + } + const hasAsOfDate = typeof filters.as_of_date === "string" && filters.as_of_date.trim().length > 0; + const hasPeriodFrom = typeof filters.period_from === "string" && filters.period_from.trim().length > 0; + const hasPeriodTo = typeof filters.period_to === "string" && filters.period_to.trim().length > 0; + if (hasPeriodFrom && hasPeriodTo) { + return "period_range"; + } + if (hasAsOfDate) { + return "explicit_as_of_date"; + } + if (hasPeriodTo) { + return "period_end"; + } + if (hasPeriodFrom) { + return "period_range"; + } + return null; +} +function buildSemanticFrame(text, filters, warnings) { + const selfScopeDetected = hasImplicitSelfScopeSignal(text); + const 
selectedObjectScopeDetected = hasSelectedObjectScopeSignal(text); + const itemAnchor = typeof filters.item === "string" && filters.item.trim().length > 0 ? filters.item.trim() : null; + const warehouseAnchor = typeof filters.warehouse === "string" && filters.warehouse.trim().length > 0 ? filters.warehouse.trim() : null; + const counterpartyAnchor = typeof filters.counterparty === "string" && filters.counterparty.trim().length > 0 ? filters.counterparty.trim() : null; + const contractAnchor = typeof filters.contract === "string" && filters.contract.trim().length > 0 ? filters.contract.trim() : null; + const organizationAnchor = typeof filters.organization === "string" && filters.organization.trim().length > 0 ? filters.organization.trim() : null; + if (selectedObjectScopeDetected && itemAnchor) { + return { + scope_kind: "selected_object_scope", + anchor_kind: "item", + anchor_value: itemAnchor, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: selfScopeDetected, + selected_object_scope_detected: true + }; + } + if (selfScopeDetected && !warehouseAnchor) { + return { + scope_kind: "implicit_self_scope", + anchor_kind: "self_scope", + anchor_value: null, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: true, + selected_object_scope_detected: selectedObjectScopeDetected + }; + } + const explicitAnchor = itemAnchor ?? + warehouseAnchor ?? + counterpartyAnchor ?? + contractAnchor ?? + organizationAnchor ?? + null; + const anchorKind = itemAnchor + ? "item" + : warehouseAnchor + ? "warehouse" + : counterpartyAnchor + ? "counterparty" + : contractAnchor + ? "contract" + : organizationAnchor + ? "organization" + : "none"; + return { + scope_kind: explicitAnchor ? 
"explicit_anchor" : "none", + anchor_kind: anchorKind, + anchor_value: explicitAnchor, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: selfScopeDetected, + selected_object_scope_detected: selectedObjectScopeDetected + }; +} function extractAddressFilters(userMessage, intent) { const rawText = String(userMessage ?? "").trim(); const text = normalizeMojibakeString(rawText); @@ -1130,6 +1237,10 @@ function extractAddressFilters(userMessage, intent) { if (warehouseAnchor) { filters.warehouse = warehouseAnchor; } + else if ((intent === "inventory_on_hand_as_of_date" || intent === "inventory_supplier_stock_overlap_as_of_date") && + hasImplicitSelfScopeSignal(text)) { + warnings.push("warehouse_self_scope_detected"); + } if (intent === "inventory_supplier_stock_overlap_as_of_date") { const supplierAnchor = asksForInventorySupplierIdentity(text) ? undefined : extractInventorySupplierAnchor(text); if (supplierAnchor) { @@ -1311,9 +1422,11 @@ function extractAddressFilters(userMessage, intent) { const value = filters[key]; return value === undefined || value === null || String(value).trim() === ""; }); + const semanticFrame = buildSemanticFrame(text, filters, warnings); return { extracted_filters: filters, missing_required_filters: missingRequiredFilters, - warnings + warnings, + semantic_frame: semanticFrame }; } diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index 7b49328..873981c 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1309,7 +1309,8 @@ function hasInventoryAsOfCue(text) { return /(?:сейчас|текущ|на\s+дату|по\s+состоянию|срез|на\s+конец|date|as\s+of|current|now|today)/iu.test(text); } function hasInventoryOnHandSignal(text) { - const hasColloquialStockSnapshotCue = 
/(?:что|ч[её])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом)(?=$|[\s,.;:!?])/iu.test(text); + const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test(text); + const hasStockStateCue = /(?:(?:что|ч[еёо])\s+там\s+на\s+склад(?:е|у|ом|ах)|(?:что|ч[еёо]).*происход(?:ит|ило|ящее).*(?:на\s+)?склад(?:е|у|ом|ах)|происход(?:ит|ило|ящее)\s+на\s+склад(?:е|у|ом|ах)|ситуац(?:ия|ии)\s+на\s+склад(?:е|у|ом|ах)|обстановк(?:а|и)\s+на\s+склад(?:е|у|ом|ах)|what(?:'s| is)?\s+(?:there\s+)?(?:on|in)\s+(?:the\s+)?(?:warehouse|stock)|what(?:'s| is)?\s+happening\s+(?:on|in)\s+(?:the\s+)?(?:warehouse|stock))/iu.test(text); const hasAccount41Anchor = hasInventoryAccount41Anchor(text); const hasStockLexeme = /(?:склад(?:е|у|ом|ы|ов)?|warehouse|stock(?:room)?|inventory|on[\s-]?hand)/iu.test(text); if (!hasStockLexeme && !hasAccount41Anchor) { @@ -1323,13 +1324,13 @@ function hasInventoryOnHandSignal(text) { return false; } const hasGoodsLexeme = /(?:товар(?:ы|ов|ом|а|ные)?|номенклатур|материал(?:ы|ов|а|ам)?|item(?:s)?|sku|product(?:s)?)/iu.test(text); - const hasBalanceLexeme = /(?:леж(?:ит|ат)|есть|числ(?:ит(?:ся|сь)|ятся)|остат(?:ок|ки)|срез|на\s+дат|по\s+состоянию|на\s+конец|today|now|current|as\s+of)/iu.test(text); - const hasRequestCue = /(?:покажи|показать|выведи|дай|какие|что|какой|сколько|show|list|which|what)/iu.test(text); + const hasBalanceLexeme = /(?:леж(?:ит|ат)|есть|числ(?:ит(?:ся|сь)|ятся)|остат(?:ок|ки)|срез|на\s+дат|по\s+состоянию|на\s+конец|происход(?:ит|ило|ящее)|ситуац(?:ия|ии)|обстановк(?:а|и)|today|now|current|as\s+of)/iu.test(text); + const hasRequestCue = /(?:покажи|показать|выведи|дай|какие|что|ч[еёо]|какой|сколько|проверь|проверить|чекни|check|show|list|which|what)/iu.test(text); if (hasAccount41Anchor && (hasGoodsLexeme || hasBalanceLexeme || hasRequestCue || hasInventoryAsOfCue(text))) { return true; } - return (hasGoodsLexeme || hasBalanceLexeme || hasColloquialStockSnapshotCue) && - (hasRequestCue || 
hasBalanceLexeme || hasColloquialStockSnapshotCue); + return (hasGoodsLexeme || hasBalanceLexeme || hasColloquialStockSnapshotCue || hasStockStateCue) && + (hasRequestCue || hasBalanceLexeme || hasColloquialStockSnapshotCue || hasStockStateCue); } function hasInventoryProvenanceSignal(text) { return /(?:поставщик|закупк|РїСЂРѕРёСЃС…РѕР¶Рґ|откуда|РєРѕРіРґР° был куплен|активная закупк|purchase provenance|purchase date|supplier provenance|stock overlap)/iu.test(text); diff --git a/llm_normalizer/backend/dist/services/addressQueryClassifier.js b/llm_normalizer/backend/dist/services/addressQueryClassifier.js index 6906d78..fc0a74e 100644 --- a/llm_normalizer/backend/dist/services/addressQueryClassifier.js +++ b/llm_normalizer/backend/dist/services/addressQueryClassifier.js @@ -14,6 +14,14 @@ const ADDRESS_ACTION_TOKENS = [ "покажи", "покаж", "показ", + "проверь", + "провер", + "чекни", + "чекн", + "глянь", + "глян", + "посмотри", + "смотри", "список", "найди", "найд", diff --git a/llm_normalizer/backend/dist/services/addressQueryService.js b/llm_normalizer/backend/dist/services/addressQueryService.js index 3018c4d..684c5ab 100644 --- a/llm_normalizer/backend/dist/services/addressQueryService.js +++ b/llm_normalizer/backend/dist/services/addressQueryService.js @@ -9,6 +9,7 @@ const resolveStage_1 = require("./address_runtime/resolveStage"); const composeStage_1 = require("./address_runtime/composeStage"); const addressCapabilityPolicy_1 = require("./addressCapabilityPolicy"); const addressRouteExpectations_1 = require("./addressRouteExpectations"); +const assistantOrganizationMatcher_1 = require("./assistantOrganizationMatcher"); const ACCOUNT_SCOPE_FIELDS_CHECKED = ["account_dt", "account_kt", "registrator", "analytics"]; const ACCOUNT_SCOPE_MATCH_STRATEGY = "account_code_regex_plus_alias_map_v1"; const ADDRESS_ANCHOR_RECOVERY_LIMIT = 1000; @@ -1183,6 +1184,43 @@ function isCounterpartyRiskIntent(intent) { intent === "list_open_contracts" || intent === 
"open_items_by_counterparty_or_contract"); } +function sameNormalizedOrganizationScope(left, right) { + return (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeSearchText)(left ?? "") === (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeSearchText)(right ?? ""); +} +function applyPreExecutionOrganizationScopeGrounding(input) { + const activeOrganization = (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(input.activeOrganization ?? null); + const candidateOrganizations = (0, assistantOrganizationMatcher_1.mergeKnownOrganizations)([ + ...(Array.isArray(input.knownOrganizations) ? input.knownOrganizations : []), + activeOrganization + ]); + const resolvedOrganizationFromMessage = (0, assistantOrganizationMatcher_1.resolveOrganizationSelectionFromMessage)(input.userMessage, candidateOrganizations); + if (!input.filters.organization && + input.semanticFrame?.scope_kind === "implicit_self_scope" && + activeOrganization) { + input.filters.organization = activeOrganization; + if (!input.warnings.includes("organization_from_active_scope")) { + input.warnings.push("organization_from_active_scope"); + } + if (!input.baseReasons.includes("organization_from_active_scope")) { + input.baseReasons.push("organization_from_active_scope"); + } + } + if (resolvedOrganizationFromMessage && + (!input.filters.organization || input.semanticFrame?.anchor_kind === "organization") && + !sameNormalizedOrganizationScope(input.filters.organization ?? 
null, resolvedOrganizationFromMessage)) { + input.filters.organization = resolvedOrganizationFromMessage; + if (!input.warnings.includes("organization_grounded_from_scope_candidates")) { + input.warnings.push("organization_grounded_from_scope_candidates"); + } + if (!input.baseReasons.includes("organization_grounded_from_scope_candidates")) { + input.baseReasons.push("organization_grounded_from_scope_candidates"); + } + if (input.semanticFrame?.anchor_kind === "organization") { + input.semanticFrame.anchor_value = resolvedOrganizationFromMessage; + } + } + return resolvedOrganizationFromMessage; +} function isHeuristicCandidatesIntent(intent) { return (intent === "list_receivables_counterparties" || intent === "list_payables_counterparties" || @@ -1203,7 +1241,10 @@ function isConfirmedBalanceIntent(intent) { intent === "vat_payable_confirmed_as_of_date" || intent === "vat_liability_confirmed_for_tax_period"); } -function resolveAsOfDateBasis(filters) { +function resolveAsOfDateBasis(filters, semanticFrame) { + if (semanticFrame?.date_basis_hint) { + return semanticFrame.date_basis_hint; + } const asOfDate = normalizeAnalysisDateHint(filters.as_of_date); if (asOfDate) { return "explicit_as_of_date"; @@ -1239,7 +1280,7 @@ function deriveAddressEvidenceStrength(input) { } return undefined; } -function resolveRequestedResultMode(intent, filters) { +function resolveRequestedResultMode(intent, filters, semanticFrame) { if (isConfirmedBalanceIntent(intent)) { return "confirmed_balance"; } @@ -1247,8 +1288,11 @@ function resolveRequestedResultMode(intent, filters) { return "heuristic_candidates"; } if (isHeuristicCandidatesIntent(intent)) { - const asOfDateBasis = resolveAsOfDateBasis(filters); - if (asOfDateBasis === "explicit_as_of_date" || asOfDateBasis === "period_end" || asOfDateBasis === "period_range") { + const asOfDateBasis = resolveAsOfDateBasis(filters, semanticFrame); + if (asOfDateBasis === "explicit_as_of_date" || + asOfDateBasis === "period_end" || + 
asOfDateBasis === "period_range" || + asOfDateBasis === "implicit_current_snapshot") { return "confirmed_balance"; } return "heuristic_candidates"; @@ -1256,8 +1300,8 @@ function resolveRequestedResultMode(intent, filters) { return undefined; } function deriveAddressResultSemantics(input) { - const asOfDateBasis = resolveAsOfDateBasis(input.filters); - const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters); + const asOfDateBasis = resolveAsOfDateBasis(input.filters, input.semanticFrame); + const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters, input.semanticFrame); if (isHeuristicCandidatesIntent(input.intent)) { return { requested_result_mode: requestedResultMode, @@ -1542,6 +1586,9 @@ function shouldBoostAutoBroadenedLimit(intent) { intent === "inventory_purchase_to_sale_chain" || intent === "inventory_aging_by_purchase_date"); } +function shouldClearAsOfDateForHistoryRecovery(intent) { + return intent === "inventory_purchase_provenance_for_item" || intent === "inventory_purchase_documents_for_item"; +} function invertSort(sort) { return sort === "period_asc" ? "period_desc" : "period_asc"; } @@ -2097,10 +2144,11 @@ function buildLimitedExecutionResult(input) { intent: input.intent.intent, selectedRecipe: input.selectedRecipe, filters: input.filters, + semanticFrame: input.semanticFrame, responseType: "LIMITED_WITH_REASON", rowsMatched: input.rowsMatched }); - const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters); + const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters, input.semanticFrame); const reasonsWithConfirmedFallback = withConfirmedBalanceFallbackReason(input.reasons, requestedResultMode, undefined, resultSemantics.result_mode); const exactLimitedReason = input.intent.intent === "inventory_on_hand_as_of_date" ? 
"exact_inventory_mode_limited_response" @@ -2172,6 +2220,7 @@ function buildLimitedExecutionResult(input) { account_scope_drop_reason: accountScopeAudit.accountScopeDropReason, runtime_readiness: runtimeReadinessForLimitedCategory(input.category), limited_reason_category: input.category, + semantic_frame: input.semanticFrame ?? null, response_type: "LIMITED_WITH_REASON", capability_id: input.capabilityAudit?.capabilityId ?? null, capability_layer: input.capabilityAudit?.layer ?? null, @@ -2198,11 +2247,12 @@ class AddressQueryService { return null; } const followupContext = options.followupContext ?? null; - const decompose = (0, decomposeStage_1.runAddressDecomposeStage)(userMessage, followupContext); + const decompose = (0, decomposeStage_1.runAddressDecomposeStage)(userMessage, followupContext, options.llmSemanticHints ?? null); if (!decompose) { return null; } const { mode, shape, intent, filters } = decompose; + const semanticFrame = filters.semantic_frame ?? null; const baseReasons = [...decompose.baseReasons]; const analysisDate = normalizeAnalysisDateHint(options.analysisDateHint); if (analysisDate) { @@ -2218,7 +2268,16 @@ class AddressQueryService { baseReasons.push("as_of_date_from_analysis_context"); } } - const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters); + const resolvedOrganizationFromMessage = applyPreExecutionOrganizationScopeGrounding({ + userMessage, + filters: filters.extracted_filters, + semanticFrame, + warnings: filters.warnings, + baseReasons, + activeOrganization: options.activeOrganization ?? null, + knownOrganizations: options.knownOrganizations ?? 
[] + }); + const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame); const confirmedBalancePayablesIntent = (intent.intent === "list_payables_counterparties" || intent.intent === "payables_confirmed_as_of_date") && requestedResultMode === "confirmed_balance"; const confirmedBalanceReceivablesIntent = intent.intent === "receivables_confirmed_as_of_date" && requestedResultMode === "confirmed_balance"; @@ -2236,7 +2295,7 @@ class AddressQueryService { const inventoryConfirmedExecution = confirmedBalanceInventoryIntent ? resolveExecutionFiltersForConfirmedBalance(filters.extracted_filters, analysisDate) : null; - const executionFilters = inventoryConfirmedExecution?.executionFilters ?? + let executionFilters = inventoryConfirmedExecution?.executionFilters ?? payablesConfirmedExecution?.executionFilters ?? receivablesConfirmedExecution?.executionFilters ?? vatPayableConfirmedExecution?.executionFilters ?? @@ -2303,6 +2362,7 @@ class AddressQueryService { ...baseReasons, config_1.FEATURE_ASSISTANT_CAPABILITY_ROUTE_GUARD_V1 ? 
"capability_route_guard_blocked" : "capability_route_guard_skipped" ], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2384,6 +2444,7 @@ class AddressQueryService { nextStep: "могу проверить близкие сценарии: документы/платежи по контрагенту, договоры или остаток по счету", limitations: ["intent_not_supported_in_v1"], reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2405,6 +2466,7 @@ class AddressQueryService { nextStep: "можно выбрать близкий поддерживаемый сценарий или переключить запрос в режим расширенной проверки", limitations: ["recipe_not_available"], reasons: [...baseReasons, ...recipeSelection.selection_reason], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2426,6 +2488,7 @@ class AddressQueryService { nextStep: `уточните: ${recipeSelection.missing_required_filters.join(", ")}`, limitations: ["missing_required_filters"], reasons: [...baseReasons, ...recipeSelection.selection_reason], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2447,6 +2510,7 @@ class AddressQueryService { nextStep: "включите FEATURE_ASSISTANT_ADDRESS_QUERY_LIVE_V1", limitations: ["address_live_lane_disabled"], reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2621,6 +2685,7 @@ class AddressQueryService { nextStep: mcp.error, limitations: ["mcp_call_failed"], reasons: [...baseReasons, mcp.error], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2633,7 +2698,7 @@ class AddressQueryService { scopedRows.length === 0; const normalizedRows = accountScopeFallbackApplied ? normalizedRawRows : scopedRows; anchor = (0, resolveStage_1.refineAnchorFromRows)(anchor, normalizedRows); - const filtersForMatching = anchor.anchor_type === "counterparty" && anchor.anchor_value_resolved + let filtersForMatching = anchor.anchor_type === "counterparty" && anchor.anchor_value_resolved ? 
{ ...executionFilters, counterparty: anchor.anchor_value_resolved } : anchor.anchor_type === "contract" && anchor.anchor_value_resolved ? { ...executionFilters, contract: anchor.anchor_value_resolved } @@ -2645,11 +2710,55 @@ class AddressQueryService { rowsBeforeScope: normalizedRawRows.length, rowsAfterScope: normalizedRows.length }); - const anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); - const filterByAnchors = anchorFilter.rows; - const filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); - const filteredRowsFutureGuard = applyFutureDatedRowsGuard(filteredRowsBeforeFutureGuard, intent.intent, futureGuardReferenceDate); - const filteredRows = filteredRowsFutureGuard.rows; + let anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); + let filterByAnchors = anchorFilter.rows; + let filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); + let filteredRowsFutureGuard = applyFutureDatedRowsGuard(filteredRowsBeforeFutureGuard, intent.intent, futureGuardReferenceDate); + let filteredRows = filteredRowsFutureGuard.rows; + let organizationWarehouseRecoveryApplied = false; + if (filteredRows.length === 0 && + anchorFilter.mismatchReason === "warehouse_anchor_not_matched_in_materialized_rows" && + resolvedOrganizationFromMessage) { + filters.extracted_filters = { + ...filters.extracted_filters, + organization: resolvedOrganizationFromMessage + }; + delete filters.extracted_filters.warehouse; + executionFilters = { + ...executionFilters, + organization: resolvedOrganizationFromMessage + }; + delete executionFilters.warehouse; + filtersForMatching = { + ...filtersForMatching, + organization: resolvedOrganizationFromMessage + }; + delete filtersForMatching.warehouse; + anchor = { + ...anchor, + anchor_type: "organization", + anchor_value_raw: anchor.anchor_value_raw, + anchor_value_resolved: resolvedOrganizationFromMessage, + resolver_confidence: "medium" + }; 
+ if (semanticFrame) { + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "organization"; + semanticFrame.anchor_value = resolvedOrganizationFromMessage; + } + if (!filters.warnings.includes("warehouse_anchor_regrounded_to_organization_scope")) { + filters.warnings.push("warehouse_anchor_regrounded_to_organization_scope"); + } + if (!baseReasons.includes("warehouse_anchor_regrounded_to_organization_scope")) { + baseReasons.push("warehouse_anchor_regrounded_to_organization_scope"); + } + anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); + filterByAnchors = anchorFilter.rows; + filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); + filteredRowsFutureGuard = applyFutureDatedRowsGuard(filteredRowsBeforeFutureGuard, intent.intent, futureGuardReferenceDate); + filteredRows = filteredRowsFutureGuard.rows; + organizationWarehouseRecoveryApplied = filteredRows.length > 0; + } if (filteredRowsFutureGuard.droppedCount > 0) { if (!filters.warnings.includes("future_rows_excluded_from_response")) { filters.warnings.push("future_rows_excluded_from_response"); @@ -2675,6 +2784,11 @@ class AddressQueryService { : matchFailureStage === "materialized_but_filtered_out_by_recipe" ? "rows_filtered_out_by_intent_recipe_after_anchor_match" : null; + if (organizationWarehouseRecoveryApplied) { + if (!baseReasons.includes("organization_scope_live_grounding_recovered_rows")) { + baseReasons.push("organization_scope_live_grounding_recovered_rows"); + } + } if (filteredRows.length === 0 && intent.intent === "list_documents_by_contract" && filterByAnchors.length > 0) { const recoveredBankRows = applyIntentSpecificFilter("bank_operations_by_contract", filterByAnchors); const recoveredRows = recoveredBankRows.length > 0 ? 
recoveredBankRows : filterByAnchors; @@ -2732,6 +2846,7 @@ class AddressQueryService { intent: intent.intent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + semanticFrame, responseType: factual.responseType, rowsMatched: recoveredRows.length }), factual.semantics), @@ -2855,6 +2970,7 @@ class AddressQueryService { intent: intent.intent, selectedRecipe: expandedSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, responseType: expandedFactual.responseType, rowsMatched: expandedFilteredRows.length }), expandedFactual.semantics), @@ -2870,8 +2986,13 @@ class AddressQueryService { } if (filteredRows.length === 0 && canAutoBroadenPeriodWindow(intent.intent, filters.extracted_filters)) { const autoBroadenedFilters = { ...filters.extracted_filters }; + const broadenedAdjustments = []; delete autoBroadenedFilters.period_from; delete autoBroadenedFilters.period_to; + if (stageStatus === "no_raw_rows" && shouldClearAsOfDateForHistoryRecovery(intent.intent) && toNonEmptyFilterValue(autoBroadenedFilters.as_of_date)) { + delete autoBroadenedFilters.as_of_date; + broadenedAdjustments.push("as_of_date_cleared_for_history_recovery"); + } if (shouldBoostAutoBroadenedLimit(intent.intent)) { autoBroadenedFilters.limit = Math.max(ADDRESS_ANCHOR_RECOVERY_LIMIT, typeof autoBroadenedFilters.limit === "number" && Number.isFinite(autoBroadenedFilters.limit) ? 
Math.max(1, Math.trunc(autoBroadenedFilters.limit)) @@ -2930,12 +3051,17 @@ class AddressQueryService { const observedWindow = deriveObservedPeriodWindow(broadenedFilteredRows); const broadenedPrefix = composeAutoBroadenedPeriodPrefix(filters.extracted_filters, observedWindow); const broadenedFactual = (0, composeStage_1.composeFactualReply)(intent.intent, broadenedFilteredRows, composeOptionsFromFilters(autoBroadenedFilters)); - const broadenedLimitations = [...filters.warnings, "period_window_auto_broadened_to_available_data"]; - const broadenedReasons = [...baseReasons, "period_window_auto_broadened_to_available_data"]; + const broadenedLimitations = [ + ...filters.warnings, + ...broadenedAdjustments, + "period_window_auto_broadened_to_available_data" + ]; + const broadenedReasons = [...baseReasons, ...broadenedAdjustments, "period_window_auto_broadened_to_available_data"]; const broadenedResultSemantics = mergeAddressResultSemantics(deriveAddressResultSemantics({ intent: intent.intent, selectedRecipe: broadenedSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, responseType: broadenedFactual.responseType, rowsMatched: broadenedFilteredRows.length }), broadenedFactual.semantics); @@ -3000,6 +3126,7 @@ class AddressQueryService { route_expectation_expected_selected_recipes: broadenedRouteExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: broadenedRouteExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: broadenedRouteExpectationAudit.expectedResultModes, + semantic_frame: semanticFrame, ...broadenedResultSemantics, limitations: broadenedLimitations, reasons: withConfirmedBalanceFallbackReason(broadenedReasons, requestedResultMode, broadenedFactual.semantics) @@ -3124,6 +3251,7 @@ class AddressQueryService { intent: intent.intent, selectedRecipe: historicalSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, 
responseType: historicalFactual.responseType, rowsMatched: historicalFilteredRows.length }), historicalFactual.semantics), @@ -3195,6 +3323,7 @@ class AddressQueryService { intent: intent.intent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + semanticFrame, responseType: fallbackFactual.responseType, rowsMatched: documentBankFallbackRows.length }), fallbackFactual.semantics), @@ -3318,6 +3447,7 @@ class AddressQueryService { nextStep, limitations, reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -3351,6 +3481,7 @@ class AddressQueryService { intent: composeIntent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + semanticFrame, responseType: factual.responseType, rowsMatched: filteredRows.length }), factual.semantics); @@ -3388,6 +3519,7 @@ class AddressQueryService { nextStep: "проверьте intent/recipe mapping или отключите FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 для безопасного rollout", limitations: ["route_expectation_mismatch_guard_blocked"], reasons: [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`], + semanticFrame, capabilityAudit, shadowRouteAudit, routeExpectationAudit: finalRouteExpectationAudit @@ -3437,6 +3569,7 @@ class AddressQueryService { : "specify as_of_date/counterparty or enable detailed settlement registers for exact confirmed balance", limitations: [`exact_${exactModeName}_mode_unconfirmed_output_blocked`], reasons: [...baseReasons, `exact_${exactModeName}_mode_unconfirmed_output_blocked`], + semanticFrame, capabilityAudit, shadowRouteAudit, routeExpectationAudit: finalRouteExpectationAudit @@ -3500,6 +3633,7 @@ class AddressQueryService { route_expectation_expected_selected_recipes: finalRouteExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: finalRouteExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: 
/**
 * Reports whether `intent` is the inventory root frame
 * (stock on hand as of a date).
 */
function isInventoryRootFrameIntent(intent) {
    return intent === "inventory_on_hand_as_of_date";
}

// Intents that count as a drill-down frame.
const INVENTORY_DRILLDOWN_FRAME_INTENTS = new Set([
    "inventory_purchase_provenance_for_item",
    "inventory_purchase_documents_for_item",
    "inventory_sale_trace_for_item",
    "inventory_purchase_to_sale_chain",
    "inventory_aging_by_purchase_date"
]);

/** Reports whether `intent` is one of the inventory drill-down intents. */
function isInventoryDrilldownFrameIntent(intent) {
    return INVENTORY_DRILLDOWN_FRAME_INTENTS.has(intent);
}

/**
 * Returns a copy of the follow-up context whose "previous" slots are rewound
 * to the stored root frame (root intent, a shallow copy of the root filters,
 * root anchor when present) and marks the frame kind as "inventory_root".
 *
 * The context is returned untouched when it is missing or carries no
 * root intent / root filters.
 */
function buildInventoryRootFollowupContext(followupContext) {
    if (!followupContext || !followupContext.root_intent || !followupContext.root_filters) {
        return followupContext;
    }
    return {
        ...followupContext,
        previous_intent: followupContext.root_intent,
        previous_filters: { ...followupContext.root_filters },
        previous_anchor_type: followupContext.root_anchor_type ?? followupContext.previous_anchor_type,
        previous_anchor_value: followupContext.root_anchor_value ?? followupContext.previous_anchor_value,
        current_frame_kind: "inventory_root"
    };
}

/** Counts whitespace-separated tokens; nullish input counts as zero. */
function getTokenCount(text) {
    return String(text ?? "")
        .trim()
        .split(/\s+/)
        .filter(Boolean).length;
}

// Month vocabulary in calendar order (index + 1 is the month number).
// `russian` is a stem that may be followed by any case ending; the May forms
// are short enough to need their own trailing boundary. `english` lists full
// names and abbreviations that must stand as whole words.
const MONTH_NAME_SOURCES = [
    { russian: "январ", english: "january|jan" },
    { russian: "феврал", english: "february|feb" },
    { russian: "март", english: "march|mar" },
    { russian: "апрел", english: "april|apr" },
    { russian: "ма(?:й|е|я)(?!\\p{L})", english: "may" },
    { russian: "июн", english: "june|jun" },
    { russian: "июл", english: "july|jul" },
    { russian: "август", english: "august|aug" },
    { russian: "сентябр", english: "september|sept|sep" },
    { russian: "октябр", english: "october|oct" },
    { russian: "ноябр", english: "november|nov" },
    { russian: "декабр", english: "december|dec" }
];

// Every alternative must not be preceded by a letter, and English forms must
// not be followed by one either. Without these guards "summary" resolved to
// March, "maybe" to May, "decide" to December and "смартфон" to March.
const MONTH_NAME_PATTERNS = MONTH_NAME_SOURCES.map(({ russian, english }) => new RegExp(`(?<!\\p{L})(?:${russian}|(?:${english})(?!\\p{L}))`, "iu"));

/**
 * Finds the first calendar month (checked January → December) named in `text`.
 *
 * @param {unknown} text - user message; nullish values are treated as empty.
 * @returns {number|null} month number 1–12, or null when no month is named.
 */
function resolveMonthNumberFromText(text) {
    const normalized = String(text ?? "").toLowerCase();
    if (!normalized) {
        return null;
    }
    const index = MONTH_NAME_PATTERNS.findIndex((pattern) => pattern.test(normalized));
    return index === -1 ? null : index + 1;
}

/**
 * Extracts a four-digit year (19xx / 20xx) from the first filter that has one,
 * looking at as_of_date, then period_to, then period_from.
 *
 * @returns {number|null} the year, or null when no filter carries one.
 */
function resolveYearFromFilters(filters) {
    const candidates = [
        toNonEmptyString(filters?.as_of_date),
        toNonEmptyString(filters?.period_to),
        toNonEmptyString(filters?.period_from)
    ];
    for (const candidate of candidates) {
        const match = candidate?.match(/\b((?:19|20)\d{2})\b/u);
        if (match) {
            const year = Number(match[1]);
            if (Number.isFinite(year)) {
                return year;
            }
        }
    }
    return null;
}

/** Detects phrases such as "этого года", "того же года", "this year", "same year". */
function hasRelativeYearHint(text) {
    return /(?:эт(?:от|ого)(?:\s+же)?\s+год|этого\s+же\s+года|того\s+же\s+года|this\s+year|same\s+year|that\s+year)/iu.test(String(text ?? ""));
}

/**
 * Turns a bare month mention in a follow-up ("а в марте?") into a full
 * calendar-month window, borrowing the year from the root frame's filters.
 *
 * Returns null unless all of the following hold:
 *  - the follow-up context has an inventory root frame;
 *  - the message names a month;
 *  - the message has no explicit period literal or current-date hint;
 *  - the message is short (at most 8 tokens) or refers to the same/this year;
 *  - the root filters carry a year.
 *
 * @returns {{period_from: string, period_to: string, as_of_date: string}|null}
 */
function resolveRelativeMonthPeriodFromInventoryRoot(userMessage, followupContext) {
    if (!followupContext || !isInventoryRootFrameIntent(followupContext.root_intent)) {
        return null;
    }
    const month = resolveMonthNumberFromText(userMessage);
    if (!month) {
        return null;
    }
    const normalized = String(userMessage ?? "");
    if (hasExplicitPeriodLiteral(normalized) || hasExplicitCurrentDateHint(normalized)) {
        return null;
    }
    const shortTemporalPatch = getTokenCount(normalized) <= 8 || hasRelativeYearHint(normalized);
    if (!shortTemporalPatch) {
        return null;
    }
    const year = resolveYearFromFilters(followupContext.root_filters);
    if (!year) {
        return null;
    }
    // `month` is 1-based, so day 0 of the following 0-based month index is the
    // last day of the requested month.
    const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate();
    const monthPart = String(month).padStart(2, "0");
    const periodFrom = `${year}-${monthPart}-01`;
    const periodTo = `${year}-${monthPart}-${String(lastDay).padStart(2, "0")}`;
    return {
        period_from: periodFrom,
        period_to: periodTo,
        as_of_date: periodTo
    };
}

/**
 * Decides whether a follow-up asked from inside an inventory drill-down should
 * be answered against the stored inventory root frame instead.
 *
 * False when there is no inventory root frame, when the conversation is not
 * currently in a drill-down, or when the message carries any drill-down cue
 * (selected object, supplier, purchase documents, purchase date, sale,
 * purchase-to-sale chain). Otherwise true when the intent is already the root
 * intent, or when the message only patches the date/period.
 */
function shouldRestoreInventoryRootFrame(userMessage, intent, extractedFilters, followupContext) {
    if (!followupContext || !isInventoryRootFrameIntent(followupContext.root_intent)) {
        return false;
    }
    const currentFrameKind = followupContext.current_frame_kind ?? null;
    const previousIntent = followupContext.previous_intent;
    const comingFromInventoryDrilldown = currentFrameKind === "inventory_drilldown" || isInventoryDrilldownFrameIntent(previousIntent);
    if (!comingFromInventoryDrilldown) {
        return false;
    }
    const normalized = String(userMessage ?? "");
    if (hasSelectedObjectInventorySignal(normalized) ||
        hasInventorySupplierFollowupCue(normalized) ||
        hasInventoryPurchaseDocumentsFollowupCue(normalized) ||
        hasInventoryPurchaseDateFollowupCue(normalized) ||
        hasBareInventoryPurchaseDateFollowupCue(normalized) ||
        hasInventorySaleFollowupCue(normalized) ||
        hasInventoryPurchaseToSaleChainFollowupCue(normalized)) {
        return false;
    }
    if (intent === "inventory_on_hand_as_of_date") {
        return true;
    }
    const hasTemporalPatch = hasExplicitPeriodWindow(extractedFilters) ||
        Boolean(toNonEmptyString(extractedFilters.as_of_date)) ||
        hasExplicitPeriodLiteral(normalized) ||
        Boolean(resolveRelativeMonthPeriodFromInventoryRoot(normalized, followupContext));
    return hasTemporalPatch;
}

/** Detects an explicit "по выбранному объекту" / "for selected object" reference. */
function hasSelectedObjectInventorySignal(text) {
    return /(?:по\s+выбранному\s+объекту|for\s+selected\s+object)/iu.test(String(text ?? ""));
}
relativeMonthFromInventoryRoot.as_of_date; + reasons.push("period_derived_from_inventory_root_frame_year"); + } if (intent === "inventory_aging_by_purchase_date") { const explicitItemMention = /(?:^|[\s,.;:!?()\-\u2014])(?:товар(?:у|а|ом)?|позици(?:и|я|ю)|item|row|line)(?=$|[\s,.;:!?()\-\u2014])/iu.test(String(userMessage ?? "")); if (toNonEmptyString(merged.item) && !explicitItemMention) { @@ -822,7 +969,7 @@ function deriveIntentWithFollowupContext(detectedIntent, userMessage, followupCo reasons: [...detectedIntent.reasons, "intent_from_followup_context"] }; } -function runAddressDecomposeStage(userMessage, followupContext) { +function runAddressDecomposeStage(userMessage, followupContext, llmSemanticHints = null) { const detectedMode = (0, addressQueryClassifier_1.detectAddressQuestionMode)(userMessage); const shape = (0, addressQueryShapeClassifier_1.classifyAddressQueryShape)(userMessage); const allowExplainAsFollowup = shape.shape === "EXPLAIN_OR_REASON" && @@ -850,17 +997,29 @@ function runAddressDecomposeStage(userMessage, followupContext) { if (mode.mode !== "address_query") { return null; } - const intent = deriveIntentWithFollowupContext(detectedIntent, userMessage, followupContext); - const extractedFilters = (0, addressFilterExtractor_1.extractAddressFilters)(userMessage, intent.intent); - const followupMerged = mergeFollowupFilters(extractedFilters.extracted_filters, intent.intent, userMessage, followupContext); + let effectiveFollowupContext = followupContext; + let intent = deriveIntentWithFollowupContext(detectedIntent, userMessage, effectiveFollowupContext); + let extractedFilters = (0, semanticHintOverlay_1.applyAddressLlmSemanticHintsToExtraction)((0, addressFilterExtractor_1.extractAddressFilters)(userMessage, intent.intent), llmSemanticHints); + if (shouldRestoreInventoryRootFrame(userMessage, intent.intent, extractedFilters.extracted_filters, effectiveFollowupContext)) { + effectiveFollowupContext = 
buildInventoryRootFollowupContext(effectiveFollowupContext); + intent = { + intent: effectiveFollowupContext?.root_intent ?? "inventory_on_hand_as_of_date", + confidence: "low", + reasons: [...intent.reasons, "intent_restored_to_inventory_root_frame"] + }; + extractedFilters = (0, semanticHintOverlay_1.applyAddressLlmSemanticHintsToExtraction)((0, addressFilterExtractor_1.extractAddressFilters)(userMessage, intent.intent), llmSemanticHints); + } + const followupMerged = mergeFollowupFilters(extractedFilters.extracted_filters, intent.intent, userMessage, effectiveFollowupContext); const filters = { extracted_filters: followupMerged.filters, missing_required_filters: resolveMissingRequiredFilters(intent.intent, followupMerged.filters), - warnings: [...new Set([...extractedFilters.warnings, ...followupMerged.reasons])] + warnings: [...new Set([...extractedFilters.warnings, ...followupMerged.reasons])], + semantic_frame: extractedFilters.semantic_frame }; - const followupContextApplied = Boolean(followupContext) && + const followupContextApplied = Boolean(effectiveFollowupContext) && (mode.reasons.includes("address_mode_from_followup_context") || intent.reasons.includes("intent_from_followup_context") || + intent.reasons.includes("intent_restored_to_inventory_root_frame") || followupMerged.reasons.length > 0); const baseReasons = [ ...mode.reasons, diff --git a/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js b/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js index 930e786..5e16505 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js +++ b/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js @@ -6,10 +6,11 @@ const addressQueryClassifier_1 = require("../addressQueryClassifier"); const addressQueryShapeClassifier_1 = require("../addressQueryShapeClassifier"); const addressIntentResolver_1 = require("../addressIntentResolver"); const 
addressFilterExtractor_1 = require("../addressFilterExtractor"); -const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN = /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu; +const semanticHintOverlay_1 = require("./semanticHintOverlay"); +const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN = /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|\u0441\u043a\u043b\u0430\u0434|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|counterparty|contract|document|account|balance|turnover|operations?|warehouse|stock|inventory|item|goods|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu; const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu; const ADDRESS_SEMANTIC_SCOPE_META_PATTERN = 
/(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu; -const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN = /(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu; +const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN = /(?:\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition|\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c).*(?:\u0445\u0432\u043e\u0441\u0442|\u0440\u0430\u0437\u0440\u044b\u0432|\u0437\u0430\u043a\u0440\u044b\u0442|\u0446\u0435\u043f\u043e\u0447|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u043e\u0448\u0438\u0431|\u0430\u043d\u043e\u043c\u0430\u043b|\u0440\u0438\u0441\u043a|\u0441\u0432\u0435\u0440\u043a)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c).*(?:\u043f\u043e\u0447\u0435\u043c\u0443|\u0445\u0432\u043e\u0441\u0442|\u0440\u0430\u0437\u0440\u044b\u0432|\u0437\u0430\u043a\u0440\u044b\u0442|\u0446\u0435\u043f\u043e\u0447|\u043e\u0448\u0438\u0431|\u0430\u043d\u043e\u043c\u0430\u043b|\u0440\u0438\u0441\u043a))/iu; function normalizeCompact(value) { return String(value ?? 
"") .toLowerCase() @@ -127,8 +128,17 @@ function buildAddressLlmPredecomposeContractV1(input) { const mode = (0, addressQueryClassifier_1.detectAddressQuestionMode)(canonicalMessage); const shape = (0, addressQueryShapeClassifier_1.classifyAddressQueryShape)(canonicalMessage); const intent = (0, addressIntentResolver_1.resolveAddressIntent)(canonicalMessage); - const extraction = (0, addressFilterExtractor_1.extractAddressFilters)(canonicalMessage, intent.intent); + const extraction = (0, semanticHintOverlay_1.applyAddressLlmSemanticHintsToExtraction)((0, addressFilterExtractor_1.extractAddressFilters)(canonicalMessage, intent.intent), input.semanticHints ?? null); const filters = extraction.extracted_filters; + const semanticFrame = extraction.semantic_frame ?? { + scope_kind: "none", + anchor_kind: "none", + anchor_value: null, + date_scope_kind: "none", + date_basis_hint: null, + self_scope_detected: false, + selected_object_scope_detected: false + }; const periodScope = inferPeriodScope(filters, canonicalMessage); return { schema_version: "address_llm_predecompose_contract_v1", @@ -153,8 +163,9 @@ function buildAddressLlmPredecomposeContractV1(input) { period_from: toNonEmptyString(filters.period_from), period_to: toNonEmptyString(filters.period_to), as_of_date: toNonEmptyString(filters.as_of_date), - has_explicit_period: Boolean(toNonEmptyString(filters.as_of_date) || toNonEmptyString(filters.period_from) || toNonEmptyString(filters.period_to)) + has_explicit_period: semanticFrame.date_scope_kind === "explicit" }, + semantics: semanticFrame, aggregation_profile: inferAggregationProfile(intent.intent, shape.shape) }; } @@ -238,6 +249,7 @@ function buildAddressSemanticExtractionContractV1(input) { as_of_date: predecomposeContract.period.as_of_date, has_explicit_period: predecomposeContract.period.has_explicit_period }, + semantics: predecomposeContract.semantics, guard_hints: { source_data_signal_detected: sourceDataSignal, canonical_data_signal_detected: 
/** Trims a value to a string; returns null for nullish or blank input. */
function toNonEmptyString(value) {
    if (value === null || value === undefined) {
        return null;
    }
    const normalized = String(value).trim();
    return normalized.length > 0 ? normalized : null;
}

/** Lower-cases a token and joins whitespace runs with "_" ("Self Scope" -> "self_scope"). */
function normalizeToken(value) {
    return String(value ?? "")
        .trim()
        .toLowerCase()
        .replace(/\s+/g, "_");
}

// Scope kinds that name a concrete filter; the kind doubles as the filter key
// and as the prefix of the "<kind>_from_llm_semantics" warning.
const EXPLICIT_ANCHOR_KINDS = new Set(["organization", "warehouse", "counterparty", "contract", "item"]);

// Every scope_target_kind accepted from the LLM; anything else becomes "none".
const SCOPE_TARGET_KINDS = new Set(["self_scope", "selected_object", ...EXPLICIT_ANCHOR_KINDS]);

/**
 * Validates raw LLM semantic hints into a closed vocabulary.
 *
 * @param {unknown} value - raw hints; any non-object yields null.
 * @returns {{scope_target_kind: string, scope_target_text: string|null,
 *            date_scope_kind: "explicit"|"implicit_current"|"missing",
 *            self_scope_detected: boolean,
 *            selected_object_scope_detected: boolean}|null}
 */
function normalizeAddressLlmSemanticHints(value) {
    if (!value || typeof value !== "object") {
        return null;
    }
    const source = value;
    const scopeToken = normalizeToken(source.scope_target_kind);
    const dateToken = normalizeToken(source.date_scope_kind);
    const scopeTargetKind = SCOPE_TARGET_KINDS.has(scopeToken) ? scopeToken : "none";
    const dateScopeKind = dateToken === "explicit" || dateToken === "implicit_current" ? dateToken : "missing";
    return {
        scope_target_kind: scopeTargetKind,
        scope_target_text: toNonEmptyString(source.scope_target_text),
        date_scope_kind: dateScopeKind,
        // The scope kind alone implies the matching flag.
        self_scope_detected: source.self_scope_detected === true || scopeTargetKind === "self_scope",
        selected_object_scope_detected: source.selected_object_scope_detected === true || scopeTargetKind === "selected_object"
    };
}

/** Returns the extraction's semantic frame, or an all-"none" frame when absent. */
function defaultSemanticFrame(extraction) {
    return (extraction.semantic_frame ?? {
        scope_kind: "none",
        anchor_kind: "none",
        anchor_value: null,
        date_scope_kind: "none",
        date_basis_hint: null,
        self_scope_detected: false,
        selected_object_scope_detected: false
    });
}

/** Appends a warning code unless it is already present (mutates `warnings`). */
function pushWarning(warnings, value) {
    if (!warnings.includes(value)) {
        warnings.push(value);
    }
}

/**
 * Applies the LLM date-scope hint to `frame` in place.
 * "explicit" always wins; "implicit_current" never downgrades an explicit frame.
 */
function applyDateScopeHint(frame, dateScopeKind) {
    if (dateScopeKind === "explicit") {
        frame.date_scope_kind = "explicit";
        return;
    }
    if (dateScopeKind === "implicit_current" && frame.date_scope_kind !== "explicit") {
        frame.date_scope_kind = "implicit_current";
        frame.date_basis_hint = "implicit_current_snapshot";
    }
}

/**
 * Writes an explicit anchor (organization / warehouse / counterparty /
 * contract / item) into the filters, warnings and frame, all in place.
 * An organization anchor additionally drops a previously extracted warehouse.
 */
function applyExplicitAnchorHint(kind, anchorText, filters, warnings, frame) {
    filters[kind] = anchorText;
    pushWarning(warnings, `${kind}_from_llm_semantics`);
    if (kind === "organization" && toNonEmptyString(filters.warehouse)) {
        delete filters.warehouse;
        pushWarning(warnings, "warehouse_cleared_by_llm_organization_semantics");
    }
    frame.scope_kind = "explicit_anchor";
    frame.anchor_kind = kind;
    frame.anchor_value = anchorText;
}

/**
 * Overlays LLM semantic hints on a rule-based filter extraction.
 *
 * The input extraction is never mutated: filters, warnings and the semantic
 * frame are copied first. When the hints are missing or not an object the
 * extraction is returned as-is (same reference).
 *
 * Hints are applied in this order, so later steps can override earlier ones:
 * date scope, self scope, selected-object scope, explicit anchor.
 *
 * @param {object} extraction - result of the filter extractor.
 * @param {unknown} semanticHintsInput - raw LLM hints.
 * @returns {object} extraction with `extracted_filters`, `warnings` and
 *   `semantic_frame` replaced by the overlaid copies.
 */
function applyAddressLlmSemanticHintsToExtraction(extraction, semanticHintsInput) {
    const semanticHints = normalizeAddressLlmSemanticHints(semanticHintsInput);
    if (!semanticHints) {
        return extraction;
    }
    const extractedFilters = { ...(extraction.extracted_filters ?? {}) };
    const warnings = [...(Array.isArray(extraction.warnings) ? extraction.warnings : [])];
    const semanticFrame = { ...defaultSemanticFrame(extraction) };
    const scopeTargetText = semanticHints.scope_target_text;
    applyDateScopeHint(semanticFrame, semanticHints.date_scope_kind);
    if (semanticHints.self_scope_detected) {
        semanticFrame.scope_kind = "implicit_self_scope";
        semanticFrame.anchor_kind = "self_scope";
        semanticFrame.anchor_value = null;
        semanticFrame.self_scope_detected = true;
    }
    if (semanticHints.selected_object_scope_detected) {
        // Only claims the scope when nothing else (e.g. self scope) already did.
        if (semanticFrame.scope_kind === "none") {
            semanticFrame.scope_kind = "selected_object_scope";
            semanticFrame.anchor_kind = "selected_object";
            semanticFrame.anchor_value = null;
        }
        semanticFrame.selected_object_scope_detected = true;
    }
    if (scopeTargetText && EXPLICIT_ANCHOR_KINDS.has(semanticHints.scope_target_kind)) {
        applyExplicitAnchorHint(semanticHints.scope_target_kind, scopeTargetText, extractedFilters, warnings, semanticFrame);
    }
    return {
        ...extraction,
        extracted_filters: extractedFilters,
        warnings,
        semantic_frame: semanticFrame
    };
}
/**
 * Builds the options object for one address-lane query attempt.
 *
 * `analysisDateHint` is always present; every other key is added only when the
 * corresponding input is set, so the result stays minimal.
 *
 * @param {object} input
 * @param {unknown} input.analysisDateHint - copied through unchanged.
 * @param {object} [input.scopedFollowupContext] - emitted as `followupContext`.
 * @param {object|null} [input.llmSemanticHints]
 * @param {string|null} [input.activeOrganization]
 * @param {string[]} [input.knownOrganizations] - emitted only when non-empty.
 * @returns {object} query options.
 */
function buildAssistantAddressLaneAttemptQueryOptions(input) {
    const options = {
        analysisDateHint: input.analysisDateHint
    };
    if (input.scopedFollowupContext) {
        options.followupContext = input.scopedFollowupContext;
    }
    if (input.llmSemanticHints) {
        options.llmSemanticHints = input.llmSemanticHints;
    }
    if (input.activeOrganization) {
        options.activeOrganization = input.activeOrganization;
    }
    // Callers that predate the organization-scope fields do not pass
    // knownOrganizations at all; reading `.length` on undefined would throw.
    if (Array.isArray(input.knownOrganizations) && input.knownOrganizations.length > 0) {
        options.knownOrganizations = input.knownOrganizations;
    }
    return options;
}
input.carryoverMeta.followupContext + : null; if (debugKnownOrganizations.length > 0) { debug.assistant_known_organizations = debugKnownOrganizations; } if (debugActiveOrganization) { debug.assistant_active_organization = debugActiveOrganization; } + const rootIntent = input.toNonEmptyString(followupContextSource?.root_intent); + const currentFrameKind = input.toNonEmptyString(followupContextSource?.current_frame_kind); + const rootFilters = followupContextSource?.root_filters && typeof followupContextSource.root_filters === "object" + ? followupContextSource.root_filters + : null; + if (rootIntent || currentFrameKind) { + debug.address_root_frame_context = { + root_intent: rootIntent, + current_frame_kind: currentFrameKind, + organization: input.toNonEmptyString(rootFilters?.organization), + as_of_date: input.toNonEmptyString(rootFilters?.as_of_date), + period_from: input.toNonEmptyString(rootFilters?.period_from), + period_to: input.toNonEmptyString(rootFilters?.period_to) + }; + } const finalization = finalizeAddressTurnSafe({ sessionId: input.sessionId, userMessage: input.userMessage, diff --git a/llm_normalizer/backend/dist/services/assistantAddressLaneRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantAddressLaneRuntimeAdapter.js index 90f5598..7930304 100644 --- a/llm_normalizer/backend/dist/services/assistantAddressLaneRuntimeAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantAddressLaneRuntimeAdapter.js @@ -40,7 +40,7 @@ async function runAssistantAddressLaneRuntime(input) { return { action: "continue" }; }; if (input.shouldPreferContextualLane) { - const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover); + const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover, input.llmSemanticHints ?? 
null); const decision = evaluateAddressLane(contextualAddressLane, input.addressInputMessage, input.carryover); if (decision.action === "return") { return { @@ -50,7 +50,7 @@ async function runAssistantAddressLaneRuntime(input) { }; } } - const primaryAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, null); + const primaryAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, null, input.llmSemanticHints ?? null); const primaryDecision = evaluateAddressLane(primaryAddressLane, input.addressInputMessage, null); if (primaryDecision.action === "return") { return { @@ -60,7 +60,7 @@ async function runAssistantAddressLaneRuntime(input) { }; } if (!input.shouldPreferContextualLane && input.carryover?.followupContext) { - const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover); + const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover, input.llmSemanticHints ?? null); const contextualDecision = evaluateAddressLane(contextualAddressLane, input.addressInputMessage, input.carryover); if (contextualDecision.action === "return") { return { @@ -78,7 +78,7 @@ async function runAssistantAddressLaneRuntime(input) { retryAudit.retry_message = input.userMessage; if (input.carryover?.followupContext) { retryAudit.retry_used_followup_context = true; - const rawContextualLane = await input.runAddressLaneAttempt(input.userMessage, input.carryover); + const rawContextualLane = await input.runAddressLaneAttempt(input.userMessage, input.carryover, input.llmSemanticHints ?? 
// Phrases that point at the currently selected object / item / position.
const SELECTED_OBJECT_INVENTORY_SIGNAL = /(?:по\s+выбранному\s+объекту|по\s+этой\s+позиции|по\s+этому\s+товару|selected\s+object)/iu;

// Phrases that ask who bought / who supplied / which documents.
const SELECTED_OBJECT_INVENTORY_ACTION_CUE = /(?:кому[\s\S]{0,80}продал[аи]?|кому[\s\S]{0,80}реализова[нлт][а-я]*|кому\s+был\s+продан|кто[\s\S]{0,40}купил|кто\s+это\s+поставил|кто\s+поставил|у\s+кого\s+купили|у\s+кого\s+куплено|где\s+мы\s+купили|где\s+куплено|по\s+каким\s+документам|какими\s+документами|покажи\s+документы|документы\s+закупки|buyer|sale\s+trace|supplier|vendor|purchase\s+documents|purchase[\s-]?to[\s-]?sale|old\s+purchase|aged\s+stock)/iu;

// Intents treated as generic canonical drift.
const GENERIC_CANONICAL_DRIFT_INTENTS = new Set([
    "open_items_by_counterparty_or_contract",
    "list_documents_by_counterparty",
    "list_documents_by_contract",
    "bank_operations_by_counterparty",
    "bank_operations_by_contract",
    "documents_forming_balance"
]);

/** True when the text refers to the selected object, position or item. */
function hasSelectedObjectInventorySignal(text) {
    const haystack = String(text ?? "");
    return SELECTED_OBJECT_INVENTORY_SIGNAL.test(haystack);
}

/** True when the text asks about buyers, suppliers or purchase documents. */
function hasSelectedObjectInventoryActionCue(text) {
    const haystack = String(text ?? "");
    return SELECTED_OBJECT_INVENTORY_ACTION_CUE.test(haystack);
}

/** True when the intent is one of the generic counterparty/contract/document intents. */
function isGenericCanonicalDriftIntent(intent) {
    return GENERIC_CANONICAL_DRIFT_INTENTS.has(intent);
}

/**
 * Decides whether the user's raw message should be used for a follow-up turn
 * instead of the canonical (pre-decomposed) message.
 *
 * Always false without follow-up context, or when raw and canonical messages
 * are empty or identical. Otherwise true when the pre-decompose contract
 * reports mode "unsupported" with intent "unknown", or when the raw message
 * carries both a selected-object signal and an action cue while the contract
 * intent is a generic drift intent.
 *
 * @param {unknown} userMessage - raw user text.
 * @param {unknown} addressInputMessage - canonical message.
 * @param {object|null} carryover - must expose an object `followupContext`.
 * @param {object|null} addressPreDecompose - may expose `predecomposeContract`.
 * @param {(value: unknown) => (string|null)} toNonEmptyString - string normalizer.
 * @returns {boolean}
 */
function shouldPreferRawFollowupMessage(userMessage, addressInputMessage, carryover, addressPreDecompose, toNonEmptyString) {
    const followup = carryover?.followupContext;
    if (!followup || typeof followup !== "object") {
        return false;
    }
    const rawText = toNonEmptyString(userMessage);
    const canonicalText = toNonEmptyString(addressInputMessage);
    if (!rawText || !canonicalText) {
        return false;
    }
    if (rawText === canonicalText) {
        return false;
    }
    const candidateContract = addressPreDecompose?.predecomposeContract;
    const contract = candidateContract && typeof candidateContract === "object" ? candidateContract : null;
    const contractMode = toNonEmptyString(contract?.mode) ?? "unknown";
    const contractIntent = toNonEmptyString(contract?.intent) ?? "unknown";
    if (contractMode === "unsupported" && contractIntent === "unknown") {
        return true;
    }
    if (!hasSelectedObjectInventorySignal(rawText)) {
        return false;
    }
    if (!hasSelectedObjectInventoryActionCue(rawText)) {
        return false;
    }
    return isGenericCanonicalDriftIntent(contractIntent);
}
input.userMessage; + let carryover = input.resolveAddressFollowupCarryoverContext(input.userMessage, input.sessionItems, addressInputMessage, addressPreDecompose); + if (shouldPreferRawFollowupMessage(input.userMessage, addressInputMessage, carryover, addressPreDecompose, input.toNonEmptyString)) { + addressInputMessage = input.userMessage; + addressPreDecompose = { + ...addressPreDecompose, + applied: false, + effectiveMessage: input.userMessage, + reason: "followup_raw_message_preferred_over_llm_rewrite", + predecomposeContract: input.buildAddressLlmPredecomposeContractV1({ + sourceMessage: input.userMessage, + canonicalMessage: input.userMessage + }) + }; + carryover = input.resolveAddressFollowupCarryoverContext(input.userMessage, input.sessionItems, addressInputMessage, addressPreDecompose); + } const followupContext = carryover?.followupContext ?? null; const orchestrationDecision = input.resolveAssistantOrchestrationDecision({ rawUserMessage: input.userMessage, diff --git a/llm_normalizer/backend/dist/services/assistantAddressRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantAddressRuntimeAdapter.js index 492e0d4..b035b09 100644 --- a/llm_normalizer/backend/dist/services/assistantAddressRuntimeAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantAddressRuntimeAdapter.js @@ -62,9 +62,12 @@ async function runAssistantAddressRuntime(input) { userMessage: input.userMessage, addressInputMessage, carryover, + llmSemanticHints: addressRuntimeMeta && typeof addressRuntimeMeta === "object" + ? addressRuntimeMeta.semanticHints ?? 
null + : null, shouldPreferContextualLane, canRetryWithRawUserMessage, - runAddressLaneAttempt: (messageUsed, carryMeta) => input.runAddressLaneAttempt(messageUsed, carryMeta, analysisDateHint), + runAddressLaneAttempt: (messageUsed, carryMeta, llmSemanticHints = null) => input.runAddressLaneAttempt(messageUsed, carryMeta, analysisDateHint, llmSemanticHints), isRetryableAddressLimitedResult: input.isRetryableAddressLimitedResult }); if (addressLaneRuntime.handled && addressLaneRuntime.selection) { diff --git a/llm_normalizer/backend/dist/services/assistantOrganizationMatcher.js b/llm_normalizer/backend/dist/services/assistantOrganizationMatcher.js new file mode 100644 index 0000000..9af0ea9 --- /dev/null +++ b/llm_normalizer/backend/dist/services/assistantOrganizationMatcher.js @@ -0,0 +1,209 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.normalizeOrganizationScopeValue = normalizeOrganizationScopeValue; +exports.normalizeOrganizationScopeSearchText = normalizeOrganizationScopeSearchText; +exports.scoreOrganizationMentionInMessage = scoreOrganizationMentionInMessage; +exports.mergeKnownOrganizations = mergeKnownOrganizations; +exports.resolveOrganizationSelectionFromMessage = resolveOrganizationSelectionFromMessage; +const ORGANIZATION_SCOPE_STOPWORDS = new Set([ + "ооо", + "зао", + "оао", + "пао", + "ао", + "ип", + "llc", + "inc", + "ltd", + "corp", + "group", + "company", + "co", + "the", + "and", + "org", + "organization", + "компания", + "организация", + "контора", + "фирма", + "база", + "по", + "в", + "во", + "на", + "для", + "из", + "у", + "к", + "от", + "это", + "эта", + "этой", + "этот", + "сегодня", + "сейчас", + "текущая", + "текущей", + "наш", + "наша", + "нашей", + "нашу", + "наши" +]); +function normalizeScopeLabel(value) { + return String(value ?? 
"") + .replace(/[“”«»]/g, '"') + .replace(/\s+/g, " ") + .trim(); +} +function normalizeScopeKey(value) { + return normalizeScopeLabel(value).toLowerCase().replace(/ё/g, "е"); +} +function normalizeOrganizationScopeValue(value) { + const normalized = normalizeScopeLabel(value); + if (!normalized) { + return null; + } + let unwrapped = normalized.replace(/^\\+|\\+$/g, "").trim(); + if ((unwrapped.startsWith('"') && unwrapped.endsWith('"')) || + (unwrapped.startsWith("'") && unwrapped.endsWith("'"))) { + unwrapped = unwrapped.slice(1, -1).trim(); + } + return unwrapped.length > 0 ? unwrapped : null; +} +function normalizeOrganizationScopeSearchText(value) { + return normalizeScopeKey(value) + .replace(/[^\p{L}\p{N}]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} +function tokenizeOrganizationScope(value) { + const normalized = normalizeOrganizationScopeSearchText(value); + if (!normalized) { + return []; + } + return normalized + .split(" ") + .map((token) => token.trim()) + .filter((token) => token.length >= 3 && !ORGANIZATION_SCOPE_STOPWORDS.has(token)); +} +function organizationTokenVariants(token) { + const source = String(token ?? 
"").trim().toLowerCase(); + if (!source) { + return []; + } + const variants = new Set([source]); + const withoutLongEnding = source.replace(/(?:ами|ями|ого|ему|ому|ыми|ими|иях|ях|ах|ей|ой|ом|ем|ам|ям|ую|юю|ая|яя|ое|ее|ые|ие|ов|ев|ий|ый|ой)$/iu, ""); + if (withoutLongEnding.length >= 4) { + variants.add(withoutLongEnding); + } + const withoutShortEnding = source.replace(/[аеёиоуыэюя]$/iu, ""); + if (withoutShortEnding.length >= 4) { + variants.add(withoutShortEnding); + } + return Array.from(variants); +} +function scoreOrganizationMentionInMessage(message, organization) { + const messageNorm = normalizeOrganizationScopeSearchText(message); + const organizationNorm = normalizeOrganizationScopeSearchText(organization); + if (!messageNorm || !organizationNorm) { + return 0; + } + if (messageNorm.includes(organizationNorm)) { + return 10_000 + organizationNorm.length; + } + const organizationTokens = tokenizeOrganizationScope(organizationNorm); + const messageTokens = tokenizeOrganizationScope(messageNorm); + if (organizationTokens.length === 0 || messageTokens.length === 0) { + return 0; + } + let matchedTokens = 0; + let score = 0; + for (const token of organizationTokens) { + const variants = organizationTokenVariants(token); + let matched = false; + let variantScore = 0; + for (const variant of variants) { + if (!variant) { + continue; + } + if (messageNorm.includes(variant)) { + matched = true; + variantScore = Math.max(variantScore, variant.length * 5); + continue; + } + const fuzzyMatched = messageTokens.some((messageToken) => { + if (messageToken === variant) { + return true; + } + if (messageToken.length >= 5 && variant.length >= 5) { + return messageToken.startsWith(variant) || variant.startsWith(messageToken); + } + return false; + }); + if (fuzzyMatched) { + matched = true; + variantScore = Math.max(variantScore, Math.max(20, variant.length * 3)); + } + } + if (matched) { + matchedTokens += 1; + score += variantScore > 0 ? 
variantScore : 10; + } + } + if (matchedTokens === 0) { + return 0; + } + if (matchedTokens === organizationTokens.length) { + score += 400; + } + else { + score += matchedTokens * 50; + } + return score; +} +function mergeKnownOrganizations(values, limit = 50) { + const dedup = new Map(); + for (const raw of Array.isArray(values) ? values : []) { + const normalized = normalizeOrganizationScopeValue(raw); + if (!normalized) { + continue; + } + const key = normalizeOrganizationScopeSearchText(normalized); + if (!key || dedup.has(key)) { + continue; + } + dedup.set(key, normalized); + } + return Array.from(dedup.values()).slice(0, limit); +} +function resolveOrganizationSelectionFromMessage(userMessage, knownOrganizations) { + const known = mergeKnownOrganizations(Array.isArray(knownOrganizations) ? knownOrganizations : []); + if (!userMessage || known.length === 0) { + return null; + } + const messageNorm = normalizeOrganizationScopeSearchText(userMessage); + if (!messageNorm) { + return null; + } + const scored = known + .map((organization) => ({ + organization, + score: scoreOrganizationMentionInMessage(messageNorm, organization) + })) + .filter((item) => item.score > 0) + .sort((a, b) => b.score - a.score || a.organization.length - b.organization.length); + if (scored.length === 0) { + return null; + } + const best = scored[0]; + const second = scored[1]; + if (best.score < 90) { + return null; + } + if (second && second.score === best.score) { + return null; + } + return best.organization; +} diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index b28663f..7dc7b80 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -2496,6 +2496,62 @@ function findRecentAddressFilterValue(items, key) { } return null; } +function isInventoryRootFrameIntent(intent) { + return intent === "inventory_on_hand_as_of_date"; +} 
+function isInventoryDrilldownFrameIntent(intent) { + return intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date"; +} +function extractAddressCarryoverAnchor(addressDebug) { + if (!isAddressLaneDebugPayload(addressDebug)) { + return { + anchorType: null, + anchorValue: null + }; + } + return { + anchorType: toNonEmptyString(addressDebug.anchor_type), + anchorValue: toNonEmptyString(addressDebug.anchor_value_resolved) ?? + toNonEmptyString(addressDebug.anchor_value_raw) ?? + readAddressInventoryItemFilter(addressDebug) ?? + readAddressFilterString(addressDebug, "counterparty") ?? + readAddressFilterString(addressDebug, "contract") ?? + readAddressFilterString(addressDebug, "account") + }; +} +function findRecentInventoryRootFrame(items) { + for (let index = items.length - 1; index >= 0; index -= 1) { + const item = items[index]; + if (!item || item.role !== "assistant" || !item.debug) { + continue; + } + const debug = item.debug; + if (!isAddressLaneDebugPayload(debug)) { + continue; + } + const detectedIntent = toNonEmptyString(debug.detected_intent); + if (!isInventoryRootFrameIntent(detectedIntent)) { + continue; + } + const anchor = extractAddressCarryoverAnchor(debug); + const filtersRaw = debug.extracted_filters; + const filters = filtersRaw && typeof filtersRaw === "object" + ? 
{ ...filtersRaw } + : {}; + return { + intent: detectedIntent, + filters, + anchorType: anchor.anchorType, + anchorValue: anchor.anchorValue, + messageId: toNonEmptyString(item.message_id) + }; + } + return null; +} const ADDRESS_FOLLOWUP_OFFER_BY_INTENT = { list_documents_by_counterparty: ["bank_operations_by_counterparty", "list_contracts_by_counterparty"], bank_operations_by_counterparty: ["list_documents_by_counterparty", "list_contracts_by_counterparty"], @@ -2798,6 +2854,14 @@ function resolveAddressFollowupCarryoverContext(userMessage, items, alternateMes readAddressFilterString(previousAddressDebug, "counterparty") ?? readAddressFilterString(previousAddressDebug, "account") ?? readAddressFilterString(previousAddressDebug, "contract"); + const inventoryRootFrame = findRecentInventoryRootFrame(items); + const currentFrameKind = inventoryRootFrame + ? isInventoryDrilldownFrameIntent(sourceIntent) + ? "inventory_drilldown" + : isInventoryRootFrameIntent(sourceIntent) + ? "inventory_root" + : "generic" + : null; let resolvedCounterpartyFromDisplay = false; const previousFiltersRaw = previousAddressDebug.extracted_filters; const previousFilters = previousFiltersRaw && typeof previousFiltersRaw === "object" @@ -2857,7 +2921,12 @@ function resolveAddressFollowupCarryoverContext(userMessage, items, alternateMes previous_filters: previousFilters, previous_anchor_type: previousAnchorType ?? undefined, previous_anchor_value: previousAnchor, - resolved_counterparty_from_display: resolvedCounterpartyFromDisplay || undefined + resolved_counterparty_from_display: resolvedCounterpartyFromDisplay || undefined, + root_intent: inventoryRootFrame?.intent ?? undefined, + root_filters: inventoryRootFrame?.filters ?? undefined, + root_anchor_type: inventoryRootFrame?.anchorType ?? undefined, + root_anchor_value: inventoryRootFrame?.anchorValue ?? undefined, + current_frame_kind: currentFrameKind ?? 
undefined }, previousAddressIntent: previousIntent, previousAddressAnchor: previousAnchor, @@ -2933,19 +3002,32 @@ function isAddressLlmPreDecomposeCandidate(userMessage) { } return /(?:\bдок\b|доки|документ|контрагент|договор|остаток|сч(?:е|ё)т|сальдо|банк|выписк|платеж|оплат|поступлен|поступлени|списан|реализац|сверк|взаиморасч|кто\s+должен|show|list|documents?|counterparty|contract|account|balance|bank\s+operations?|doki|dokument(?:y|ov|am|a)?|platezh|oplata|schet|saldo)/i.test(text); } -function extractAddressQuestionFromNormalized(normalized) { - if (!normalized || typeof normalized !== "object") { +function normalizeAddressSemanticHintsFromFragment(fragment) { + if (!fragment || typeof fragment !== "object") { return null; } - const source = normalized; - const fragments = Array.isArray(source.fragments) ? source.fragments : []; - for (const item of fragments) { + const hints = fragment.semantic_hints; + if (!hints || typeof hints !== "object") { + return null; + } + const scopeTargetKind = toNonEmptyString(hints.scope_target_kind); + const dateScopeKind = toNonEmptyString(hints.date_scope_kind); + return { + scope_target_kind: scopeTargetKind ?? "none", + scope_target_text: toNonEmptyString(hints.scope_target_text), + date_scope_kind: dateScopeKind ?? "missing", + self_scope_detected: hints.self_scope_detected === true || scopeTargetKind === "self_scope", + selected_object_scope_detected: hints.selected_object_scope_detected === true || scopeTargetKind === "selected_object" + }; +} +function extractAddressPredecomposeCandidateFromFragments(fragments) { + for (const item of Array.isArray(fragments) ? fragments : []) { if (!item || typeof item !== "object") { continue; } const fragment = item; const domainRelevance = String(fragment.domain_relevance ?? 
"").trim().toLowerCase(); - if (domainRelevance === "out_of_scope") { + if (domainRelevance === "out_of_scope" || domainRelevance === "offtopic") { continue; } const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); @@ -2955,11 +3037,20 @@ function extractAddressQuestionFromNormalized(normalized) { continue; } if (candidate.length >= 3 && candidate.length <= 500) { - return candidate; + return { + candidate, + semanticHints: normalizeAddressSemanticHintsFromFragment(fragment) + }; } } return null; } +function extractAddressPredecomposeCandidateFromNormalized(normalized) { + if (!normalized || typeof normalized !== "object") { + return null; + } + return extractAddressPredecomposeCandidateFromFragments(normalized.fragments); +} function stripMarkdownJsonFence(text) { return String(text ?? "") .trim() @@ -3037,7 +3128,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) { } return null; } -function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { +function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) { const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); if (!outputText) { return null; @@ -3046,31 +3137,7 @@ function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { if (!parsed || typeof parsed !== "object") { return null; } - const source = parsed; - const fragments = Array.isArray(source.fragments) ? 
source.fragments : []; - for (const item of fragments) { - if (!item || typeof item !== "object") { - continue; - } - const fragment = item; - const domainRelevance = fragment.domain_relevance; - if (typeof domainRelevance === "string" && domainRelevance.trim().toLowerCase() === "out_of_scope") { - continue; - } - if (domainRelevance === false) { - continue; - } - const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); - const rawText = toNonEmptyString(fragment.raw_fragment_text); - const candidate = selectPreferredAddressFragmentCandidate(rawText ?? "", normalizedText ?? ""); - if (!candidate) { - continue; - } - if (candidate.length >= 3 && candidate.length <= 500) { - return candidate; - } - } - return null; + return extractAddressPredecomposeCandidateFromFragments(parsed.fragments); } const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "есть", @@ -3310,7 +3377,8 @@ function attachAddressPredecomposeContract(meta, sourceMessage) { const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? String(sourceMessage ?? ""); const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({ sourceMessage: String(sourceMessage ?? ""), - canonicalMessage + canonicalMessage, + semanticHints: meta?.semanticHints ?? null }); const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({ sourceMessage: String(sourceMessage ?? ""), @@ -3375,9 +3443,10 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidateFromNormalized = extractAddressQuestionFromNormalized(normalized?.normalized); - const candidateFromRaw = candidateFromNormalized ? null : extractAddressQuestionFromRawNormalizerOutput(normalized?.raw_model_output); - const candidate = candidateFromNormalized ?? 
candidateFromRaw; + const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output); + const candidateMeta = candidateFromNormalized ?? candidateFromRaw; + const candidate = candidateMeta?.candidate ?? null; if (!candidate) { if (fallbackCandidate) { const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase()); @@ -3391,7 +3460,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage traceId: normalized?.trace_id ?? null, effectiveMessage: fallbackCandidate.candidate, reason: "fallback_rule_applied_after_llm", - fallbackRuleHit: fallbackCandidate.rule + fallbackRuleHit: fallbackCandidate.rule, + semanticHints: null }, userMessage); } } @@ -3399,7 +3469,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage ...baseMeta, attempted: true, traceId: normalized?.trace_id ?? null, - reason: normalized?.ok ? "no_usable_fragment" : "normalize_failed" + reason: normalized?.ok ? "no_usable_fragment" : "normalize_failed", + semanticHints: null }, userMessage); } const repairedSourceMessage = repairAddressMojibake(userMessage); @@ -3418,7 +3489,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_diagnostic_rewrite", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const intentConflict = sourceIntentKnown && @@ -3440,7 +3512,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage ? 
"normalized_fragment_rejected_intent_drop" : "normalized_fragment_rejected_intent_conflict", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceHasExplicitDrilldownSignal = hasPredecomposeExplicitDrilldownSignal(repairedSourceMessage || userMessage); @@ -3461,7 +3534,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_followup_intent_injection", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceHasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignalForPredecompose(repairedSourceMessage || userMessage); @@ -3481,7 +3555,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_selected_object_context_loss", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceAnchorQuality = evaluateAddressAnchorQuality(repairedSourceMessage || userMessage); @@ -3507,7 +3582,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_anchor_substitution", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const anchorDegradedByCandidate = sameIntentForAnchorSafety && @@ -3524,7 +3600,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_anchor_degradation", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? 
null }, userMessage); } if (fallbackCandidate) { @@ -3543,19 +3620,25 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: fallbackCandidate.candidate, reason: "fallback_rule_preferred_over_llm_candidate_anchor_quality", fallbackRuleHit: fallbackCandidate.rule, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } } const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({ sourceMessage: String(userMessage ?? ""), - canonicalMessage: candidate + canonicalMessage: candidate, + predecomposeContract: (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({ + sourceMessage: String(userMessage ?? ""), + canonicalMessage: candidate, + semanticHints: candidateMeta?.semanticHints ?? null + }) }); if (!semanticContractForCandidate.apply_canonical_recommended) { const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected); const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected && candidateFromNormalized && - candidateFromNormalized === candidate && + candidateFromNormalized.candidate === candidate && toNonEmptyString(candidate)); if (rawFragmentCandidatePreferred) { return attachAddressPredecomposeContract({ @@ -3567,7 +3650,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: candidate, reason: "normalized_fragment_semantic_guard_raw_fragment_preferred", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } if (fallbackCandidate) { @@ -3588,7 +3672,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: String(fallbackCandidate.candidate ?? 
""), reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard", fallbackRuleHit: fallbackCandidate.rule, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } } @@ -3601,7 +3686,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_semantic_guard", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase()); @@ -3628,7 +3714,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage reason, llmCanonicalCandidateDetected: true, fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } catch (error) { @@ -3975,7 +4062,11 @@ function resolveAssistantOrchestrationDecision(input) { hasOpenContractsAddressSignal(repairedEffectiveAddressUserMessage); const modeSample = repairedEffectiveAddressUserMessage || effectiveAddressUserMessage; const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample); + const modeDetectionRaw = (0, addressQueryClassifier_1.detectAddressQuestionMode)(repairedRawUserMessage || rawUserMessage); + const resolvedModeDetection = modeDetection.mode === "address_query" ? modeDetection : modeDetectionRaw; const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample); + const intentResolutionRaw = (0, addressIntentResolver_1.resolveAddressIntent)(repairedRawUserMessage || rawUserMessage); + const resolvedIntentResolution = intentResolution.intent !== "unknown" ? 
intentResolution : intentResolutionRaw; const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent); const llmPreDecomposeReason = toNonEmptyString(llmPreDecomposeMeta?.reason); const llmRuntimeUnavailableDetected = Boolean(llmPreDecomposeReason && @@ -3993,10 +4084,10 @@ function resolveAssistantOrchestrationDecision(input) { hasStrictDeepInvestigationCue(repairedRawUserMessage) || hasStrictDeepInvestigationCue(effectiveAddressUserMessage) || hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage); - const strictDeepInvestigationBypassAllowed = shouldBypassStrictDeepInvestigationCueForAddressIntent(intentResolution.intent) || + const strictDeepInvestigationBypassAllowed = shouldBypassStrictDeepInvestigationCueForAddressIntent(resolvedIntentResolution.intent) || shouldBypassStrictDeepInvestigationCueForAddressIntent(llmContractIntent); const keepAddressLaneByIntent = semanticApplyCanonicalRecommended && - Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) || + Boolean((resolvedIntentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(resolvedIntentResolution.intent)) || (llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent)) || openContractsAddressSignal) && (!strictDeepInvestigationCueDetected || strictDeepInvestigationBypassAllowed); @@ -4037,8 +4128,8 @@ function resolveAssistantOrchestrationDecision(input) { !capabilityMetaQuery && !dataRetrievalSignal && !effectiveAddressFollowupSignal && - modeDetection.mode === "unsupported" && - intentResolution.intent === "unknown"); + resolvedModeDetection.mode === "unsupported" && + resolvedIntentResolution.intent === "unknown"); const nonDomainQueryIndexed = Boolean(!llmFirstAddressCandidate && deterministicNonDomainGuard && (llmFirstUnsupportedCandidate || llmContractMode === null) && @@ -4058,10 +4149,10 @@ function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { 
schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "data_scope", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4086,10 +4177,10 @@ function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "capability", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4114,10 +4205,10 @@ function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "non_domain", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: 
resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4153,7 +4244,7 @@ function resolveAssistantOrchestrationDecision(input) { hasShortDebtMirrorFollowupSignal(repairedRawUserMessage) || hasShortDebtMirrorFollowupSignal(repairedEffectiveAddressUserMessage)); const supportedAddressIntentDetected = (!strictDeepInvestigationCueDetected || strictDeepInvestigationBypassAllowed) && - Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) || + Boolean((resolvedIntentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(resolvedIntentResolution.intent)) || (llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent)) || openContractsAddressSignal); const semanticGuardHints = semanticExtractionContract?.guard_hints && @@ -4173,7 +4264,7 @@ function resolveAssistantOrchestrationDecision(input) { semanticAggregateShapeDetected || semanticDeepInvestigationHintDetected || !semanticApplyCanonicalRecommended)); - const unsupportedIntentOrMode = (modeDetection.mode !== "address_query" && intentResolution.intent === "unknown") || + const unsupportedIntentOrMode = (resolvedModeDetection.mode !== "address_query" && resolvedIntentResolution.intent === "unknown") || llmContractMode === "unsupported"; const unsupportedAddressIntentFallbackToDeep = Boolean(baseToolGate?.runAddressLane && !llmRuntimeUnavailableDetected && @@ -4293,10 +4384,10 @@ function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: null, - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: 
resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, semantic_contract_valid: semanticContractValid, diff --git a/llm_normalizer/backend/dist/services/normalizerService.js b/llm_normalizer/backend/dist/services/normalizerService.js index 61b3d44..0606578 100644 --- a/llm_normalizer/backend/dist/services/normalizerService.js +++ b/llm_normalizer/backend/dist/services/normalizerService.js @@ -300,6 +300,71 @@ function coerceFlags(value, fallback) { mentions_period_close_context: pick("mentions_period_close_context", ["period_close_context"]) }; } +function inferSemanticHints(rawText, timeScope) { + return { + scope_target_kind: "none", + scope_target_text: null, + date_scope_kind: timeScope.type === "explicit" ? "explicit" : "missing", + self_scope_detected: false, + selected_object_scope_detected: /(?:по\s+выбранному\s+объекту|selected\s+object)/iu.test(String(rawText ?? 
"")) + }; +} +function coerceSemanticScopeTargetKind(value) { + const token = normalizeToken(value); + if (token === "none" || + token === "self_scope" || + token === "selected_object" || + token === "organization" || + token === "warehouse" || + token === "counterparty" || + token === "contract" || + token === "item") { + return token; + } + if (["organization_scope", "company_scope", "org_scope", "company", "organization_anchor"].includes(token)) { + return "organization"; + } + if (["warehouse_scope", "stock_scope", "warehouse_anchor"].includes(token)) { + return "warehouse"; + } + if (["own_company_scope", "implicit_self_scope", "our_scope"].includes(token)) { + return "self_scope"; + } + if (["selected_object_scope", "selected_object_anchor"].includes(token)) { + return "selected_object"; + } + return "none"; +} +function coerceSemanticDateScopeKind(value) { + const token = normalizeToken(value); + if (token === "explicit" || token === "implicit_current" || token === "missing") { + return token; + } + if (["implicit_current_snapshot", "current", "today", "default_current"].includes(token)) { + return "implicit_current"; + } + return "missing"; +} +function coerceSemanticHints(value, rawText, timeScope) { + const fallback = inferSemanticHints(rawText, timeScope); + if (!value || typeof value !== "object") { + return fallback; + } + const source = value; + return { + scope_target_kind: coerceSemanticScopeTargetKind(source.scope_target_kind ?? source.anchor_kind ?? source.scope_kind), + scope_target_text: toOptionalString(source.scope_target_text ?? + source.anchor_value ?? + source.organization ?? + source.warehouse ?? + source.counterparty ?? + source.contract ?? + source.item) ?? fallback.scope_target_text, + date_scope_kind: coerceSemanticDateScopeKind(source.date_scope_kind ?? source.date_scope ?? 
source.time_scope_kind), + self_scope_detected: coerceBoolean(source.self_scope_detected, fallback.self_scope_detected), + selected_object_scope_detected: coerceBoolean(source.selected_object_scope_detected, fallback.selected_object_scope_detected) + }; +} function mapCandidateLabel(value) { const token = normalizeToken(value); if (CANDIDATE_LABEL_VALUES.includes(token)) { @@ -359,6 +424,7 @@ function coerceFragmentV2(rawFragment, index, userMessage) { const accountHints = coerceStringArray(source.account_hints); const documentHints = coerceStringArray(source.document_hints); const registerHints = coerceStringArray(source.register_hints); + const timeScope = coerceTimeScope(source.time_scope, rawText, base.time_scope); return { fragment_id: coerceFragmentId(source.fragment_id, index, base.fragment_id), raw_fragment_text: rawText, @@ -369,8 +435,9 @@ function coerceFragmentV2(rawFragment, index, userMessage) { account_hints: accountHints.length > 0 ? accountHints : base.account_hints, document_hints: documentHints.length > 0 ? documentHints : base.document_hints, register_hints: registerHints.length > 0 ? 
registerHints : base.register_hints, - time_scope: coerceTimeScope(source.time_scope, rawText, base.time_scope), + time_scope: timeScope, flags, + semantic_hints: coerceSemanticHints(source.semantic_hints, rawText, timeScope), candidate_labels: coerceCandidateLabels(source.candidate_labels, flags, domainRelevance, base.candidate_labels), confidence: coerceConfidence(source.confidence, base.confidence) }; @@ -811,6 +878,7 @@ function buildFragmentV2(rawText, index) { else if (flags.asks_for_exact_object_trace || flags.asks_for_ranking_or_top) { confidence = "high"; } + const timeScope = inferTimeScope(text); return { fragment_id: `F${index + 1}`, raw_fragment_text: text, @@ -821,8 +889,9 @@ function buildFragmentV2(rawText, index) { account_hints: extractAccounts(text), document_hints: Array.from(new Set(Array.from(lower.matchAll(/(документ|реализац|поступлен|платеж|выписк|акт сверк)/g)).map((item) => item[0]))), register_hints: Array.from(new Set(Array.from(lower.matchAll(/(регистр|движен|остатк|сальдо)/g)).map((item) => item[0]))), - time_scope: inferTimeScope(text), + time_scope: timeScope, flags, + semantic_hints: inferSemanticHints(text, timeScope), candidate_labels: candidateLabels, confidence }; diff --git a/llm_normalizer/backend/src/schemas/normalized_query_v2.json b/llm_normalizer/backend/src/schemas/normalized_query_v2.json index 51e6d82..d3a283d 100644 --- a/llm_normalizer/backend/src/schemas/normalized_query_v2.json +++ b/llm_normalizer/backend/src/schemas/normalized_query_v2.json @@ -50,6 +50,7 @@ "register_hints", "time_scope", "flags", + "semantic_hints", "candidate_labels", "confidence" ], @@ -134,6 +135,41 @@ "mentions_period_close_context": { "type": "boolean" } } }, + "semantic_hints": { + "type": "object", + "additionalProperties": false, + "required": [ + "scope_target_kind", + "scope_target_text", + "date_scope_kind", + "self_scope_detected", + "selected_object_scope_detected" + ], + "properties": { + "scope_target_kind": { + "type": "string", 
+ "enum": [ + "none", + "self_scope", + "selected_object", + "organization", + "warehouse", + "counterparty", + "contract", + "item" + ] + }, + "scope_target_text": { + "type": ["string", "null"] + }, + "date_scope_kind": { + "type": "string", + "enum": ["explicit", "implicit_current", "missing"] + }, + "self_scope_detected": { "type": "boolean" }, + "selected_object_scope_detected": { "type": "boolean" } + } + }, "candidate_labels": { "type": "array", "items": { diff --git a/llm_normalizer/backend/src/schemas/normalized_query_v2_0_1.json b/llm_normalizer/backend/src/schemas/normalized_query_v2_0_1.json index 87a66a4..d1e719b 100644 --- a/llm_normalizer/backend/src/schemas/normalized_query_v2_0_1.json +++ b/llm_normalizer/backend/src/schemas/normalized_query_v2_0_1.json @@ -50,6 +50,7 @@ "register_hints", "time_scope", "flags", + "semantic_hints", "candidate_labels", "confidence", "execution_readiness", @@ -120,6 +121,41 @@ "mentions_period_close_context": { "type": "boolean" } } }, + "semantic_hints": { + "type": "object", + "additionalProperties": false, + "required": [ + "scope_target_kind", + "scope_target_text", + "date_scope_kind", + "self_scope_detected", + "selected_object_scope_detected" + ], + "properties": { + "scope_target_kind": { + "type": "string", + "enum": [ + "none", + "self_scope", + "selected_object", + "organization", + "warehouse", + "counterparty", + "contract", + "item" + ] + }, + "scope_target_text": { + "type": ["string", "null"] + }, + "date_scope_kind": { + "type": "string", + "enum": ["explicit", "implicit_current", "missing"] + }, + "self_scope_detected": { "type": "boolean" }, + "selected_object_scope_detected": { "type": "boolean" } + } + }, "candidate_labels": { "type": "array", "items": { @@ -180,4 +216,3 @@ } } } - diff --git a/llm_normalizer/backend/src/schemas/normalized_query_v2_0_2.json b/llm_normalizer/backend/src/schemas/normalized_query_v2_0_2.json index 7d03129..2858070 100644 --- 
a/llm_normalizer/backend/src/schemas/normalized_query_v2_0_2.json +++ b/llm_normalizer/backend/src/schemas/normalized_query_v2_0_2.json @@ -50,6 +50,7 @@ "register_hints", "time_scope", "flags", + "semantic_hints", "candidate_labels", "confidence", "execution_readiness", @@ -122,6 +123,41 @@ "mentions_period_close_context": { "type": "boolean" } } }, + "semantic_hints": { + "type": "object", + "additionalProperties": false, + "required": [ + "scope_target_kind", + "scope_target_text", + "date_scope_kind", + "self_scope_detected", + "selected_object_scope_detected" + ], + "properties": { + "scope_target_kind": { + "type": "string", + "enum": [ + "none", + "self_scope", + "selected_object", + "organization", + "warehouse", + "counterparty", + "contract", + "item" + ] + }, + "scope_target_text": { + "type": ["string", "null"] + }, + "date_scope_kind": { + "type": "string", + "enum": ["explicit", "implicit_current", "missing"] + }, + "self_scope_detected": { "type": "boolean" }, + "selected_object_scope_detected": { "type": "boolean" } + } + }, "candidate_labels": { "type": "array", "items": { diff --git a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index 0e429fd..ccea138 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -1,4 +1,4 @@ -import type { AddressFilterExtraction, AddressFilterSet, AddressIntent } from "../types/addressQuery"; +import type { AddressFilterExtraction, AddressFilterSet, AddressIntent, AddressSemanticFrame } from "../types/addressQuery"; import iconv from "iconv-lite"; const ACCOUNT_PATTERN = /(?:сч[её]т|счет|account)[^0-9]{0,12}(\d{2}(?:[.,]\d{1,2})?)/i; @@ -1088,6 +1088,29 @@ function isTemporalWarehousePhrase(candidate: string): boolean { ); } +function normalizeSemanticAnchorCandidate(value: string): string { + return cleanupAnchorValue(value) + .toLowerCase() + 
.replace(/С‘/g, "Рµ") + .replace(/\s+/g, " ") + .trim(); +} + +function hasImplicitSelfScopeSignal(text: string): boolean { + return /(?:^|[\s,.;:!?()\-])(?:у\s+нас|у\s+себя|у\s+меня|наш(?:ем|ей|его|их|а|е)?|сво(?:ем|ей|его|их|я|е)?)(?=$|[\s,.;:!?()\-])/iu.test( + String(text ?? "") + ); +} + +function isImplicitSelfScopeWarehouseAnchor(candidate: string): boolean { + const normalized = normalizeSemanticAnchorCandidate(candidate); + return /^(?:у\s+нас|у\s+себя|у\s+меня|наш(?:ем|ей|его|их|а|е)?|сво(?:ем|ей|его|их|я|е)?)$/iu.test(normalized); +} + +function hasSelectedObjectScopeSignal(text: string): boolean { + return /(?:по\s+выбранному\s+объекту|selected\s+object)/iu.test(String(text ?? "")); +} + function extractInventoryWarehouseAnchor(text: string): string | undefined { const patterns = [ /(?:на|по)\s+склад(?:е|у|ом)?\s+[«"']?([^\r\n,.;:!?]+?)(?:[»"']|(?=\s+(?:на|по|за|с|в)\b|[?]|$))/iu, @@ -1109,6 +1132,7 @@ function extractInventoryWarehouseAnchor(text: string): string | undefined { !candidate || candidate.includes("->") || candidate.includes("=>") || + isImplicitSelfScopeWarehouseAnchor(candidate) || normalizedCandidate.startsWith("по состоянию") || isTemporalWarehousePhrase(candidate) || /^(?:сейчас|на|дату|дате|остаток|остатки)$/iu.test(candidate) @@ -1244,6 +1268,114 @@ function shouldDefaultAsOfDateToToday(intent: AddressIntent): boolean { ); } +function resolveSemanticDateScopeKind( + filters: AddressFilterSet, + warnings: string[] +): AddressSemanticFrame["date_scope_kind"] { + if (warnings.includes("as_of_date_defaulted_today")) { + return "implicit_current"; + } + if ( + (typeof filters.as_of_date === "string" && filters.as_of_date.trim().length > 0) || + (typeof filters.period_from === "string" && filters.period_from.trim().length > 0) || + (typeof filters.period_to === "string" && filters.period_to.trim().length > 0) + ) { + return "explicit"; + } + return "none"; +} + +function resolveSemanticDateBasisHint(filters: AddressFilterSet, warnings: 
string[]): AddressSemanticFrame["date_basis_hint"] { + if (warnings.includes("as_of_date_defaulted_today")) { + return "implicit_current_snapshot"; + } + const hasAsOfDate = typeof filters.as_of_date === "string" && filters.as_of_date.trim().length > 0; + const hasPeriodFrom = typeof filters.period_from === "string" && filters.period_from.trim().length > 0; + const hasPeriodTo = typeof filters.period_to === "string" && filters.period_to.trim().length > 0; + if (hasPeriodFrom && hasPeriodTo) { + return "period_range"; + } + if (hasAsOfDate) { + return "explicit_as_of_date"; + } + if (hasPeriodTo) { + return "period_end"; + } + if (hasPeriodFrom) { + return "period_range"; + } + return null; +} + +function buildSemanticFrame( + text: string, + filters: AddressFilterSet, + warnings: string[] +): AddressSemanticFrame { + const selfScopeDetected = hasImplicitSelfScopeSignal(text); + const selectedObjectScopeDetected = hasSelectedObjectScopeSignal(text); + const itemAnchor = typeof filters.item === "string" && filters.item.trim().length > 0 ? filters.item.trim() : null; + const warehouseAnchor = typeof filters.warehouse === "string" && filters.warehouse.trim().length > 0 ? filters.warehouse.trim() : null; + const counterpartyAnchor = + typeof filters.counterparty === "string" && filters.counterparty.trim().length > 0 ? filters.counterparty.trim() : null; + const contractAnchor = typeof filters.contract === "string" && filters.contract.trim().length > 0 ? filters.contract.trim() : null; + const organizationAnchor = + typeof filters.organization === "string" && filters.organization.trim().length > 0 ? 
filters.organization.trim() : null; + + if (selectedObjectScopeDetected && itemAnchor) { + return { + scope_kind: "selected_object_scope", + anchor_kind: "item", + anchor_value: itemAnchor, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: selfScopeDetected, + selected_object_scope_detected: true + }; + } + + if (selfScopeDetected && !warehouseAnchor) { + return { + scope_kind: "implicit_self_scope", + anchor_kind: "self_scope", + anchor_value: null, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: true, + selected_object_scope_detected: selectedObjectScopeDetected + }; + } + + const explicitAnchor = + itemAnchor ?? + warehouseAnchor ?? + counterpartyAnchor ?? + contractAnchor ?? + organizationAnchor ?? + null; + const anchorKind: AddressSemanticFrame["anchor_kind"] = itemAnchor + ? "item" + : warehouseAnchor + ? "warehouse" + : counterpartyAnchor + ? "counterparty" + : contractAnchor + ? "contract" + : organizationAnchor + ? "organization" + : "none"; + + return { + scope_kind: explicitAnchor ? "explicit_anchor" : "none", + anchor_kind: anchorKind, + anchor_value: explicitAnchor, + date_scope_kind: resolveSemanticDateScopeKind(filters, warnings), + date_basis_hint: resolveSemanticDateBasisHint(filters, warnings), + self_scope_detected: selfScopeDetected, + selected_object_scope_detected: selectedObjectScopeDetected + }; +} + export function extractAddressFilters(userMessage: string, intent: AddressIntent): AddressFilterExtraction { const rawText = String(userMessage ?? 
"").trim(); const text = normalizeMojibakeString(rawText); @@ -1302,6 +1434,11 @@ export function extractAddressFilters(userMessage: string, intent: AddressIntent const warehouseAnchor = extractInventoryWarehouseAnchor(text); if (warehouseAnchor) { filters.warehouse = warehouseAnchor; + } else if ( + (intent === "inventory_on_hand_as_of_date" || intent === "inventory_supplier_stock_overlap_as_of_date") && + hasImplicitSelfScopeSignal(text) + ) { + warnings.push("warehouse_self_scope_detected"); } if (intent === "inventory_supplier_stock_overlap_as_of_date") { @@ -1511,10 +1648,12 @@ export function extractAddressFilters(userMessage: string, intent: AddressIntent const value = filters[key]; return value === undefined || value === null || String(value).trim() === ""; }); + const semanticFrame = buildSemanticFrame(text, filters, warnings); return { extracted_filters: filters, missing_required_filters: missingRequiredFilters, - warnings + warnings, + semantic_frame: semanticFrame }; } diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index d61026f..f3c4124 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -1553,7 +1553,10 @@ function hasInventoryAsOfCue(text: string): boolean { } function hasInventoryOnHandSignal(text: string): boolean { - const hasColloquialStockSnapshotCue = /(?:что|ч[её])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом)(?=$|[\s,.;:!?])/iu.test( + const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test( + text + ); + const hasStockStateCue = /(?:(?:что|ч[еёо])\s+там\s+на\s+склад(?:е|у|ом|ах)|(?:что|ч[еёо]).*происход(?:ит|ило|ящее).*(?:на\s+)?склад(?:е|у|ом|ах)|происход(?:ит|ило|ящее)\s+на\s+склад(?:е|у|ом|ах)|ситуац(?:ия|ии)\s+на\s+склад(?:е|у|ом|ах)|обстановк(?:а|и)\s+на\s+склад(?:е|у|ом|ах)|what(?:'s| 
is)?\s+(?:there\s+)?(?:on|in)\s+(?:the\s+)?(?:warehouse|stock)|what(?:'s| is)?\s+happening\s+(?:on|in)\s+(?:the\s+)?(?:warehouse|stock))/iu.test( text ); const hasAccount41Anchor = hasInventoryAccount41Anchor(text); @@ -1574,15 +1577,18 @@ function hasInventoryOnHandSignal(text: string): boolean { const hasGoodsLexeme = /(?:товар(?:ы|ов|ом|а|ные)?|номенклатур|материал(?:ы|ов|а|ам)?|item(?:s)?|sku|product(?:s)?)/iu.test(text); const hasBalanceLexeme = - /(?:леж(?:ит|ат)|есть|числ(?:ит(?:ся|сь)|ятся)|остат(?:ок|ки)|срез|на\s+дат|по\s+состоянию|на\s+конец|today|now|current|as\s+of)/iu.test( + /(?:леж(?:ит|ат)|есть|числ(?:ит(?:ся|сь)|ятся)|остат(?:ок|ки)|срез|на\s+дат|по\s+состоянию|на\s+конец|происход(?:ит|ило|ящее)|ситуац(?:ия|ии)|обстановк(?:а|и)|today|now|current|as\s+of)/iu.test( + text + ); + const hasRequestCue = + /(?:покажи|показать|выведи|дай|какие|что|ч[еёо]|какой|сколько|проверь|проверить|чекни|check|show|list|which|what)/iu.test( text ); - const hasRequestCue = /(?:покажи|показать|выведи|дай|какие|что|какой|сколько|show|list|which|what)/iu.test(text); if (hasAccount41Anchor && (hasGoodsLexeme || hasBalanceLexeme || hasRequestCue || hasInventoryAsOfCue(text))) { return true; } - return (hasGoodsLexeme || hasBalanceLexeme || hasColloquialStockSnapshotCue) && - (hasRequestCue || hasBalanceLexeme || hasColloquialStockSnapshotCue); + return (hasGoodsLexeme || hasBalanceLexeme || hasColloquialStockSnapshotCue || hasStockStateCue) && + (hasRequestCue || hasBalanceLexeme || hasColloquialStockSnapshotCue || hasStockStateCue); } function hasInventoryProvenanceSignal(text: string): boolean { diff --git a/llm_normalizer/backend/src/services/addressQueryClassifier.ts b/llm_normalizer/backend/src/services/addressQueryClassifier.ts index a475366..9e138e2 100644 --- a/llm_normalizer/backend/src/services/addressQueryClassifier.ts +++ b/llm_normalizer/backend/src/services/addressQueryClassifier.ts @@ -13,6 +13,14 @@ const ADDRESS_ACTION_TOKENS = [ "покажи", "покаж", 
"показ", + "проверь", + "провер", + "чекни", + "чекн", + "глянь", + "глян", + "посмотри", + "смотри", "список", "найди", "найд", diff --git a/llm_normalizer/backend/src/services/addressQueryService.ts b/llm_normalizer/backend/src/services/addressQueryService.ts index 7ab9dca..333a27c 100644 --- a/llm_normalizer/backend/src/services/addressQueryService.ts +++ b/llm_normalizer/backend/src/services/addressQueryService.ts @@ -14,13 +14,15 @@ import type { AddressExecutionResult, AddressFilterSet, AddressIntent, + AddressLlmSemanticHints, AddressLimitedReasonCategory, AddressMatchFailureStage, AddressMcpCallStatus, AddressQueryShapeDetection, AddressResultMode, AddressResponseType, - AddressRuntimeReadiness + AddressRuntimeReadiness, + AddressSemanticFrame } from "../types/addressQuery"; import { buildAddressRecipePlan, @@ -47,6 +49,12 @@ import { resolveShadowRouteIntent } from "./addressCapabilityPolicy"; import { evaluateAddressRouteExpectation, type AddressRouteExpectationAudit } from "./addressRouteExpectations"; +import { + mergeKnownOrganizations, + normalizeOrganizationScopeSearchText, + normalizeOrganizationScopeValue, + resolveOrganizationSelectionFromMessage +} from "./assistantOrganizationMatcher"; interface NormalizedAddressRow { period: string | null; @@ -64,6 +72,9 @@ interface NormalizedAddressRow { interface AddressTryHandleOptions { followupContext?: AddressFollowupContext | null; analysisDateHint?: string | null; + llmSemanticHints?: AddressLlmSemanticHints | null; + activeOrganization?: string | null; + knownOrganizations?: string[]; } interface AddressCapabilityAudit { @@ -1446,6 +1457,60 @@ function isCounterpartyRiskIntent(intent: AddressIntent): boolean { ); } +function sameNormalizedOrganizationScope(left: string | null | undefined, right: string | null | undefined): boolean { + return normalizeOrganizationScopeSearchText(left ?? "") === normalizeOrganizationScopeSearchText(right ?? 
""); +} + +function applyPreExecutionOrganizationScopeGrounding(input: { + userMessage: string; + filters: AddressFilterSet; + semanticFrame: AddressSemanticFrame | null; + warnings: string[]; + baseReasons: string[]; + activeOrganization?: string | null; + knownOrganizations?: string[]; +}): string | null { + const activeOrganization = normalizeOrganizationScopeValue(input.activeOrganization ?? null); + const candidateOrganizations = mergeKnownOrganizations([ + ...(Array.isArray(input.knownOrganizations) ? input.knownOrganizations : []), + activeOrganization + ]); + const resolvedOrganizationFromMessage = resolveOrganizationSelectionFromMessage(input.userMessage, candidateOrganizations); + + if ( + !input.filters.organization && + input.semanticFrame?.scope_kind === "implicit_self_scope" && + activeOrganization + ) { + input.filters.organization = activeOrganization; + if (!input.warnings.includes("organization_from_active_scope")) { + input.warnings.push("organization_from_active_scope"); + } + if (!input.baseReasons.includes("organization_from_active_scope")) { + input.baseReasons.push("organization_from_active_scope"); + } + } + + if ( + resolvedOrganizationFromMessage && + (!input.filters.organization || input.semanticFrame?.anchor_kind === "organization") && + !sameNormalizedOrganizationScope(input.filters.organization ?? 
null, resolvedOrganizationFromMessage) + ) { + input.filters.organization = resolvedOrganizationFromMessage; + if (!input.warnings.includes("organization_grounded_from_scope_candidates")) { + input.warnings.push("organization_grounded_from_scope_candidates"); + } + if (!input.baseReasons.includes("organization_grounded_from_scope_candidates")) { + input.baseReasons.push("organization_grounded_from_scope_candidates"); + } + if (input.semanticFrame?.anchor_kind === "organization") { + input.semanticFrame.anchor_value = resolvedOrganizationFromMessage; + } + } + + return resolvedOrganizationFromMessage; +} + function isHeuristicCandidatesIntent(intent: AddressIntent): boolean { return ( intent === "list_receivables_counterparties" || @@ -1472,7 +1537,10 @@ function isConfirmedBalanceIntent(intent: AddressIntent): boolean { ); } -function resolveAsOfDateBasis(filters: AddressFilterSet): AddressAsOfDateBasis | null { +function resolveAsOfDateBasis(filters: AddressFilterSet, semanticFrame?: AddressSemanticFrame | null): AddressAsOfDateBasis | null { + if (semanticFrame?.date_basis_hint) { + return semanticFrame.date_basis_hint; + } const asOfDate = normalizeAnalysisDateHint(filters.as_of_date); if (asOfDate) { return "explicit_as_of_date"; @@ -1515,7 +1583,11 @@ function deriveAddressEvidenceStrength(input: { return undefined; } -function resolveRequestedResultMode(intent: AddressIntent, filters: AddressFilterSet): AddressResultMode | undefined { +function resolveRequestedResultMode( + intent: AddressIntent, + filters: AddressFilterSet, + semanticFrame?: AddressSemanticFrame | null +): AddressResultMode | undefined { if (isConfirmedBalanceIntent(intent)) { return "confirmed_balance"; } @@ -1523,8 +1595,13 @@ function resolveRequestedResultMode(intent: AddressIntent, filters: AddressFilte return "heuristic_candidates"; } if (isHeuristicCandidatesIntent(intent)) { - const asOfDateBasis = resolveAsOfDateBasis(filters); - if (asOfDateBasis === "explicit_as_of_date" || 
asOfDateBasis === "period_end" || asOfDateBasis === "period_range") { + const asOfDateBasis = resolveAsOfDateBasis(filters, semanticFrame); + if ( + asOfDateBasis === "explicit_as_of_date" || + asOfDateBasis === "period_end" || + asOfDateBasis === "period_range" || + asOfDateBasis === "implicit_current_snapshot" + ) { return "confirmed_balance"; } return "heuristic_candidates"; @@ -1536,6 +1613,7 @@ function deriveAddressResultSemantics(input: { intent: AddressIntent; selectedRecipe: string | null; filters: AddressFilterSet; + semanticFrame?: AddressSemanticFrame | null; responseType: AddressResponseType; rowsMatched: number; }): { @@ -1545,8 +1623,8 @@ function deriveAddressResultSemantics(input: { balance_confirmed?: boolean; as_of_date_basis?: AddressAsOfDateBasis | null; } { - const asOfDateBasis = resolveAsOfDateBasis(input.filters); - const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters); + const asOfDateBasis = resolveAsOfDateBasis(input.filters, input.semanticFrame); + const requestedResultMode = resolveRequestedResultMode(input.intent, input.filters, input.semanticFrame); if (isHeuristicCandidatesIntent(input.intent)) { return { requested_result_mode: requestedResultMode, @@ -1897,6 +1975,10 @@ function shouldBoostAutoBroadenedLimit(intent: AddressIntent): boolean { ); } +function shouldClearAsOfDateForHistoryRecovery(intent: AddressIntent): boolean { + return intent === "inventory_purchase_provenance_for_item" || intent === "inventory_purchase_documents_for_item"; +} + function invertSort(sort: AddressFilterSet["sort"]): AddressFilterSet["sort"] { return sort === "period_asc" ? 
"period_desc" : "period_asc"; } @@ -2609,16 +2691,18 @@ function buildLimitedExecutionResult(input: { capabilityAudit?: AddressCapabilityAudit; shadowRouteAudit?: AddressShadowRouteAudit; routeExpectationAudit?: AddressRouteExpectationAuditState; + semanticFrame?: AddressSemanticFrame | null; }): AddressExecutionResult { const accountScopeAudit = input.accountScopeAudit ?? buildDefaultAccountScopeAudit(input.filters); const resultSemantics = deriveAddressResultSemantics({ intent: input.intent.intent, selectedRecipe: input.selectedRecipe, filters: input.filters, + semanticFrame: input.semanticFrame, responseType: "LIMITED_WITH_REASON", rowsMatched: input.rowsMatched }); - const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters); + const requestedResultMode = resolveRequestedResultMode(input.intent.intent, input.filters, input.semanticFrame); const reasonsWithConfirmedFallback = withConfirmedBalanceFallbackReason( input.reasons, requestedResultMode, @@ -2698,6 +2782,7 @@ function buildLimitedExecutionResult(input: { account_scope_drop_reason: accountScopeAudit.accountScopeDropReason, runtime_readiness: runtimeReadinessForLimitedCategory(input.category), limited_reason_category: input.category, + semantic_frame: input.semanticFrame ?? null, response_type: "LIMITED_WITH_REASON", capability_id: input.capabilityAudit?.capabilityId ?? null, capability_layer: input.capabilityAudit?.layer ?? null, @@ -2726,11 +2811,12 @@ export class AddressQueryService { } const followupContext = options.followupContext ?? null; - const decompose = runAddressDecomposeStage(userMessage, followupContext); + const decompose = runAddressDecomposeStage(userMessage, followupContext, options.llmSemanticHints ?? null); if (!decompose) { return null; } const { mode, shape, intent, filters } = decompose; + const semanticFrame = filters.semantic_frame ?? 
null; const baseReasons = [...decompose.baseReasons]; const analysisDate = normalizeAnalysisDateHint(options.analysisDateHint); if (analysisDate) { @@ -2748,7 +2834,16 @@ export class AddressQueryService { baseReasons.push("as_of_date_from_analysis_context"); } } - const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters); + const resolvedOrganizationFromMessage = applyPreExecutionOrganizationScopeGrounding({ + userMessage, + filters: filters.extracted_filters, + semanticFrame, + warnings: filters.warnings, + baseReasons, + activeOrganization: options.activeOrganization ?? null, + knownOrganizations: options.knownOrganizations ?? [] + }); + const requestedResultMode = resolveRequestedResultMode(intent.intent, filters.extracted_filters, semanticFrame); const confirmedBalancePayablesIntent = (intent.intent === "list_payables_counterparties" || intent.intent === "payables_confirmed_as_of_date") && requestedResultMode === "confirmed_balance"; @@ -2771,7 +2866,7 @@ export class AddressQueryService { const inventoryConfirmedExecution = confirmedBalanceInventoryIntent ? resolveExecutionFiltersForConfirmedBalance(filters.extracted_filters, analysisDate) : null; - const executionFilters = + let executionFilters = inventoryConfirmedExecution?.executionFilters ?? payablesConfirmedExecution?.executionFilters ?? receivablesConfirmedExecution?.executionFilters ?? @@ -2847,6 +2942,7 @@ export class AddressQueryService { ...baseReasons, FEATURE_ASSISTANT_CAPABILITY_ROUTE_GUARD_V1 ? 
"capability_route_guard_blocked" : "capability_route_guard_skipped" ], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2949,6 +3045,7 @@ export class AddressQueryService { nextStep: "могу проверить близкие сценарии: документы/платежи по контрагенту, договоры или остаток по счету", limitations: ["intent_not_supported_in_v1"], reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2971,6 +3068,7 @@ export class AddressQueryService { nextStep: "можно выбрать близкий поддерживаемый сценарий или переключить запрос в режим расширенной проверки", limitations: ["recipe_not_available"], reasons: [...baseReasons, ...recipeSelection.selection_reason], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -2993,6 +3091,7 @@ export class AddressQueryService { nextStep: `уточните: ${recipeSelection.missing_required_filters.join(", ")}`, limitations: ["missing_required_filters"], reasons: [...baseReasons, ...recipeSelection.selection_reason], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -3015,6 +3114,7 @@ export class AddressQueryService { nextStep: "включите FEATURE_ASSISTANT_ADDRESS_QUERY_LIVE_V1", limitations: ["address_live_lane_disabled"], reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -3200,6 +3300,7 @@ export class AddressQueryService { nextStep: mcp.error, limitations: ["mcp_call_failed"], reasons: [...baseReasons, mcp.error], + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -3214,7 +3315,7 @@ export class AddressQueryService { scopedRows.length === 0; const normalizedRows = accountScopeFallbackApplied ? normalizedRawRows : scopedRows; anchor = refineAnchorFromRows(anchor, normalizedRows); - const filtersForMatching: AddressFilterSet = + let filtersForMatching: AddressFilterSet = anchor.anchor_type === "counterparty" && anchor.anchor_value_resolved ? 
{ ...executionFilters, counterparty: anchor.anchor_value_resolved } : anchor.anchor_type === "contract" && anchor.anchor_value_resolved @@ -3227,15 +3328,65 @@ export class AddressQueryService { rowsBeforeScope: normalizedRawRows.length, rowsAfterScope: normalizedRows.length }); - const anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); - const filterByAnchors = anchorFilter.rows; - const filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); - const filteredRowsFutureGuard = applyFutureDatedRowsGuard( + let anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); + let filterByAnchors = anchorFilter.rows; + let filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); + let filteredRowsFutureGuard = applyFutureDatedRowsGuard( filteredRowsBeforeFutureGuard, intent.intent, futureGuardReferenceDate ); - const filteredRows = filteredRowsFutureGuard.rows; + let filteredRows = filteredRowsFutureGuard.rows; + let organizationWarehouseRecoveryApplied = false; + if ( + filteredRows.length === 0 && + anchorFilter.mismatchReason === "warehouse_anchor_not_matched_in_materialized_rows" && + resolvedOrganizationFromMessage + ) { + filters.extracted_filters = { + ...filters.extracted_filters, + organization: resolvedOrganizationFromMessage + }; + delete filters.extracted_filters.warehouse; + executionFilters = { + ...executionFilters, + organization: resolvedOrganizationFromMessage + }; + delete executionFilters.warehouse; + filtersForMatching = { + ...filtersForMatching, + organization: resolvedOrganizationFromMessage + }; + delete filtersForMatching.warehouse; + anchor = { + ...anchor, + anchor_type: "organization", + anchor_value_raw: anchor.anchor_value_raw, + anchor_value_resolved: resolvedOrganizationFromMessage, + resolver_confidence: "medium" + }; + if (semanticFrame) { + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "organization"; + 
semanticFrame.anchor_value = resolvedOrganizationFromMessage; + } + if (!filters.warnings.includes("warehouse_anchor_regrounded_to_organization_scope")) { + filters.warnings.push("warehouse_anchor_regrounded_to_organization_scope"); + } + if (!baseReasons.includes("warehouse_anchor_regrounded_to_organization_scope")) { + baseReasons.push("warehouse_anchor_regrounded_to_organization_scope"); + } + anchorFilter = applyAddressFilters(normalizedRows, filtersForMatching); + filterByAnchors = anchorFilter.rows; + filteredRowsBeforeFutureGuard = applyIntentSpecificFilter(intent.intent, filterByAnchors); + filteredRowsFutureGuard = applyFutureDatedRowsGuard( + filteredRowsBeforeFutureGuard, + intent.intent, + futureGuardReferenceDate + ); + filteredRows = filteredRowsFutureGuard.rows; + organizationWarehouseRecoveryApplied = filteredRows.length > 0; + } if (filteredRowsFutureGuard.droppedCount > 0) { if (!filters.warnings.includes("future_rows_excluded_from_response")) { filters.warnings.push("future_rows_excluded_from_response"); @@ -3263,6 +3414,11 @@ export class AddressQueryService { : matchFailureStage === "materialized_but_filtered_out_by_recipe" ? 
"rows_filtered_out_by_intent_recipe_after_anchor_match" : null; + if (organizationWarehouseRecoveryApplied) { + if (!baseReasons.includes("organization_scope_live_grounding_recovered_rows")) { + baseReasons.push("organization_scope_live_grounding_recovered_rows"); + } + } if (filteredRows.length === 0 && intent.intent === "list_documents_by_contract" && filterByAnchors.length > 0) { const recoveredBankRows = applyIntentSpecificFilter("bank_operations_by_contract", filterByAnchors); @@ -3324,6 +3480,7 @@ export class AddressQueryService { intent: intent.intent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + semanticFrame, responseType: factual.responseType, rowsMatched: recoveredRows.length }), @@ -3472,6 +3629,7 @@ export class AddressQueryService { intent: intent.intent, selectedRecipe: expandedSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, responseType: expandedFactual.responseType, rowsMatched: expandedFilteredRows.length }), @@ -3494,8 +3652,13 @@ export class AddressQueryService { if (filteredRows.length === 0 && canAutoBroadenPeriodWindow(intent.intent, filters.extracted_filters)) { const autoBroadenedFilters: AddressFilterSet = { ...filters.extracted_filters }; + const broadenedAdjustments: string[] = []; delete autoBroadenedFilters.period_from; delete autoBroadenedFilters.period_to; + if (stageStatus === "no_raw_rows" && shouldClearAsOfDateForHistoryRecovery(intent.intent) && toNonEmptyFilterValue(autoBroadenedFilters.as_of_date)) { + delete autoBroadenedFilters.as_of_date; + broadenedAdjustments.push("as_of_date_cleared_for_history_recovery"); + } if (shouldBoostAutoBroadenedLimit(intent.intent)) { autoBroadenedFilters.limit = Math.max( ADDRESS_ANCHOR_RECOVERY_LIMIT, @@ -3571,13 +3734,18 @@ export class AddressQueryService { broadenedFilteredRows, composeOptionsFromFilters(autoBroadenedFilters) ); - const broadenedLimitations = [...filters.warnings, 
"period_window_auto_broadened_to_available_data"]; - const broadenedReasons = [...baseReasons, "period_window_auto_broadened_to_available_data"]; + const broadenedLimitations = [ + ...filters.warnings, + ...broadenedAdjustments, + "period_window_auto_broadened_to_available_data" + ]; + const broadenedReasons = [...baseReasons, ...broadenedAdjustments, "period_window_auto_broadened_to_available_data"]; const broadenedResultSemantics = mergeAddressResultSemantics( deriveAddressResultSemantics({ intent: intent.intent, selectedRecipe: broadenedSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, responseType: broadenedFactual.responseType, rowsMatched: broadenedFilteredRows.length }), @@ -3645,6 +3813,7 @@ export class AddressQueryService { route_expectation_expected_requested_result_modes: broadenedRouteExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: broadenedRouteExpectationAudit.expectedResultModes, + semantic_frame: semanticFrame, ...broadenedResultSemantics, limitations: broadenedLimitations, reasons: withConfirmedBalanceFallbackReason( @@ -3793,6 +3962,7 @@ export class AddressQueryService { intent: intent.intent, selectedRecipe: historicalSelection.selected_recipe.recipe_id, filters: filters.extracted_filters, + semanticFrame, responseType: historicalFactual.responseType, rowsMatched: historicalFilteredRows.length }), @@ -3879,6 +4049,7 @@ export class AddressQueryService { intent: intent.intent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + semanticFrame, responseType: fallbackFactual.responseType, rowsMatched: documentBankFallbackRows.length }), @@ -4016,6 +4187,7 @@ export class AddressQueryService { nextStep, limitations, reasons: baseReasons, + semanticFrame, capabilityAudit, shadowRouteAudit }); @@ -4059,6 +4231,7 @@ export class AddressQueryService { intent: composeIntent, selectedRecipe: effectiveRecipeId, filters: filters.extracted_filters, + 
semanticFrame, responseType: factual.responseType, rowsMatched: filteredRows.length }), @@ -4098,6 +4271,7 @@ export class AddressQueryService { nextStep: "проверьте intent/recipe mapping или отключите FEATURE_ASSISTANT_ROUTE_EXPECTATION_HARD_GUARD_V1 для безопасного rollout", limitations: ["route_expectation_mismatch_guard_blocked"], reasons: [...baseReasons, `route_expectation_mismatch:${finalRouteExpectationAudit.reason}`], + semanticFrame, capabilityAudit, shadowRouteAudit, routeExpectationAudit: finalRouteExpectationAudit @@ -4150,6 +4324,7 @@ export class AddressQueryService { : "specify as_of_date/counterparty or enable detailed settlement registers for exact confirmed balance", limitations: [`exact_${exactModeName}_mode_unconfirmed_output_blocked`], reasons: [...baseReasons, `exact_${exactModeName}_mode_unconfirmed_output_blocked`], + semanticFrame, capabilityAudit, shadowRouteAudit, routeExpectationAudit: finalRouteExpectationAudit @@ -4214,6 +4389,7 @@ export class AddressQueryService { route_expectation_expected_selected_recipes: finalRouteExpectationAudit.expectedSelectedRecipes, route_expectation_expected_requested_result_modes: finalRouteExpectationAudit.expectedRequestedResultModes, route_expectation_expected_result_modes: finalRouteExpectationAudit.expectedResultModes, + semantic_frame: semanticFrame, ...factualResultSemantics, limitations: factualLimitations, reasons: withConfirmedBalanceFallbackReason( diff --git a/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts b/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts index 08cbb37..149bfbc 100644 --- a/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts +++ b/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts @@ -3,19 +3,45 @@ AddressIntent, AddressIntentResolution, AddressModeDetection, - AddressQueryShapeDetection + AddressQueryShapeDetection, + AddressSemanticFrame } from "../../types/addressQuery"; import { 
detectAddressQuestionMode } from "../addressQueryClassifier"; import { classifyAddressQueryShape } from "../addressQueryShapeClassifier"; import { resolveAddressIntent } from "../addressIntentResolver"; import { extractAddressFilters } from "../addressFilterExtractor"; +import { applyAddressLlmSemanticHintsToExtraction } from "./semanticHintOverlay"; +import type { AddressLlmSemanticHints } from "../../types/addressQuery"; export interface AddressFollowupContext { previous_intent?: AddressIntent; previous_filters?: AddressFilterSet; - previous_anchor_type?: "account" | "counterparty" | "contract" | "document_ref" | "item" | "warehouse" | "unknown" | null; + previous_anchor_type?: + | "account" + | "counterparty" + | "contract" + | "document_ref" + | "item" + | "organization" + | "warehouse" + | "unknown" + | null; previous_anchor_value?: string | null; resolved_counterparty_from_display?: boolean; + root_intent?: AddressIntent; + root_filters?: AddressFilterSet; + root_anchor_type?: + | "account" + | "counterparty" + | "contract" + | "document_ref" + | "item" + | "organization" + | "warehouse" + | "unknown" + | null; + root_anchor_value?: string | null; + current_frame_kind?: "generic" | "inventory_root" | "inventory_drilldown"; } export interface AddressDecomposeStageResult { @@ -26,6 +52,7 @@ export interface AddressDecomposeStageResult { extracted_filters: AddressFilterSet; missing_required_filters: string[]; warnings: string[]; + semantic_frame?: AddressSemanticFrame; }; baseReasons: string[]; } @@ -318,6 +345,159 @@ function isInventoryIntent(intent: AddressIntent | undefined): boolean { ); } +function isInventoryRootFrameIntent(intent: AddressIntent | undefined): boolean { + return intent === "inventory_on_hand_as_of_date"; +} + +function isInventoryDrilldownFrameIntent(intent: AddressIntent | undefined): boolean { + return ( + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === 
"inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date" + ); +} + +function buildInventoryRootFollowupContext( + followupContext: AddressFollowupContext | null +): AddressFollowupContext | null { + if (!followupContext || !followupContext.root_intent || !followupContext.root_filters) { + return followupContext; + } + return { + ...followupContext, + previous_intent: followupContext.root_intent, + previous_filters: { ...followupContext.root_filters }, + previous_anchor_type: followupContext.root_anchor_type ?? followupContext.previous_anchor_type, + previous_anchor_value: followupContext.root_anchor_value ?? followupContext.previous_anchor_value, + current_frame_kind: "inventory_root" + }; +} + +function getTokenCount(text: string): number { + return String(text ?? "") + .trim() + .split(/\s+/) + .filter(Boolean).length; +} + +function resolveMonthNumberFromText(text: string): number | null { + const normalized = String(text ?? 
"").toLowerCase(); + if (!normalized) { + return null; + } + if (/январ|january|jan/iu.test(normalized)) return 1; + if (/феврал|february|feb/iu.test(normalized)) return 2; + if (/март|march|mar/iu.test(normalized)) return 3; + if (/апрел|april|apr/iu.test(normalized)) return 4; + if (/(?:^|[\s,.;:!?()\-])ма(?:й|е|я)(?=$|[\s,.;:!?()\-])|may/iu.test(normalized)) return 5; + if (/июн|june|jun/iu.test(normalized)) return 6; + if (/июл|july|jul/iu.test(normalized)) return 7; + if (/август|august|aug/iu.test(normalized)) return 8; + if (/сентябр|september|sep/iu.test(normalized)) return 9; + if (/октябр|october|oct/iu.test(normalized)) return 10; + if (/ноябр|november|nov/iu.test(normalized)) return 11; + if (/декабр|december|dec/iu.test(normalized)) return 12; + return null; +} + +function resolveYearFromFilters(filters: AddressFilterSet | null | undefined): number | null { + const candidates = [ + toNonEmptyString(filters?.as_of_date), + toNonEmptyString(filters?.period_to), + toNonEmptyString(filters?.period_from) + ]; + for (const candidate of candidates) { + const match = candidate?.match(/\b((?:19|20)\d{2})\b/u); + if (match) { + const year = Number(match[1]); + if (Number.isFinite(year)) { + return year; + } + } + } + return null; +} + +function hasRelativeYearHint(text: string): boolean { + return /(?:эт(?:от|ого)(?:\s+же)?\s+год|этого\s+же\s+года|того\s+же\s+года|this\s+year|same\s+year|that\s+year)/iu.test( + String(text ?? "") + ); +} + +function resolveRelativeMonthPeriodFromInventoryRoot( + userMessage: string, + followupContext: AddressFollowupContext | null +): { period_from: string; period_to: string; as_of_date: string } | null { + if (!followupContext || !isInventoryRootFrameIntent(followupContext.root_intent)) { + return null; + } + const month = resolveMonthNumberFromText(userMessage); + if (!month) { + return null; + } + const normalized = String(userMessage ?? 
""); + if (hasExplicitPeriodLiteral(normalized) || hasExplicitCurrentDateHint(normalized)) { + return null; + } + const shortTemporalPatch = getTokenCount(normalized) <= 8 || hasRelativeYearHint(normalized); + if (!shortTemporalPatch) { + return null; + } + const year = resolveYearFromFilters(followupContext.root_filters); + if (!year) { + return null; + } + const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); + const periodFrom = `${year}-${String(month).padStart(2, "0")}-01`; + const periodTo = `${year}-${String(month).padStart(2, "0")}-${String(lastDay).padStart(2, "0")}`; + return { + period_from: periodFrom, + period_to: periodTo, + as_of_date: periodTo + }; +} + +function shouldRestoreInventoryRootFrame( + userMessage: string, + intent: AddressIntent, + extractedFilters: AddressFilterSet, + followupContext: AddressFollowupContext | null +): boolean { + if (!followupContext || !isInventoryRootFrameIntent(followupContext.root_intent)) { + return false; + } + const currentFrameKind = followupContext.current_frame_kind ?? null; + const previousIntent = followupContext.previous_intent; + const comingFromInventoryDrilldown = + currentFrameKind === "inventory_drilldown" || isInventoryDrilldownFrameIntent(previousIntent); + if (!comingFromInventoryDrilldown) { + return false; + } + const normalized = String(userMessage ?? 
""); + if ( + hasSelectedObjectInventorySignal(normalized) || + hasInventorySupplierFollowupCue(normalized) || + hasInventoryPurchaseDocumentsFollowupCue(normalized) || + hasInventoryPurchaseDateFollowupCue(normalized) || + hasBareInventoryPurchaseDateFollowupCue(normalized) || + hasInventorySaleFollowupCue(normalized) || + hasInventoryPurchaseToSaleChainFollowupCue(normalized) + ) { + return false; + } + if (intent === "inventory_on_hand_as_of_date") { + return true; + } + const hasTemporalPatch = + hasExplicitPeriodWindow(extractedFilters) || + Boolean(toNonEmptyString(extractedFilters.as_of_date)) || + hasExplicitPeriodLiteral(normalized) || + Boolean(resolveRelativeMonthPeriodFromInventoryRoot(normalized, followupContext)); + return hasTemporalPatch; +} + function hasSelectedObjectInventorySignal(text: string): boolean { return /(?:по\s+выбранному\s+объекту|for\s+selected\s+object)/iu.test(String(text ?? "")); } @@ -456,6 +636,7 @@ function mergeFollowupFilters( const previousAsOfDate = toNonEmptyString(previous.as_of_date); const previousPeriodFrom = toNonEmptyString(previous.period_from); const previousPeriodTo = toNonEmptyString(previous.period_to); + const relativeMonthFromInventoryRoot = resolveRelativeMonthPeriodFromInventoryRoot(userMessage, followupContext); const allTimeRequested = hasAllTimeHint(userMessage); const sameDateRequested = hasSameDateHint(userMessage); if (!toNonEmptyString(merged.organization) && previousOrganization) { @@ -648,6 +829,15 @@ function mergeFollowupFilters( reasons.push("as_of_date_from_open_items_followup_context"); } } + if ( + relativeMonthFromInventoryRoot && + (intent === "inventory_on_hand_as_of_date" || intent === "inventory_supplier_stock_overlap_as_of_date") + ) { + merged.period_from = relativeMonthFromInventoryRoot.period_from; + merged.period_to = relativeMonthFromInventoryRoot.period_to; + merged.as_of_date = relativeMonthFromInventoryRoot.as_of_date; + 
reasons.push("period_derived_from_inventory_root_frame_year"); + } if (intent === "inventory_aging_by_purchase_date") { const explicitItemMention = /(?:^|[\s,.;:!?()\-\u2014])(?:товар(?:у|а|ом)?|позици(?:и|я|ю)|item|row|line)(?=$|[\s,.;:!?()\-\u2014])/iu.test( String(userMessage ?? "") @@ -1016,7 +1206,8 @@ function deriveIntentWithFollowupContext( export function runAddressDecomposeStage( userMessage: string, - followupContext: AddressFollowupContext | null + followupContext: AddressFollowupContext | null, + llmSemanticHints: AddressLlmSemanticHints | null = null ): AddressDecomposeStageResult | null { const detectedMode = detectAddressQuestionMode(userMessage); const shape = classifyAddressQueryShape(userMessage); @@ -1047,18 +1238,48 @@ export function runAddressDecomposeStage( if (mode.mode !== "address_query") { return null; } - const intent = deriveIntentWithFollowupContext(detectedIntent, userMessage, followupContext); - const extractedFilters = extractAddressFilters(userMessage, intent.intent); - const followupMerged = mergeFollowupFilters(extractedFilters.extracted_filters, intent.intent, userMessage, followupContext); + let effectiveFollowupContext = followupContext; + let intent = deriveIntentWithFollowupContext(detectedIntent, userMessage, effectiveFollowupContext); + let extractedFilters = applyAddressLlmSemanticHintsToExtraction( + extractAddressFilters(userMessage, intent.intent), + llmSemanticHints + ); + if ( + shouldRestoreInventoryRootFrame( + userMessage, + intent.intent, + extractedFilters.extracted_filters, + effectiveFollowupContext + ) + ) { + effectiveFollowupContext = buildInventoryRootFollowupContext(effectiveFollowupContext); + intent = { + intent: effectiveFollowupContext?.root_intent ?? 
"inventory_on_hand_as_of_date", + confidence: "low", + reasons: [...intent.reasons, "intent_restored_to_inventory_root_frame"] + }; + extractedFilters = applyAddressLlmSemanticHintsToExtraction( + extractAddressFilters(userMessage, intent.intent), + llmSemanticHints + ); + } + const followupMerged = mergeFollowupFilters( + extractedFilters.extracted_filters, + intent.intent, + userMessage, + effectiveFollowupContext + ); const filters = { extracted_filters: followupMerged.filters, missing_required_filters: resolveMissingRequiredFilters(intent.intent, followupMerged.filters), - warnings: [...new Set([...extractedFilters.warnings, ...followupMerged.reasons])] + warnings: [...new Set([...extractedFilters.warnings, ...followupMerged.reasons])], + semantic_frame: extractedFilters.semantic_frame }; const followupContextApplied = - Boolean(followupContext) && + Boolean(effectiveFollowupContext) && (mode.reasons.includes("address_mode_from_followup_context") || intent.reasons.includes("intent_from_followup_context") || + intent.reasons.includes("intent_restored_to_inventory_root_frame") || followupMerged.reasons.length > 0); const baseReasons = [ ...mode.reasons, diff --git a/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts b/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts index 640711b..7243894 100644 --- a/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts +++ b/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts @@ -1,8 +1,16 @@ -import type { AddressFilterSet, AddressIntent, AddressQuestionMode, AddressQueryShape } from "../../types/addressQuery"; +import type { + AddressLlmSemanticHints, + AddressFilterSet, + AddressIntent, + AddressQuestionMode, + AddressQueryShape, + AddressSemanticFrame +} from "../../types/addressQuery"; import { detectAddressQuestionMode } from "../addressQueryClassifier"; import { classifyAddressQueryShape } from 
"../addressQueryShapeClassifier"; import { resolveAddressIntent } from "../addressIntentResolver"; import { extractAddressFilters } from "../addressFilterExtractor"; +import { applyAddressLlmSemanticHintsToExtraction } from "./semanticHintOverlay"; export type AddressPredecomposePeriodScope = "all_time" | "year" | "range" | "as_of" | "unspecified"; @@ -40,6 +48,7 @@ export interface AddressLlmPredecomposeContractV1 { as_of_date: string | null; has_explicit_period: boolean; }; + semantics: AddressSemanticFrame; aggregation_profile: AddressPredecomposeAggregationProfile; } @@ -59,6 +68,7 @@ export interface AddressSemanticExtractionContractV1 { }; entities: AddressLlmPredecomposeContractV1["entities"]; period: AddressLlmPredecomposeContractV1["period"]; + semantics: AddressLlmPredecomposeContractV1["semantics"]; guard_hints: { source_data_signal_detected: boolean; canonical_data_signal_detected: boolean; @@ -75,7 +85,7 @@ export interface AddressSemanticExtractionContractV1 { } const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN = - /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu; + 
/(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|\u0441\u043a\u043b\u0430\u0434|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|counterparty|contract|document|account|balance|turnover|operations?|warehouse|stock|inventory|item|goods|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu; const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu; @@ -84,7 +94,7 @@ const ADDRESS_SEMANTIC_SCOPE_META_PATTERN = /(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu; const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN = - /(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu; + 
/(?:\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition|\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c).*(?:\u0445\u0432\u043e\u0441\u0442|\u0440\u0430\u0437\u0440\u044b\u0432|\u0437\u0430\u043a\u0440\u044b\u0442|\u0446\u0435\u043f\u043e\u0447|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u043e\u0448\u0438\u0431|\u0430\u043d\u043e\u043c\u0430\u043b|\u0440\u0438\u0441\u043a|\u0441\u0432\u0435\u0440\u043a)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c).*(?:\u043f\u043e\u0447\u0435\u043c\u0443|\u0445\u0432\u043e\u0441\u0442|\u0440\u0430\u0437\u0440\u044b\u0432|\u0437\u0430\u043a\u0440\u044b\u0442|\u0446\u0435\u043f\u043e\u0447|\u043e\u0448\u0438\u0431|\u0430\u043d\u043e\u043c\u0430\u043b|\u0440\u0438\u0441\u043a))/iu; function normalizeCompact(value: unknown): string { return String(value ?? "") @@ -232,6 +242,7 @@ function inferAggregationProfile(intent: AddressIntent, shape: AddressQueryShape export function buildAddressLlmPredecomposeContractV1(input: { sourceMessage: string; canonicalMessage: string; + semanticHints?: AddressLlmSemanticHints | null; }): AddressLlmPredecomposeContractV1 { const sourceMessage = String(input.sourceMessage ?? "").trim(); const canonicalMessage = String(input.canonicalMessage ?? "").trim() || sourceMessage; @@ -239,8 +250,20 @@ export function buildAddressLlmPredecomposeContractV1(input: { const mode = detectAddressQuestionMode(canonicalMessage); const shape = classifyAddressQueryShape(canonicalMessage); const intent = resolveAddressIntent(canonicalMessage); - const extraction = extractAddressFilters(canonicalMessage, intent.intent); + const extraction = applyAddressLlmSemanticHintsToExtraction( + extractAddressFilters(canonicalMessage, intent.intent), + input.semanticHints ?? 
null + ); const filters = extraction.extracted_filters; + const semanticFrame: AddressSemanticFrame = extraction.semantic_frame ?? { + scope_kind: "none", + anchor_kind: "none", + anchor_value: null, + date_scope_kind: "none", + date_basis_hint: null, + self_scope_detected: false, + selected_object_scope_detected: false + }; const periodScope = inferPeriodScope(filters, canonicalMessage); return { @@ -266,10 +289,9 @@ export function buildAddressLlmPredecomposeContractV1(input: { period_from: toNonEmptyString(filters.period_from), period_to: toNonEmptyString(filters.period_to), as_of_date: toNonEmptyString(filters.as_of_date), - has_explicit_period: Boolean( - toNonEmptyString(filters.as_of_date) || toNonEmptyString(filters.period_from) || toNonEmptyString(filters.period_to) - ) + has_explicit_period: semanticFrame.date_scope_kind === "explicit" }, + semantics: semanticFrame, aggregation_profile: inferAggregationProfile(intent.intent, shape.shape) }; } @@ -370,6 +392,7 @@ export function buildAddressSemanticExtractionContractV1(input: { as_of_date: predecomposeContract.period.as_of_date, has_explicit_period: predecomposeContract.period.has_explicit_period }, + semantics: predecomposeContract.semantics, guard_hints: { source_data_signal_detected: sourceDataSignal, canonical_data_signal_detected: canonicalDataSignal, diff --git a/llm_normalizer/backend/src/services/address_runtime/resolveStage.ts b/llm_normalizer/backend/src/services/address_runtime/resolveStage.ts index 5d7d188..501c6bf 100644 --- a/llm_normalizer/backend/src/services/address_runtime/resolveStage.ts +++ b/llm_normalizer/backend/src/services/address_runtime/resolveStage.ts @@ -16,7 +16,16 @@ const PARTY_ANCHOR_STOPWORDS = new Set([ ]); export interface AnchorResolutionDebug { - anchor_type: "account" | "counterparty" | "contract" | "document_ref" | "item" | "warehouse" | "unknown" | null; + anchor_type: + | "account" + | "counterparty" + | "contract" + | "document_ref" + | "item" + | "warehouse" + | 
"organization" + | "unknown" + | null; anchor_value_raw: string | null; anchor_value_resolved: string | null; resolver_confidence: "high" | "medium" | "low" | null; @@ -30,6 +39,7 @@ export interface ResolveStageRow { analytics: string[]; item?: string | null; warehouse?: string | null; + organization?: string | null; } function transliterateCyrillicToLatin(value: string): string { @@ -175,6 +185,7 @@ export function resolvePrimaryAnchor(intent: AddressIntent, filters: AddressFilt const contract = typeof filters.contract === "string" ? filters.contract.trim() : ""; const item = typeof filters.item === "string" ? filters.item.trim() : ""; const warehouse = typeof filters.warehouse === "string" ? filters.warehouse.trim() : ""; + const organization = typeof filters.organization === "string" ? filters.organization.trim() : ""; const documentRef = typeof filters.document_ref === "string" ? filters.document_ref.trim() : ""; if (intent === "account_balance_snapshot" || intent === "documents_forming_balance") { @@ -260,6 +271,16 @@ export function resolvePrimaryAnchor(intent: AddressIntent, filters: AddressFilt }; } + if (organization) { + return { + anchor_type: "organization", + anchor_value_raw: organization, + anchor_value_resolved: organization, + resolver_confidence: "medium", + ambiguity_count: 0 + }; + } + if (documentRef) { return { anchor_type: "document_ref", @@ -287,7 +308,8 @@ export function refineAnchorFromRows(anchor: AnchorResolutionDebug, rows: Resolv anchor.anchor_type !== "counterparty" && anchor.anchor_type !== "contract" && anchor.anchor_type !== "item" && - anchor.anchor_type !== "warehouse" + anchor.anchor_type !== "warehouse" && + anchor.anchor_type !== "organization" ) { return anchor; } @@ -296,8 +318,16 @@ export function refineAnchorFromRows(anchor: AnchorResolutionDebug, rows: Resolv return anchor; } const searchableRows = - anchor.anchor_type === "item" || anchor.anchor_type === "warehouse" - ? 
rows.flatMap((row) => [row.registrator, row.item ?? "", row.warehouse ?? "", row.account_dt ?? "", row.account_kt ?? "", ...row.analytics]) + anchor.anchor_type === "item" || anchor.anchor_type === "warehouse" || anchor.anchor_type === "organization" + ? rows.flatMap((row) => [ + row.registrator, + row.item ?? "", + row.warehouse ?? "", + row.organization ?? "", + row.account_dt ?? "", + row.account_kt ?? "", + ...row.analytics + ]) : rows.flatMap((row) => row.analytics); const candidates = uniqueStrings( searchableRows diff --git a/llm_normalizer/backend/src/services/address_runtime/semanticHintOverlay.ts b/llm_normalizer/backend/src/services/address_runtime/semanticHintOverlay.ts new file mode 100644 index 0000000..d9cfd2d --- /dev/null +++ b/llm_normalizer/backend/src/services/address_runtime/semanticHintOverlay.ts @@ -0,0 +1,168 @@ +import type { + AddressAsOfDateBasis, + AddressFilterExtraction, + AddressLlmSemanticHints, + AddressSemanticFrame +} from "../../types/addressQuery"; + +function toNonEmptyString(value: unknown): string | null { + if (value === null || value === undefined) { + return null; + } + const normalized = String(value).trim(); + return normalized.length > 0 ? normalized : null; +} + +function normalizeToken(value: unknown): string { + return String(value ?? "") + .trim() + .toLowerCase() + .replace(/\s+/g, "_"); +} + +export function normalizeAddressLlmSemanticHints(value: unknown): AddressLlmSemanticHints | null { + if (!value || typeof value !== "object") { + return null; + } + const source = value as Record; + const scopeToken = normalizeToken(source.scope_target_kind); + const dateToken = normalizeToken(source.date_scope_kind); + + const scopeTargetKind: AddressLlmSemanticHints["scope_target_kind"] = + scopeToken === "self_scope" || + scopeToken === "selected_object" || + scopeToken === "organization" || + scopeToken === "warehouse" || + scopeToken === "counterparty" || + scopeToken === "contract" || + scopeToken === "item" + ? 
(scopeToken as AddressLlmSemanticHints["scope_target_kind"]) + : "none"; + + const dateScopeKind: AddressLlmSemanticHints["date_scope_kind"] = + dateToken === "explicit" || dateToken === "implicit_current" ? (dateToken as AddressLlmSemanticHints["date_scope_kind"]) : "missing"; + + return { + scope_target_kind: scopeTargetKind, + scope_target_text: toNonEmptyString(source.scope_target_text), + date_scope_kind: dateScopeKind, + self_scope_detected: source.self_scope_detected === true || scopeTargetKind === "self_scope", + selected_object_scope_detected: + source.selected_object_scope_detected === true || scopeTargetKind === "selected_object" + }; +} + +function defaultSemanticFrame(extraction: AddressFilterExtraction): AddressSemanticFrame { + return ( + extraction.semantic_frame ?? { + scope_kind: "none", + anchor_kind: "none", + anchor_value: null, + date_scope_kind: "none", + date_basis_hint: null, + self_scope_detected: false, + selected_object_scope_detected: false + } + ); +} + +function pushWarning(warnings: string[], value: string): void { + if (!warnings.includes(value)) { + warnings.push(value); + } +} + +function applyDateScopeHint(frame: AddressSemanticFrame, dateScopeKind: AddressLlmSemanticHints["date_scope_kind"]): void { + if (dateScopeKind === "explicit") { + frame.date_scope_kind = "explicit"; + return; + } + if (dateScopeKind === "implicit_current" && frame.date_scope_kind !== "explicit") { + frame.date_scope_kind = "implicit_current"; + frame.date_basis_hint = "implicit_current_snapshot" satisfies AddressAsOfDateBasis; + } +} + +export function applyAddressLlmSemanticHintsToExtraction( + extraction: AddressFilterExtraction, + semanticHintsInput: unknown +): AddressFilterExtraction { + const semanticHints = normalizeAddressLlmSemanticHints(semanticHintsInput); + if (!semanticHints) { + return extraction; + } + + const extractedFilters = { ...(extraction.extracted_filters ?? {}) }; + const warnings = [...(Array.isArray(extraction.warnings) ? 
extraction.warnings : [])]; + const semanticFrame = { ...defaultSemanticFrame(extraction) }; + const scopeTargetText = semanticHints.scope_target_text; + + applyDateScopeHint(semanticFrame, semanticHints.date_scope_kind); + + if (semanticHints.self_scope_detected) { + semanticFrame.scope_kind = "implicit_self_scope"; + semanticFrame.anchor_kind = "self_scope"; + semanticFrame.anchor_value = null; + semanticFrame.self_scope_detected = true; + } + + if (semanticHints.selected_object_scope_detected) { + if (semanticFrame.scope_kind === "none") { + semanticFrame.scope_kind = "selected_object_scope"; + semanticFrame.anchor_kind = "selected_object"; + semanticFrame.anchor_value = null; + } + semanticFrame.selected_object_scope_detected = true; + } + + if (semanticHints.scope_target_kind === "organization" && scopeTargetText) { + extractedFilters.organization = scopeTargetText; + pushWarning(warnings, "organization_from_llm_semantics"); + if (toNonEmptyString(extractedFilters.warehouse)) { + delete extractedFilters.warehouse; + pushWarning(warnings, "warehouse_cleared_by_llm_organization_semantics"); + } + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "organization"; + semanticFrame.anchor_value = scopeTargetText; + } + + if (semanticHints.scope_target_kind === "warehouse" && scopeTargetText) { + extractedFilters.warehouse = scopeTargetText; + pushWarning(warnings, "warehouse_from_llm_semantics"); + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "warehouse"; + semanticFrame.anchor_value = scopeTargetText; + } + + if (semanticHints.scope_target_kind === "counterparty" && scopeTargetText) { + extractedFilters.counterparty = scopeTargetText; + pushWarning(warnings, "counterparty_from_llm_semantics"); + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "counterparty"; + semanticFrame.anchor_value = scopeTargetText; + } + + if (semanticHints.scope_target_kind === "contract" && scopeTargetText) 
{ + extractedFilters.contract = scopeTargetText; + pushWarning(warnings, "contract_from_llm_semantics"); + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "contract"; + semanticFrame.anchor_value = scopeTargetText; + } + + if (semanticHints.scope_target_kind === "item" && scopeTargetText) { + extractedFilters.item = scopeTargetText; + pushWarning(warnings, "item_from_llm_semantics"); + semanticFrame.scope_kind = "explicit_anchor"; + semanticFrame.anchor_kind = "item"; + semanticFrame.anchor_value = scopeTargetText; + } + + return { + ...extraction, + extracted_filters: extractedFilters, + warnings, + semantic_frame: semanticFrame + }; +} diff --git a/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts index 4bef588..3cf95fb 100644 --- a/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts @@ -205,14 +205,17 @@ export async function runAssistantAddressAttemptRuntime( const runAddressLaneAttempt: RunAssistantAddressRuntimeInput["runAddressLaneAttempt"] = async ( messageUsed, carryMeta, - analysisDateHint + analysisDateHint, + llmSemanticHints = null ) => runAddressLaneAttemptRuntimeSafe( buildAssistantAddressLaneAttemptRuntimeInput({ messageUsed, carryMeta, analysisDateHint, + llmSemanticHints, activeOrganization: input.sessionScope.activeOrganization, + knownOrganizations: input.sessionScope.knownOrganizations, mergeFollowupContextWithOrganizationScope: input.mergeFollowupContextWithOrganizationScope, runAddressQueryTryHandle: input.runAddressQueryTryHandle }) diff --git a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptInputBuilder.ts b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptInputBuilder.ts index 52daea9..72dc45a 100644 --- 
a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptInputBuilder.ts +++ b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptInputBuilder.ts @@ -4,7 +4,9 @@ export interface BuildAssistantAddressLaneAttemptRuntimeInputInput { messageUsed: RunAssistantAddressLaneAttemptRuntimeInput["messageUsed"]; carryMeta: RunAssistantAddressLaneAttemptRuntimeInput["carryMeta"]; analysisDateHint: RunAssistantAddressLaneAttemptRuntimeInput["analysisDateHint"]; + llmSemanticHints: RunAssistantAddressLaneAttemptRuntimeInput["llmSemanticHints"]; activeOrganization: RunAssistantAddressLaneAttemptRuntimeInput["activeOrganization"]; + knownOrganizations: RunAssistantAddressLaneAttemptRuntimeInput["knownOrganizations"]; mergeFollowupContextWithOrganizationScope: RunAssistantAddressLaneAttemptRuntimeInput["mergeFollowupContextWithOrganizationScope"]; runAddressQueryTryHandle: RunAssistantAddressLaneAttemptRuntimeInput["runAddressQueryTryHandle"]; @@ -17,7 +19,9 @@ export function buildAssistantAddressLaneAttemptRuntimeInput( messageUsed: input.messageUsed, carryMeta: input.carryMeta, analysisDateHint: input.analysisDateHint, + llmSemanticHints: input.llmSemanticHints, activeOrganization: input.activeOrganization, + knownOrganizations: input.knownOrganizations, mergeFollowupContextWithOrganizationScope: input.mergeFollowupContextWithOrganizationScope, runAddressQueryTryHandle: input.runAddressQueryTryHandle }; diff --git a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptQueryOptionsBuilder.ts b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptQueryOptionsBuilder.ts index 7516da5..adbcc5f 100644 --- a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptQueryOptionsBuilder.ts +++ b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptQueryOptionsBuilder.ts @@ -11,18 +11,28 @@ export function resolveAssistantAddressLaneAttemptFollowupContext( export interface BuildAssistantAddressLaneAttemptQueryOptionsInput { 
analysisDateHint: RunAssistantAddressLaneAttemptRuntimeInput["analysisDateHint"]; scopedFollowupContext: Record | null; + llmSemanticHints: RunAssistantAddressLaneAttemptRuntimeInput["llmSemanticHints"]; + activeOrganization: RunAssistantAddressLaneAttemptRuntimeInput["activeOrganization"]; + knownOrganizations: RunAssistantAddressLaneAttemptRuntimeInput["knownOrganizations"]; } export function buildAssistantAddressLaneAttemptQueryOptions( input: BuildAssistantAddressLaneAttemptQueryOptionsInput ): Parameters[1] { - if (input.scopedFollowupContext) { - return { - followupContext: input.scopedFollowupContext, - analysisDateHint: input.analysisDateHint - }; - } - return { + const base = { analysisDateHint: input.analysisDateHint - }; + } as Parameters[1]; + if (input.scopedFollowupContext) { + base.followupContext = input.scopedFollowupContext; + } + if (input.llmSemanticHints) { + base.llmSemanticHints = input.llmSemanticHints; + } + if (input.activeOrganization) { + base.activeOrganization = input.activeOrganization; + } + if (input.knownOrganizations.length > 0) { + base.knownOrganizations = input.knownOrganizations; + } + return base; } diff --git a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptRuntimeAdapter.ts index 6e255cf..4ced9a8 100644 --- a/llm_normalizer/backend/src/services/assistantAddressLaneAttemptRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressLaneAttemptRuntimeAdapter.ts @@ -9,7 +9,9 @@ export interface RunAssistantAddressLaneAttemptRuntimeInput { messageUsed: string; carryMeta: AssistantAddressCarryoverLike | null; analysisDateHint: string | null; + llmSemanticHints?: Record | null; activeOrganization: string | null; + knownOrganizations: string[]; mergeFollowupContextWithOrganizationScope: ( followupContext: Record | null, organization: string | null @@ -19,6 +21,9 @@ export interface 
RunAssistantAddressLaneAttemptRuntimeInput { options: { followupContext?: Record; analysisDateHint?: string | null; + llmSemanticHints?: Record | null; + activeOrganization?: string | null; + knownOrganizations?: string[]; } ) => Promise; } @@ -35,7 +40,10 @@ export async function runAssistantAddressLaneAttemptRuntime( input.messageUsed, buildAssistantAddressLaneAttemptQueryOptions({ analysisDateHint: input.analysisDateHint, - scopedFollowupContext + scopedFollowupContext, + llmSemanticHints: input.llmSemanticHints ?? null, + activeOrganization: input.activeOrganization, + knownOrganizations: input.knownOrganizations }) ); } diff --git a/llm_normalizer/backend/src/services/assistantAddressLaneResponseRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressLaneResponseRuntimeAdapter.ts index a806099..9e267e8 100644 --- a/llm_normalizer/backend/src/services/assistantAddressLaneResponseRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressLaneResponseRuntimeAdapter.ts @@ -214,12 +214,32 @@ export function runAssistantAddressLaneResponseRuntime) + : null; if (debugKnownOrganizations.length > 0) { debug.assistant_known_organizations = debugKnownOrganizations; } if (debugActiveOrganization) { debug.assistant_active_organization = debugActiveOrganization; } + const rootIntent = input.toNonEmptyString(followupContextSource?.root_intent); + const currentFrameKind = input.toNonEmptyString(followupContextSource?.current_frame_kind); + const rootFilters = + followupContextSource?.root_filters && typeof followupContextSource.root_filters === "object" + ? 
(followupContextSource.root_filters as Record) + : null; + if (rootIntent || currentFrameKind) { + debug.address_root_frame_context = { + root_intent: rootIntent, + current_frame_kind: currentFrameKind, + organization: input.toNonEmptyString(rootFilters?.organization), + as_of_date: input.toNonEmptyString(rootFilters?.as_of_date), + period_from: input.toNonEmptyString(rootFilters?.period_from), + period_to: input.toNonEmptyString(rootFilters?.period_to) + }; + } const finalization = finalizeAddressTurnSafe({ sessionId: input.sessionId, userMessage: input.userMessage, diff --git a/llm_normalizer/backend/src/services/assistantAddressLaneRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressLaneRuntimeAdapter.ts index 0dd6684..de66f75 100644 --- a/llm_normalizer/backend/src/services/assistantAddressLaneRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressLaneRuntimeAdapter.ts @@ -31,11 +31,13 @@ export interface RunAssistantAddressLaneRuntimeInput { userMessage: string; addressInputMessage: string; carryover: AssistantAddressFollowupCarryoverLike | null; + llmSemanticHints?: Record | null; shouldPreferContextualLane: boolean; canRetryWithRawUserMessage: boolean; runAddressLaneAttempt: ( messageUsed: string, - carryMeta: AssistantAddressFollowupCarryoverLike | null + carryMeta: AssistantAddressFollowupCarryoverLike | null, + llmSemanticHints?: Record | null ) => Promise; isRetryableAddressLimitedResult: (addressLane: AssistantAddressLaneLike | null | undefined) => boolean; } @@ -95,7 +97,11 @@ export async function runAssistantAddressLaneRuntime( }; if (input.shouldPreferContextualLane) { - const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover); + const contextualAddressLane = await input.runAddressLaneAttempt( + input.addressInputMessage, + input.carryover, + input.llmSemanticHints ?? 
null + ); const decision = evaluateAddressLane(contextualAddressLane, input.addressInputMessage, input.carryover); if (decision.action === "return") { return { @@ -106,7 +112,11 @@ export async function runAssistantAddressLaneRuntime( } } - const primaryAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, null); + const primaryAddressLane = await input.runAddressLaneAttempt( + input.addressInputMessage, + null, + input.llmSemanticHints ?? null + ); const primaryDecision = evaluateAddressLane(primaryAddressLane, input.addressInputMessage, null); if (primaryDecision.action === "return") { return { @@ -117,7 +127,11 @@ export async function runAssistantAddressLaneRuntime( } if (!input.shouldPreferContextualLane && input.carryover?.followupContext) { - const contextualAddressLane = await input.runAddressLaneAttempt(input.addressInputMessage, input.carryover); + const contextualAddressLane = await input.runAddressLaneAttempt( + input.addressInputMessage, + input.carryover, + input.llmSemanticHints ?? null + ); const contextualDecision = evaluateAddressLane(contextualAddressLane, input.addressInputMessage, input.carryover); if (contextualDecision.action === "return") { return { @@ -139,7 +153,11 @@ export async function runAssistantAddressLaneRuntime( if (input.carryover?.followupContext) { retryAudit.retry_used_followup_context = true; - const rawContextualLane = await input.runAddressLaneAttempt(input.userMessage, input.carryover); + const rawContextualLane = await input.runAddressLaneAttempt( + input.userMessage, + input.carryover, + input.llmSemanticHints ?? 
null + ); const rawContextualDecision = evaluateAddressLane(rawContextualLane, input.userMessage, input.carryover); if (rawContextualDecision.action === "return") { retryAudit.retry_result_category = limitedCategory(rawContextualDecision.selection.addressLane); @@ -151,7 +169,7 @@ export async function runAssistantAddressLaneRuntime( } } - const rawPrimaryLane = await input.runAddressLaneAttempt(input.userMessage, null); + const rawPrimaryLane = await input.runAddressLaneAttempt(input.userMessage, null, input.llmSemanticHints ?? null); retryAudit.retry_result_category = limitedCategory(rawPrimaryLane); const rawPrimaryDecision = evaluateAddressLane(rawPrimaryLane, input.userMessage, null); if (rawPrimaryDecision.action === "return") { diff --git a/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts index 62cd403..fa68143 100644 --- a/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts @@ -50,6 +50,64 @@ export interface BuildAssistantAddressOrchestrationRuntimeOutput { }; } +function hasSelectedObjectInventorySignal(text: string | null): boolean { + return /(?:по\s+выбранному\s+объекту|по\s+этой\s+позиции|по\s+этому\s+товару|selected\s+object)/iu.test( + String(text ?? "") + ); +} + +function hasSelectedObjectInventoryActionCue(text: string | null): boolean { + return /(?:кому[\s\S]{0,80}продал[аи]?|кому[\s\S]{0,80}реализова[нлт][а-я]*|кому\s+был\s+продан|кто[\s\S]{0,40}купил|кто\s+это\s+поставил|кто\s+поставил|у\s+кого\s+купили|у\s+кого\s+куплено|где\s+мы\s+купили|где\s+куплено|по\s+каким\s+документам|какими\s+документами|покажи\s+документы|документы\s+закупки|buyer|sale\s+trace|supplier|vendor|purchase\s+documents|purchase[\s-]?to[\s-]?sale|old\s+purchase|aged\s+stock)/iu.test( + String(text ?? 
"") + ); +} + +function isGenericCanonicalDriftIntent(intent: string | null): boolean { + return ( + intent === "open_items_by_counterparty_or_contract" || + intent === "list_documents_by_counterparty" || + intent === "list_documents_by_contract" || + intent === "bank_operations_by_counterparty" || + intent === "bank_operations_by_contract" || + intent === "documents_forming_balance" + ); +} + +function shouldPreferRawFollowupMessage( + userMessage: string, + addressInputMessage: string, + carryover: AssistantAddressCarryoverLike | null, + addressPreDecompose: Record, + toNonEmptyString: BuildAssistantAddressOrchestrationRuntimeInput["toNonEmptyString"] +): boolean { + if (!carryover?.followupContext || typeof carryover.followupContext !== "object") { + return false; + } + + const rawMessage = toNonEmptyString(userMessage); + const canonicalMessage = toNonEmptyString(addressInputMessage); + if (!rawMessage || !canonicalMessage || rawMessage === canonicalMessage) { + return false; + } + + const predecomposeContract = + addressPreDecompose?.predecomposeContract && typeof addressPreDecompose.predecomposeContract === "object" + ? (addressPreDecompose.predecomposeContract as Record) + : null; + const mode = toNonEmptyString(predecomposeContract?.mode) ?? "unknown"; + const intent = toNonEmptyString(predecomposeContract?.intent) ?? 
"unknown"; + + if (mode === "unsupported" && intent === "unknown") { + return true; + } + + return ( + hasSelectedObjectInventorySignal(rawMessage) && + hasSelectedObjectInventoryActionCue(rawMessage) && + isGenericCanonicalDriftIntent(intent) + ); +} + function fallbackAddressPreDecompose( userMessage: string, llmProvider: unknown, @@ -80,7 +138,7 @@ function fallbackAddressPreDecompose( export async function buildAssistantAddressOrchestrationRuntime( input: BuildAssistantAddressOrchestrationRuntimeInput ): Promise { - const addressPreDecompose = input.featureAddressLlmPredecomposeV1 + const initialAddressPreDecompose = input.featureAddressLlmPredecomposeV1 ? await input.runAddressLlmPreDecompose() : fallbackAddressPreDecompose( input.userMessage, @@ -89,14 +147,43 @@ export async function buildAssistantAddressOrchestrationRuntime( input.sanitizeAddressMessageForFallback ); - const addressInputMessage = + let addressPreDecompose = initialAddressPreDecompose; + let addressInputMessage = input.toNonEmptyString(addressPreDecompose?.effectiveMessage) ?? 
input.userMessage; - const carryover = input.resolveAddressFollowupCarryoverContext( + let carryover = input.resolveAddressFollowupCarryoverContext( input.userMessage, input.sessionItems, addressInputMessage, addressPreDecompose ); + if ( + shouldPreferRawFollowupMessage( + input.userMessage, + addressInputMessage, + carryover, + addressPreDecompose, + input.toNonEmptyString + ) + ) { + addressInputMessage = input.userMessage; + addressPreDecompose = { + ...addressPreDecompose, + applied: false, + effectiveMessage: input.userMessage, + reason: "followup_raw_message_preferred_over_llm_rewrite", + predecomposeContract: input.buildAddressLlmPredecomposeContractV1({ + sourceMessage: input.userMessage, + canonicalMessage: input.userMessage + }) + }; + carryover = input.resolveAddressFollowupCarryoverContext( + input.userMessage, + input.sessionItems, + addressInputMessage, + addressPreDecompose + ); + } + const followupContext = carryover?.followupContext ?? null; const orchestrationDecision = input.resolveAssistantOrchestrationDecision({ rawUserMessage: input.userMessage, diff --git a/llm_normalizer/backend/src/services/assistantAddressRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressRuntimeAdapter.ts index 6bd59f5..cdd61fc 100644 --- a/llm_normalizer/backend/src/services/assistantAddressRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressRuntimeAdapter.ts @@ -35,7 +35,8 @@ export interface RunAssistantAddressRuntimeInput { runAddressLaneAttempt: ( messageUsed: string, carryMeta: AssistantAddressCarryoverLike | null, - analysisDateHint: string | null + analysisDateHint: string | null, + llmSemanticHints?: Record | null ) => Promise; isRetryableAddressLimitedResult: (addressLane: AssistantAddressLaneLike | null | undefined) => boolean; finalizeAddressLaneResponse: ( @@ -78,7 +79,8 @@ export interface RunAssistantAddressRuntimeInput { canRetryWithRawUserMessage: boolean; runAddressLaneAttempt: ( messageUsed: string, - 
carryMeta: AssistantAddressCarryoverLike | null + carryMeta: AssistantAddressCarryoverLike | null, + llmSemanticHints?: Record | null ) => Promise; isRetryableAddressLimitedResult: (addressLane: AssistantAddressLaneLike | null | undefined) => boolean; } @@ -157,10 +159,14 @@ export async function runAssistantAddressRuntime( userMessage: input.userMessage, addressInputMessage, carryover, + llmSemanticHints: + addressRuntimeMeta && typeof addressRuntimeMeta === "object" + ? ((addressRuntimeMeta as { semanticHints?: unknown }).semanticHints as Record | null) ?? null + : null, shouldPreferContextualLane, canRetryWithRawUserMessage, - runAddressLaneAttempt: (messageUsed, carryMeta) => - input.runAddressLaneAttempt(messageUsed, carryMeta, analysisDateHint), + runAddressLaneAttempt: (messageUsed, carryMeta, llmSemanticHints = null) => + input.runAddressLaneAttempt(messageUsed, carryMeta, analysisDateHint, llmSemanticHints), isRetryableAddressLimitedResult: input.isRetryableAddressLimitedResult }); if (addressLaneRuntime.handled && addressLaneRuntime.selection) { diff --git a/llm_normalizer/backend/src/services/assistantOrganizationMatcher.ts b/llm_normalizer/backend/src/services/assistantOrganizationMatcher.ts new file mode 100644 index 0000000..723f18a --- /dev/null +++ b/llm_normalizer/backend/src/services/assistantOrganizationMatcher.ts @@ -0,0 +1,221 @@ +const ORGANIZATION_SCOPE_STOPWORDS = new Set([ + "ооо", + "зао", + "оао", + "пао", + "ао", + "ип", + "llc", + "inc", + "ltd", + "corp", + "group", + "company", + "co", + "the", + "and", + "org", + "organization", + "компания", + "организация", + "контора", + "фирма", + "база", + "по", + "в", + "во", + "на", + "для", + "из", + "у", + "к", + "от", + "это", + "эта", + "этой", + "этот", + "сегодня", + "сейчас", + "текущая", + "текущей", + "наш", + "наша", + "нашей", + "нашу", + "наши" +]); + +function normalizeScopeLabel(value: unknown): string { + return String(value ?? 
"") + .replace(/[“”«»]/g, '"') + .replace(/\s+/g, " ") + .trim(); +} + +function normalizeScopeKey(value: unknown): string { + return normalizeScopeLabel(value).toLowerCase().replace(/ё/g, "е"); +} + +export function normalizeOrganizationScopeValue(value: unknown): string | null { + const normalized = normalizeScopeLabel(value); + if (!normalized) { + return null; + } + let unwrapped = normalized.replace(/^\\+|\\+$/g, "").trim(); + if ( + (unwrapped.startsWith('"') && unwrapped.endsWith('"')) || + (unwrapped.startsWith("'") && unwrapped.endsWith("'")) + ) { + unwrapped = unwrapped.slice(1, -1).trim(); + } + return unwrapped.length > 0 ? unwrapped : null; +} + +export function normalizeOrganizationScopeSearchText(value: unknown): string { + return normalizeScopeKey(value) + .replace(/[^\p{L}\p{N}]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} + +function tokenizeOrganizationScope(value: unknown): string[] { + const normalized = normalizeOrganizationScopeSearchText(value); + if (!normalized) { + return []; + } + return normalized + .split(" ") + .map((token) => token.trim()) + .filter((token) => token.length >= 3 && !ORGANIZATION_SCOPE_STOPWORDS.has(token)); +} + +function organizationTokenVariants(token: string): string[] { + const source = String(token ?? 
"").trim().toLowerCase(); + if (!source) { + return []; + } + const variants = new Set([source]); + const withoutLongEnding = source.replace( + /(?:ами|ями|ого|ему|ому|ыми|ими|иях|ях|ах|ей|ой|ом|ем|ам|ям|ую|юю|ая|яя|ое|ее|ые|ие|ов|ев|ий|ый|ой)$/iu, + "" + ); + if (withoutLongEnding.length >= 4) { + variants.add(withoutLongEnding); + } + const withoutShortEnding = source.replace(/[аеёиоуыэюя]$/iu, ""); + if (withoutShortEnding.length >= 4) { + variants.add(withoutShortEnding); + } + return Array.from(variants); +} + +export function scoreOrganizationMentionInMessage(message: unknown, organization: unknown): number { + const messageNorm = normalizeOrganizationScopeSearchText(message); + const organizationNorm = normalizeOrganizationScopeSearchText(organization); + if (!messageNorm || !organizationNorm) { + return 0; + } + if (messageNorm.includes(organizationNorm)) { + return 10_000 + organizationNorm.length; + } + const organizationTokens = tokenizeOrganizationScope(organizationNorm); + const messageTokens = tokenizeOrganizationScope(messageNorm); + if (organizationTokens.length === 0 || messageTokens.length === 0) { + return 0; + } + + let matchedTokens = 0; + let score = 0; + for (const token of organizationTokens) { + const variants = organizationTokenVariants(token); + let matched = false; + let variantScore = 0; + for (const variant of variants) { + if (!variant) { + continue; + } + if (messageNorm.includes(variant)) { + matched = true; + variantScore = Math.max(variantScore, variant.length * 5); + continue; + } + const fuzzyMatched = messageTokens.some((messageToken) => { + if (messageToken === variant) { + return true; + } + if (messageToken.length >= 5 && variant.length >= 5) { + return messageToken.startsWith(variant) || variant.startsWith(messageToken); + } + return false; + }); + if (fuzzyMatched) { + matched = true; + variantScore = Math.max(variantScore, Math.max(20, variant.length * 3)); + } + } + if (matched) { + matchedTokens += 1; + score += 
variantScore > 0 ? variantScore : 10; + } + } + + if (matchedTokens === 0) { + return 0; + } + if (matchedTokens === organizationTokens.length) { + score += 400; + } else { + score += matchedTokens * 50; + } + return score; +} + +export function mergeKnownOrganizations(values: unknown[], limit = 50): string[] { + const dedup = new Map(); + for (const raw of Array.isArray(values) ? values : []) { + const normalized = normalizeOrganizationScopeValue(raw); + if (!normalized) { + continue; + } + const key = normalizeOrganizationScopeSearchText(normalized); + if (!key || dedup.has(key)) { + continue; + } + dedup.set(key, normalized); + } + return Array.from(dedup.values()).slice(0, limit); +} + +export function resolveOrganizationSelectionFromMessage( + userMessage: string, + knownOrganizations: unknown[] +): string | null { + const known = mergeKnownOrganizations(Array.isArray(knownOrganizations) ? knownOrganizations : []); + if (!userMessage || known.length === 0) { + return null; + } + const messageNorm = normalizeOrganizationScopeSearchText(userMessage); + if (!messageNorm) { + return null; + } + const scored = known + .map((organization) => ({ + organization, + score: scoreOrganizationMentionInMessage(messageNorm, organization) + })) + .filter((item) => item.score > 0) + .sort((a, b) => b.score - a.score || a.organization.length - b.organization.length); + + if (scored.length === 0) { + return null; + } + const best = scored[0]; + const second = scored[1]; + if (best.score < 90) { + return null; + } + if (second && second.score === best.score) { + return null; + } + return best.organization; +} diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index 2e5c22d..03a39e1 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -2453,6 +2453,62 @@ function findRecentAddressFilterValue(items, key) { } return null; } 
+function isInventoryRootFrameIntent(intent) { + return intent === "inventory_on_hand_as_of_date"; +} +function isInventoryDrilldownFrameIntent(intent) { + return intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date"; +} +function extractAddressCarryoverAnchor(addressDebug) { + if (!isAddressLaneDebugPayload(addressDebug)) { + return { + anchorType: null, + anchorValue: null + }; + } + return { + anchorType: toNonEmptyString(addressDebug.anchor_type), + anchorValue: toNonEmptyString(addressDebug.anchor_value_resolved) ?? + toNonEmptyString(addressDebug.anchor_value_raw) ?? + readAddressInventoryItemFilter(addressDebug) ?? + readAddressFilterString(addressDebug, "counterparty") ?? + readAddressFilterString(addressDebug, "contract") ?? + readAddressFilterString(addressDebug, "account") + }; +} +function findRecentInventoryRootFrame(items) { + for (let index = items.length - 1; index >= 0; index -= 1) { + const item = items[index]; + if (!item || item.role !== "assistant" || !item.debug) { + continue; + } + const debug = item.debug; + if (!isAddressLaneDebugPayload(debug)) { + continue; + } + const detectedIntent = toNonEmptyString(debug.detected_intent); + if (!isInventoryRootFrameIntent(detectedIntent)) { + continue; + } + const anchor = extractAddressCarryoverAnchor(debug); + const filtersRaw = debug.extracted_filters; + const filters = filtersRaw && typeof filtersRaw === "object" + ? 
{ ...filtersRaw } + : {}; + return { + intent: detectedIntent, + filters, + anchorType: anchor.anchorType, + anchorValue: anchor.anchorValue, + messageId: toNonEmptyString(item.message_id) + }; + } + return null; +} const ADDRESS_FOLLOWUP_OFFER_BY_INTENT = { list_documents_by_counterparty: ["bank_operations_by_counterparty", "list_contracts_by_counterparty"], bank_operations_by_counterparty: ["list_documents_by_counterparty", "list_contracts_by_counterparty"], @@ -2755,6 +2811,14 @@ function resolveAddressFollowupCarryoverContext(userMessage, items, alternateMes readAddressFilterString(previousAddressDebug, "counterparty") ?? readAddressFilterString(previousAddressDebug, "account") ?? readAddressFilterString(previousAddressDebug, "contract"); + const inventoryRootFrame = findRecentInventoryRootFrame(items); + const currentFrameKind = inventoryRootFrame + ? isInventoryDrilldownFrameIntent(sourceIntent) + ? "inventory_drilldown" + : isInventoryRootFrameIntent(sourceIntent) + ? "inventory_root" + : "generic" + : null; let resolvedCounterpartyFromDisplay = false; const previousFiltersRaw = previousAddressDebug.extracted_filters; const previousFilters = previousFiltersRaw && typeof previousFiltersRaw === "object" @@ -2814,7 +2878,12 @@ function resolveAddressFollowupCarryoverContext(userMessage, items, alternateMes previous_filters: previousFilters, previous_anchor_type: previousAnchorType ?? undefined, previous_anchor_value: previousAnchor, - resolved_counterparty_from_display: resolvedCounterpartyFromDisplay || undefined + resolved_counterparty_from_display: resolvedCounterpartyFromDisplay || undefined, + root_intent: inventoryRootFrame?.intent ?? undefined, + root_filters: inventoryRootFrame?.filters ?? undefined, + root_anchor_type: inventoryRootFrame?.anchorType ?? undefined, + root_anchor_value: inventoryRootFrame?.anchorValue ?? undefined, + current_frame_kind: currentFrameKind ?? 
undefined }, previousAddressIntent: previousIntent, previousAddressAnchor: previousAnchor, @@ -2890,19 +2959,32 @@ function isAddressLlmPreDecomposeCandidate(userMessage) { } return /(?:\bдок\b|доки|документ|контрагент|договор|остаток|сч(?:е|ё)т|сальдо|банк|выписк|платеж|оплат|поступлен|поступлени|списан|реализац|сверк|взаиморасч|кто\s+должен|show|list|documents?|counterparty|contract|account|balance|bank\s+operations?|doki|dokument(?:y|ov|am|a)?|platezh|oplata|schet|saldo)/i.test(text); } -function extractAddressQuestionFromNormalized(normalized) { - if (!normalized || typeof normalized !== "object") { +function normalizeAddressSemanticHintsFromFragment(fragment) { + if (!fragment || typeof fragment !== "object") { return null; } - const source = normalized; - const fragments = Array.isArray(source.fragments) ? source.fragments : []; - for (const item of fragments) { + const hints = fragment.semantic_hints; + if (!hints || typeof hints !== "object") { + return null; + } + const scopeTargetKind = toNonEmptyString(hints.scope_target_kind); + const dateScopeKind = toNonEmptyString(hints.date_scope_kind); + return { + scope_target_kind: scopeTargetKind ?? "none", + scope_target_text: toNonEmptyString(hints.scope_target_text), + date_scope_kind: dateScopeKind ?? "missing", + self_scope_detected: hints.self_scope_detected === true || scopeTargetKind === "self_scope", + selected_object_scope_detected: hints.selected_object_scope_detected === true || scopeTargetKind === "selected_object" + }; +} +function extractAddressPredecomposeCandidateFromFragments(fragments) { + for (const item of Array.isArray(fragments) ? fragments : []) { if (!item || typeof item !== "object") { continue; } const fragment = item; const domainRelevance = String(fragment.domain_relevance ?? 
"").trim().toLowerCase(); - if (domainRelevance === "out_of_scope") { + if (domainRelevance === "out_of_scope" || domainRelevance === "offtopic") { continue; } const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); @@ -2912,11 +2994,20 @@ function extractAddressQuestionFromNormalized(normalized) { continue; } if (candidate.length >= 3 && candidate.length <= 500) { - return candidate; + return { + candidate, + semanticHints: normalizeAddressSemanticHintsFromFragment(fragment) + }; } } return null; } +function extractAddressPredecomposeCandidateFromNormalized(normalized) { + if (!normalized || typeof normalized !== "object") { + return null; + } + return extractAddressPredecomposeCandidateFromFragments(normalized.fragments); +} function stripMarkdownJsonFence(text) { return String(text ?? "") .trim() @@ -2994,7 +3085,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) { } return null; } -function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { +function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) { const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); if (!outputText) { return null; @@ -3003,31 +3094,7 @@ function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { if (!parsed || typeof parsed !== "object") { return null; } - const source = parsed; - const fragments = Array.isArray(source.fragments) ? 
source.fragments : []; - for (const item of fragments) { - if (!item || typeof item !== "object") { - continue; - } - const fragment = item; - const domainRelevance = fragment.domain_relevance; - if (typeof domainRelevance === "string" && domainRelevance.trim().toLowerCase() === "out_of_scope") { - continue; - } - if (domainRelevance === false) { - continue; - } - const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); - const rawText = toNonEmptyString(fragment.raw_fragment_text); - const candidate = selectPreferredAddressFragmentCandidate(rawText ?? "", normalizedText ?? ""); - if (!candidate) { - continue; - } - if (candidate.length >= 3 && candidate.length <= 500) { - return candidate; - } - } - return null; + return extractAddressPredecomposeCandidateFromFragments(parsed.fragments); } const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "есть", @@ -3267,7 +3334,8 @@ function attachAddressPredecomposeContract(meta, sourceMessage) { const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? String(sourceMessage ?? ""); const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({ sourceMessage: String(sourceMessage ?? ""), - canonicalMessage + canonicalMessage, + semanticHints: meta?.semanticHints ?? null }); const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({ sourceMessage: String(sourceMessage ?? ""), @@ -3332,31 +3400,34 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidateFromNormalized = extractAddressQuestionFromNormalized(normalized?.normalized); - const candidateFromRaw = candidateFromNormalized ? null : extractAddressQuestionFromRawNormalizerOutput(normalized?.raw_model_output); - const candidate = candidateFromNormalized ?? 
candidateFromRaw; + const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output); + const candidateMeta = candidateFromNormalized ?? candidateFromRaw; + const candidate = candidateMeta?.candidate ?? null; if (!candidate) { if (fallbackCandidate) { const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase()); const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase()); const fallbackApplied = fallbackCompact.length > 0 && fallbackCompact !== sourceCompact; if (fallbackApplied) { - return attachAddressPredecomposeContract({ - ...baseMeta, - attempted: true, - applied: true, - traceId: normalized?.trace_id ?? null, - effectiveMessage: fallbackCandidate.candidate, - reason: "fallback_rule_applied_after_llm", - fallbackRuleHit: fallbackCandidate.rule - }, userMessage); - } + return attachAddressPredecomposeContract({ + ...baseMeta, + attempted: true, + applied: true, + traceId: normalized?.trace_id ?? null, + effectiveMessage: fallbackCandidate.candidate, + reason: "fallback_rule_applied_after_llm", + fallbackRuleHit: fallbackCandidate.rule, + semanticHints: null + }, userMessage); + } } return attachAddressPredecomposeContract({ ...baseMeta, attempted: true, traceId: normalized?.trace_id ?? null, - reason: normalized?.ok ? "no_usable_fragment" : "normalize_failed" + reason: normalized?.ok ? 
"no_usable_fragment" : "normalize_failed", + semanticHints: null }, userMessage); } const repairedSourceMessage = repairAddressMojibake(userMessage); @@ -3375,7 +3446,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_diagnostic_rewrite", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const intentConflict = sourceIntentKnown && @@ -3397,7 +3469,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage ? "normalized_fragment_rejected_intent_drop" : "normalized_fragment_rejected_intent_conflict", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceHasExplicitDrilldownSignal = hasPredecomposeExplicitDrilldownSignal(repairedSourceMessage || userMessage); @@ -3418,7 +3491,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_followup_intent_injection", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceHasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignalForPredecompose(repairedSourceMessage || userMessage); @@ -3438,7 +3512,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_selected_object_context_loss", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? 
null }, userMessage); } const sourceAnchorQuality = evaluateAddressAnchorQuality(repairedSourceMessage || userMessage); @@ -3464,7 +3539,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_anchor_substitution", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const anchorDegradedByCandidate = sameIntentForAnchorSafety && @@ -3481,7 +3557,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_anchor_degradation", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } if (fallbackCandidate) { @@ -3500,19 +3577,25 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: fallbackCandidate.candidate, reason: "fallback_rule_preferred_over_llm_candidate_anchor_quality", fallbackRuleHit: fallbackCandidate.rule, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } } const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({ sourceMessage: String(userMessage ?? ""), - canonicalMessage: candidate + canonicalMessage: candidate, + predecomposeContract: (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({ + sourceMessage: String(userMessage ?? ""), + canonicalMessage: candidate, + semanticHints: candidateMeta?.semanticHints ?? 
null + }) }); if (!semanticContractForCandidate.apply_canonical_recommended) { const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected); const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected && candidateFromNormalized && - candidateFromNormalized === candidate && + candidateFromNormalized.candidate === candidate && toNonEmptyString(candidate)); if (rawFragmentCandidatePreferred) { return attachAddressPredecomposeContract({ @@ -3524,7 +3607,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: candidate, reason: "normalized_fragment_semantic_guard_raw_fragment_preferred", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } if (fallbackCandidate) { @@ -3545,7 +3629,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: String(fallbackCandidate.candidate ?? ""), reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard", fallbackRuleHit: fallbackCandidate.rule, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } } @@ -3558,7 +3643,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage effectiveMessage: userMessage, reason: "normalized_fragment_rejected_semantic_guard", fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? null }, userMessage); } const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase()); @@ -3585,7 +3671,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage reason, llmCanonicalCandidateDetected: true, fallbackRuleHit: null, - sanitizedUserMessage + sanitizedUserMessage, + semanticHints: candidateMeta?.semanticHints ?? 
null }, userMessage); } catch (error) { @@ -3933,7 +4020,11 @@ export function resolveAssistantOrchestrationDecision(input) { hasOpenContractsAddressSignal(repairedEffectiveAddressUserMessage); const modeSample = repairedEffectiveAddressUserMessage || effectiveAddressUserMessage; const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample); + const modeDetectionRaw = (0, addressQueryClassifier_1.detectAddressQuestionMode)(repairedRawUserMessage || rawUserMessage); + const resolvedModeDetection = modeDetection.mode === "address_query" ? modeDetection : modeDetectionRaw; const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample); + const intentResolutionRaw = (0, addressIntentResolver_1.resolveAddressIntent)(repairedRawUserMessage || rawUserMessage); + const resolvedIntentResolution = intentResolution.intent !== "unknown" ? intentResolution : intentResolutionRaw; const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent); const llmPreDecomposeReason = toNonEmptyString(llmPreDecomposeMeta?.reason); const llmRuntimeUnavailableDetected = Boolean(llmPreDecomposeReason && @@ -3951,10 +4042,10 @@ export function resolveAssistantOrchestrationDecision(input) { hasStrictDeepInvestigationCue(repairedRawUserMessage) || hasStrictDeepInvestigationCue(effectiveAddressUserMessage) || hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage); - const strictDeepInvestigationBypassAllowed = shouldBypassStrictDeepInvestigationCueForAddressIntent(intentResolution.intent) || + const strictDeepInvestigationBypassAllowed = shouldBypassStrictDeepInvestigationCueForAddressIntent(resolvedIntentResolution.intent) || shouldBypassStrictDeepInvestigationCueForAddressIntent(llmContractIntent); const keepAddressLaneByIntent = semanticApplyCanonicalRecommended && - Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) || + 
Boolean((resolvedIntentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(resolvedIntentResolution.intent)) || (llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent)) || openContractsAddressSignal) && (!strictDeepInvestigationCueDetected || strictDeepInvestigationBypassAllowed); @@ -3995,8 +4086,8 @@ export function resolveAssistantOrchestrationDecision(input) { !capabilityMetaQuery && !dataRetrievalSignal && !effectiveAddressFollowupSignal && - modeDetection.mode === "unsupported" && - intentResolution.intent === "unknown"); + resolvedModeDetection.mode === "unsupported" && + resolvedIntentResolution.intent === "unknown"); const nonDomainQueryIndexed = Boolean(!llmFirstAddressCandidate && deterministicNonDomainGuard && (llmFirstUnsupportedCandidate || llmContractMode === null) && @@ -4016,10 +4107,10 @@ export function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "data_scope", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4044,10 +4135,10 @@ export function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "capability", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: 
resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4072,10 +4163,10 @@ export function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: "non_domain", - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, followup_context_detected: Boolean(followupContext), @@ -4111,7 +4202,7 @@ export function resolveAssistantOrchestrationDecision(input) { hasShortDebtMirrorFollowupSignal(repairedRawUserMessage) || hasShortDebtMirrorFollowupSignal(repairedEffectiveAddressUserMessage)); const supportedAddressIntentDetected = (!strictDeepInvestigationCueDetected || strictDeepInvestigationBypassAllowed) && - Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) || + Boolean((resolvedIntentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(resolvedIntentResolution.intent)) || (llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent)) || openContractsAddressSignal); const semanticGuardHints = semanticExtractionContract?.guard_hints && @@ -4131,7 +4222,7 @@ export function resolveAssistantOrchestrationDecision(input) { semanticAggregateShapeDetected || 
semanticDeepInvestigationHintDetected || !semanticApplyCanonicalRecommended)); - const unsupportedIntentOrMode = (modeDetection.mode !== "address_query" && intentResolution.intent === "unknown") || + const unsupportedIntentOrMode = (resolvedModeDetection.mode !== "address_query" && resolvedIntentResolution.intent === "unknown") || llmContractMode === "unsupported"; const unsupportedAddressIntentFallbackToDeep = Boolean(baseToolGate?.runAddressLane && !llmRuntimeUnavailableDetected && @@ -4251,10 +4342,10 @@ export function resolveAssistantOrchestrationDecision(input) { orchestrationContract: { schema_version: "assistant_orchestration_contract_v1", hard_meta_mode: null, - address_mode: modeDetection.mode, - address_mode_confidence: modeDetection.confidence, - address_intent: intentResolution.intent, - address_intent_confidence: intentResolution.confidence, + address_mode: resolvedModeDetection.mode, + address_mode_confidence: resolvedModeDetection.confidence, + address_intent: resolvedIntentResolution.intent, + address_intent_confidence: resolvedIntentResolution.confidence, strong_data_signal_detected: strongDataSignal, data_retrieval_signal_detected: dataRetrievalSignal, semantic_contract_valid: semanticContractValid, diff --git a/llm_normalizer/backend/src/services/normalizerService.ts b/llm_normalizer/backend/src/services/normalizerService.ts index 0fa11a2..158b514 100644 --- a/llm_normalizer/backend/src/services/normalizerService.ts +++ b/llm_normalizer/backend/src/services/normalizerService.ts @@ -14,6 +14,7 @@ import type { NormalizedFragmentV2, NormalizedFragmentV2_0_1, NormalizedFragmentV2_0_2, + NormalizedFragmentSemanticHints, NormalizedPayload, NormalizedQueryV1, NormalizedQueryV2, @@ -352,6 +353,90 @@ function coerceFlags( }; } +function inferSemanticHints( + rawText: string, + timeScope: NormalizedFragmentV2["time_scope"] +): NormalizedFragmentSemanticHints { + return { + scope_target_kind: "none", + scope_target_text: null, + date_scope_kind: 
timeScope.type === "explicit" ? "explicit" : "missing", + self_scope_detected: false, + selected_object_scope_detected: /(?:по\s+выбранному\s+объекту|selected\s+object)/iu.test(String(rawText ?? "")) + }; +} + +function coerceSemanticScopeTargetKind(value: unknown): NormalizedFragmentSemanticHints["scope_target_kind"] { + const token = normalizeToken(value); + if ( + token === "none" || + token === "self_scope" || + token === "selected_object" || + token === "organization" || + token === "warehouse" || + token === "counterparty" || + token === "contract" || + token === "item" + ) { + return token; + } + if (["organization_scope", "company_scope", "org_scope", "company", "organization_anchor"].includes(token)) { + return "organization"; + } + if (["warehouse_scope", "stock_scope", "warehouse_anchor"].includes(token)) { + return "warehouse"; + } + if (["own_company_scope", "implicit_self_scope", "our_scope"].includes(token)) { + return "self_scope"; + } + if (["selected_object_scope", "selected_object_anchor"].includes(token)) { + return "selected_object"; + } + return "none"; +} + +function coerceSemanticDateScopeKind(value: unknown): NormalizedFragmentSemanticHints["date_scope_kind"] { + const token = normalizeToken(value); + if (token === "explicit" || token === "implicit_current" || token === "missing") { + return token; + } + if (["implicit_current_snapshot", "current", "today", "default_current"].includes(token)) { + return "implicit_current"; + } + return "missing"; +} + +function coerceSemanticHints( + value: unknown, + rawText: string, + timeScope: NormalizedFragmentV2["time_scope"] +): NormalizedFragmentSemanticHints { + const fallback = inferSemanticHints(rawText, timeScope); + if (!value || typeof value !== "object") { + return fallback; + } + const source = value as Record; + return { + scope_target_kind: coerceSemanticScopeTargetKind(source.scope_target_kind ?? source.anchor_kind ?? 
source.scope_kind), + scope_target_text: + toOptionalString( + source.scope_target_text ?? + source.anchor_value ?? + source.organization ?? + source.warehouse ?? + source.counterparty ?? + source.contract ?? + source.item + ) ?? fallback.scope_target_text, + date_scope_kind: coerceSemanticDateScopeKind(source.date_scope_kind ?? source.date_scope ?? source.time_scope_kind), + self_scope_detected: coerceBoolean(source.self_scope_detected, fallback.self_scope_detected), + selected_object_scope_detected: coerceBoolean( + source.selected_object_scope_detected, + fallback.selected_object_scope_detected + ) + }; +} + function mapCandidateLabel(value: string): NormalizedFragmentV2["candidate_labels"][number] | null { const token = normalizeToken(value); if (CANDIDATE_LABEL_VALUES.includes(token as NormalizedFragmentV2["candidate_labels"][number])) { @@ -421,6 +506,7 @@ function coerceFragmentV2(rawFragment: unknown, index: number, userMessage: stri const accountHints = coerceStringArray(source.account_hints); const documentHints = coerceStringArray(source.document_hints); const registerHints = coerceStringArray(source.register_hints); + const timeScope = coerceTimeScope(source.time_scope, rawText, base.time_scope); return { fragment_id: coerceFragmentId(source.fragment_id, index, base.fragment_id), @@ -432,8 +518,9 @@ function coerceFragmentV2(rawFragment: unknown, index: number, userMessage: stri account_hints: accountHints.length > 0 ? accountHints : base.account_hints, document_hints: documentHints.length > 0 ? documentHints : base.document_hints, register_hints: registerHints.length > 0 ? 
registerHints : base.register_hints, - time_scope: coerceTimeScope(source.time_scope, rawText, base.time_scope), + time_scope: timeScope, flags, + semantic_hints: coerceSemanticHints(source.semantic_hints, rawText, timeScope), candidate_labels: coerceCandidateLabels(source.candidate_labels, flags, domainRelevance, base.candidate_labels), confidence: coerceConfidence(source.confidence, base.confidence) }; @@ -923,6 +1010,7 @@ function buildFragmentV2(rawText: string, index: number): NormalizedFragmentV2 | } else if (flags.asks_for_exact_object_trace || flags.asks_for_ranking_or_top) { confidence = "high"; } + const timeScope = inferTimeScope(text); return { fragment_id: `F${index + 1}`, @@ -940,8 +1028,9 @@ function buildFragmentV2(rawText: string, index: number): NormalizedFragmentV2 | account_hints: extractAccounts(text), document_hints: Array.from(new Set(Array.from(lower.matchAll(/(документ|реализац|поступлен|платеж|выписк|акт сверк)/g)).map((item) => item[0]))), register_hints: Array.from(new Set(Array.from(lower.matchAll(/(регистр|движен|остатк|сальдо)/g)).map((item) => item[0]))), - time_scope: inferTimeScope(text), + time_scope: timeScope, flags, + semantic_hints: inferSemanticHints(text, timeScope), candidate_labels: candidateLabels, confidence }; diff --git a/llm_normalizer/backend/src/types/addressQuery.ts b/llm_normalizer/backend/src/types/addressQuery.ts index 70233da..07247eb 100644 --- a/llm_normalizer/backend/src/types/addressQuery.ts +++ b/llm_normalizer/backend/src/types/addressQuery.ts @@ -38,11 +38,40 @@ export type AddressIntent = export type AddressResponseType = "FACTUAL_LIST" | "FACTUAL_SUMMARY" | "LIMITED_WITH_REASON"; export type AddressResultMode = "heuristic_candidates" | "confirmed_balance"; export type AddressEvidenceStrength = "weak" | "medium" | "strong"; -export type AddressAsOfDateBasis = "period_end" | "explicit_as_of_date" | "period_range"; +export type AddressAsOfDateBasis = "period_end" | "explicit_as_of_date" | "period_range" | 
"implicit_current_snapshot"; export type AddressCapabilityLayer = "compute" | "navigation" | "conversational"; export type AddressCapabilityRouteMode = "exact" | "heuristic"; export type AddressShadowRouteStatus = "skipped" | "planned" | "unavailable"; export type AddressRouteExpectationStatus = "matched" | "mismatch" | "not_found"; +export type AddressSemanticScopeKind = "none" | "explicit_anchor" | "implicit_self_scope" | "selected_object_scope"; +export type AddressSemanticAnchorKind = + | "none" + | "warehouse" + | "organization" + | "counterparty" + | "contract" + | "item" + | "self_scope" + | "selected_object"; +export type AddressSemanticDateScopeKind = "none" | "explicit" | "implicit_current"; + +export interface AddressLlmSemanticHints { + scope_target_kind: Exclude | "none"; + scope_target_text: string | null; + date_scope_kind: Exclude | "missing"; + self_scope_detected: boolean; + selected_object_scope_detected: boolean; +} + +export interface AddressSemanticFrame { + scope_kind: AddressSemanticScopeKind; + anchor_kind: AddressSemanticAnchorKind; + anchor_value: string | null; + date_scope_kind: AddressSemanticDateScopeKind; + date_basis_hint: AddressAsOfDateBasis | null; + self_scope_detected: boolean; + selected_object_scope_detected: boolean; +} export type AddressQueryShape = | "AGGREGATE_LOOKUP" @@ -124,6 +153,7 @@ export interface AddressFilterExtraction { extracted_filters: AddressFilterSet; missing_required_filters: string[]; warnings: string[]; + semantic_frame?: AddressSemanticFrame; } export interface AddressRecipeDefinition { @@ -188,7 +218,16 @@ export interface AddressExecutionDebug { mcp_call_status_legacy: Exclude; account_scope_mode: AddressAccountScopeMode; account_scope_fallback_applied: boolean; - anchor_type: "account" | "counterparty" | "contract" | "document_ref" | "item" | "warehouse" | "unknown" | null; + anchor_type: + | "account" + | "counterparty" + | "contract" + | "document_ref" + | "item" + | "organization" + | "warehouse" 
+ | "unknown" + | null; anchor_value_raw: string | null; anchor_value_resolved: string | null; resolver_confidence: "high" | "medium" | "low" | null; @@ -221,6 +260,7 @@ export interface AddressExecutionDebug { | "rows_remaining_after_scope_filter"; runtime_readiness: AddressRuntimeReadiness; limited_reason_category: AddressLimitedReasonCategory | null; + semantic_frame?: AddressSemanticFrame | null; response_type: AddressResponseType; requested_result_mode?: AddressResultMode; result_mode?: AddressResultMode; diff --git a/llm_normalizer/backend/src/types/normalizer.ts b/llm_normalizer/backend/src/types/normalizer.ts index b35563f..58e1507 100644 --- a/llm_normalizer/backend/src/types/normalizer.ts +++ b/llm_normalizer/backend/src/types/normalizer.ts @@ -86,6 +86,24 @@ export type SoftAssumption = | "problem_scan_mode_enabled"; export type RouteStatus = "routed" | "no_route"; export type NoRouteReason = "out_of_scope" | "insufficient_specificity" | "missing_mapping" | "unsupported_fragment_type"; +export type FragmentScopeTargetKind = + | "none" + | "self_scope" + | "selected_object" + | "organization" + | "warehouse" + | "counterparty" + | "contract" + | "item"; +export type FragmentDateScopeKind = "explicit" | "implicit_current" | "missing"; + +export interface NormalizedFragmentSemanticHints { + scope_target_kind: FragmentScopeTargetKind; + scope_target_text: string | null; + date_scope_kind: FragmentDateScopeKind; + self_scope_detected: boolean; + selected_object_scope_detected: boolean; +} export interface NormalizedFragmentV2 { fragment_id: string; @@ -113,6 +131,7 @@ export interface NormalizedFragmentV2 { asks_for_evidence: boolean; mentions_period_close_context: boolean; }; + semantic_hints: NormalizedFragmentSemanticHints; candidate_labels: IntentClass[]; confidence: ConfidenceLevel; } diff --git a/llm_normalizer/backend/tests/addressImplicitOrganizationScope.test.ts b/llm_normalizer/backend/tests/addressImplicitOrganizationScope.test.ts new file mode 
100644 index 0000000..60aa5b2 --- /dev/null +++ b/llm_normalizer/backend/tests/addressImplicitOrganizationScope.test.ts @@ -0,0 +1,183 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +const { executeAddressMcpQueryMock } = vi.hoisted(() => ({ + executeAddressMcpQueryMock: vi.fn() +})); + +vi.mock("../src/services/addressMcpClient", async () => { + const actual = await vi.importActual( + "../src/services/addressMcpClient" + ); + return { + ...actual, + executeAddressMcpQuery: executeAddressMcpQueryMock + }; +}); + +import { AddressQueryService } from "../src/services/addressQueryService"; + +afterEach(() => { + executeAddressMcpQueryMock.mockReset(); + vi.restoreAllMocks(); +}); + +describe("implicit organization stock scope", () => { + it("uses llm semantic hints to ground informal organization wording without turning it into warehouse anchor", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 148261.67, + Quantity: 22, + SubcontoDt1: "Модуль прямоугольый 1400*110*750", + Warehouse: "Основной склад", + Organization: 'ООО "Альтернатива Плюс"' + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("что на складе конторы альтернатива", { + llmSemanticHints: { + scope_target_kind: "organization", + scope_target_text: "Альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + } + }); + + expect(result?.handled).toBe(true); + expect(result?.reply_type).toBe("factual"); + expect(result?.response_type).toBe("FACTUAL_LIST"); + expect(result?.debug.detected_intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.debug.selected_recipe).toBe("address_inventory_on_hand_as_of_date_v1"); + 
expect(result?.debug.mcp_call_status).toBe("matched_non_empty"); + expect(result?.debug.extracted_filters?.organization).toBe("Альтернатива"); + expect(result?.debug.extracted_filters?.warehouse).toBeUndefined(); + expect(result?.debug.semantic_frame?.scope_kind).toBe("explicit_anchor"); + expect(result?.debug.semantic_frame?.anchor_kind).toBe("organization"); + expect(result?.debug.semantic_frame?.anchor_value).toBe("Альтернатива"); + expect(result?.debug.as_of_date_basis).toBe("implicit_current_snapshot"); + expect(String(result?.reply_text ?? "")).toContain("Модуль прямоугольый 1400*110*750"); + }); + + it("re-grounds warehouse-like informal company wording to live organization candidate set", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 833.33, + Quantity: 1, + SubcontoDt1: "Четки Пост (84*117)", + Warehouse: "Основной склад", + Organization: "ООО КОТ ССЫТ ВО ДВОРЕ" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("что на складе конторы ссыт кот", { + activeOrganization: "ООО КОТ ССЫТ ВО ДВОРЕ", + knownOrganizations: ["ООО КОТ ССЫТ ВО ДВОРЕ", "ООО Альтернатива Плюс"] + }); + + expect(result?.handled).toBe(true); + expect(result?.reply_type).toBe("factual"); + expect(result?.debug.extracted_filters?.organization).toBe("ООО КОТ ССЫТ ВО ДВОРЕ"); + expect(result?.debug.extracted_filters?.warehouse).toBeUndefined(); + expect(result?.debug.anchor_type).toBe("organization"); + expect(result?.debug.reasons).toContain("warehouse_anchor_regrounded_to_organization_scope"); + expect(result?.debug.reasons).toContain("organization_scope_live_grounding_recovered_rows"); + expect(String(result?.reply_text ?? 
"")).toContain("Четки Пост (84*117)"); + }); + + it("handles slang stock-state wording as current inventory snapshot for grounded organization scope", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 34490, + Quantity: 1, + SubcontoDt1: "Диван трехместный", + Warehouse: "Основной склад", + Organization: "ООО Альтернатива Плюс" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("чекни плиз чо там на складе альтернативы происходит", { + activeOrganization: "ООО Альтернатива Плюс", + knownOrganizations: ["ООО Альтернатива Плюс", "ООО Лайсвуд"] + }); + + expect(result?.handled).toBe(true); + expect(result?.reply_type).toBe("factual"); + expect(result?.debug.detected_intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.debug.selected_recipe).toBe("address_inventory_on_hand_as_of_date_v1"); + expect(result?.debug.extracted_filters?.organization).toBe("ООО Альтернатива Плюс"); + expect(result?.debug.extracted_filters?.warehouse).toBeUndefined(); + expect(result?.debug.as_of_date_basis).toBe("implicit_current_snapshot"); + expect(String(result?.reply_text ?? 
"")).toContain("Диван трехместный"); + }); + + it("handles short colloquial stock query as current inventory snapshot for grounded organization scope", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 6490, + Quantity: 1, + SubcontoDt1: "Пуф арий", + Warehouse: "Основной склад", + Organization: "ООО Альтернатива Плюс" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("че на складах альтернативы", { + activeOrganization: "ООО Альтернатива Плюс", + knownOrganizations: ["ООО Альтернатива Плюс", "ООО Лайсвуд"] + }); + + expect(result?.handled).toBe(true); + expect(result?.reply_type).toBe("factual"); + expect(result?.debug.detected_intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.debug.selected_recipe).toBe("address_inventory_on_hand_as_of_date_v1"); + expect(result?.debug.extracted_filters?.organization).toBe("ООО Альтернатива Плюс"); + expect(result?.debug.extracted_filters?.warehouse).toBeUndefined(); + expect(result?.debug.anchor_type).toBe("organization"); + expect(result?.debug.as_of_date_basis).toBe("implicit_current_snapshot"); + expect(String(result?.reply_text ?? 
"")).toContain("Пуф арий"); + }); +}); diff --git a/llm_normalizer/backend/tests/addressImplicitSelfScope.test.ts b/llm_normalizer/backend/tests/addressImplicitSelfScope.test.ts new file mode 100644 index 0000000..f49e395 --- /dev/null +++ b/llm_normalizer/backend/tests/addressImplicitSelfScope.test.ts @@ -0,0 +1,98 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +const { executeAddressMcpQueryMock } = vi.hoisted(() => ({ + executeAddressMcpQueryMock: vi.fn() +})); + +vi.mock("../src/services/addressMcpClient", async () => { + const actual = await vi.importActual( + "../src/services/addressMcpClient" + ); + return { + ...actual, + executeAddressMcpQuery: executeAddressMcpQueryMock + }; +}); + +import { AddressQueryService } from "../src/services/addressQueryService"; + +afterEach(() => { + executeAddressMcpQueryMock.mockReset(); + vi.restoreAllMocks(); +}); + +describe("implicit self-scope stock snapshot", () => { + it("does not turn 'у нас' into a literal warehouse anchor", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 498472.5, + Quantity: 3, + SubcontoDt1: "Конструкция трансформер рабочей станции 1300*900*2000", + Warehouse: "Основной склад", + Organization: 'ООО "Альтернатива Плюс"' + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("что на складе у нас"); + + expect(result?.handled).toBe(true); + expect(result?.reply_type).toBe("factual"); + expect(result?.response_type).toBe("FACTUAL_LIST"); + expect(result?.debug.detected_intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.debug.selected_recipe).toBe("address_inventory_on_hand_as_of_date_v1"); + expect(result?.debug.mcp_call_status).toBe("matched_non_empty"); + 
expect(result?.debug.extracted_filters?.warehouse).toBeUndefined(); + expect(result?.debug.as_of_date_basis).toBe("implicit_current_snapshot"); + expect(result?.debug.semantic_frame?.scope_kind).toBe("implicit_self_scope"); + expect(result?.debug.semantic_frame?.anchor_kind).toBe("self_scope"); + expect(result?.debug.semantic_frame?.date_scope_kind).toBe("implicit_current"); + expect(String(result?.reply_text ?? "")).toContain("Конструкция трансформер рабочей станции 1300*900*2000"); + expect(executeAddressMcpQueryMock).toHaveBeenCalledTimes(1); + }); + + it("grounds implicit self-scope to active organization when one is in focus", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2026-04-15T23:59:59Z", + Registrator: "Остатки товаров на складах", + AccountDt: "41.01", + AccountKt: "00.00", + Amount: 34490, + Quantity: 1, + SubcontoDt1: "Диван трехместный", + Warehouse: "Основной склад", + Organization: 'ООО "Альтернатива Плюс"' + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("что на складе у нас", { + activeOrganization: "ООО Альтернатива Плюс", + knownOrganizations: ["ООО Альтернатива Плюс", "ООО Лайсвуд"] + }); + + expect(result?.handled).toBe(true); + expect(result?.debug.extracted_filters?.organization).toBe("ООО Альтернатива Плюс"); + expect(result?.debug.semantic_frame?.scope_kind).toBe("implicit_self_scope"); + expect(result?.debug.semantic_frame?.anchor_kind).toBe("self_scope"); + expect(result?.debug.reasons).toContain("organization_from_active_scope"); + expect(String(result?.reply_text ?? 
"")).toContain("Диван трехместный"); + }); +}); diff --git a/llm_normalizer/backend/tests/addressInventoryRootFrameFollowup.test.ts b/llm_normalizer/backend/tests/addressInventoryRootFrameFollowup.test.ts new file mode 100644 index 0000000..4768f32 --- /dev/null +++ b/llm_normalizer/backend/tests/addressInventoryRootFrameFollowup.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from "vitest"; + +import { runAddressDecomposeStage } from "../src/services/address_runtime/decomposeStage"; + +describe("inventory root frame follow-up", () => { + it("restores the root inventory frame for a temporal patch after drilldown", () => { + const result = runAddressDecomposeStage("а на май 2020", { + previous_intent: "inventory_purchase_provenance_for_item", + previous_filters: { + item: "Кресло орион", + organization: "альтернатива", + counterparty: "альтернатива", + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31" + }, + previous_anchor_type: "item", + previous_anchor_value: "Кресло орион", + root_intent: "inventory_on_hand_as_of_date", + root_filters: { + organization: "альтернатива", + counterparty: "альтернатива", + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31" + }, + root_anchor_type: "organization", + root_anchor_value: "ООО \\Альтернатива Плюс\\", + current_frame_kind: "inventory_drilldown" + }); + + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.baseReasons).toContain("intent_restored_to_inventory_root_frame"); + expect(result?.filters.extracted_filters.item).toBeUndefined(); + expect(result?.filters.extracted_filters.organization).toBe("альтернатива"); + expect(result?.filters.extracted_filters.counterparty).toBe("альтернатива"); + expect(result?.filters.extracted_filters.period_from).toBe("2020-05-01"); + expect(result?.filters.extracted_filters.period_to).toBe("2020-05-31"); + 
expect(result?.filters.extracted_filters.as_of_date).toBe("2020-05-31"); + }); + + it("derives a relative month from the root frame year", () => { + const result = runAddressDecomposeStage("а на май этого же года", { + previous_intent: "inventory_purchase_provenance_for_item", + previous_filters: { + item: "Кресло орион", + organization: "альтернатива", + counterparty: "альтернатива", + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31" + }, + previous_anchor_type: "item", + previous_anchor_value: "Кресло орион", + root_intent: "inventory_on_hand_as_of_date", + root_filters: { + organization: "альтернатива", + counterparty: "альтернатива", + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31" + }, + root_anchor_type: "organization", + root_anchor_value: "ООО \\Альтернатива Плюс\\", + current_frame_kind: "inventory_drilldown" + }); + + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.filters.extracted_filters.period_from).toBe("2020-05-01"); + expect(result?.filters.extracted_filters.period_to).toBe("2020-05-31"); + expect(result?.filters.extracted_filters.as_of_date).toBe("2020-05-31"); + expect(result?.baseReasons).toContain("period_derived_from_inventory_root_frame_year"); + }); +}); diff --git a/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts b/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts index 0e24036..b1e410f 100644 --- a/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts +++ b/llm_normalizer/backend/tests/addressInventorySelectedObjectFollowup.test.ts @@ -521,4 +521,64 @@ describe("inventory selected-object follow-up", () => { expect(result?.debug.rows_matched).toBeGreaterThan(0); expect(String(result?.reply_text ?? 
"")).not.toContain("совпадений не нашлось"); }); + + it("clears carried as-of date during history recovery for selected-object provenance after dated stock slice", async () => { + executeAddressMcpQueryMock + .mockResolvedValueOnce({ + fetched_rows: 0, + matched_rows: 0, + raw_rows: [], + rows: [], + error: null + }) + .mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2020-06-18T00:00:00Z", + Registrator: "Поступление товаров и услуг 00000000101 от 18.06.2020 0:00:00", + AccountDt: "41.01", + AccountKt: "60.01", + Amount: 13490, + SubcontoDt1: "Кресло орион", + SubcontoDt3: "Основной склад", + SubcontoKt1: "ООО \\Гамма-мебель\\", + SubcontoKt2: "Договор поставки № 11 от 15.06.2020", + Organization: "ООО \\Альтернатива Плюс\\" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle('По выбранному объекту "Кресло орион": кто поставил это?', { + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2020-03-31", + period_from: "2020-03-01", + period_to: "2020-03-31", + organization: "ООО \\Альтернатива Плюс\\" + }, + previous_anchor_type: "counterparty", + previous_anchor_value: "ООО \\Альтернатива Плюс\\" + } + }); + + expect(result?.handled).toBe(true); + expect(result?.response_type).toBe("FACTUAL_SUMMARY"); + expect(result?.debug.detected_intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.debug.extracted_filters?.item).toBe("Кресло орион"); + expect(result?.debug.extracted_filters?.as_of_date).toBe("2020-03-31"); + expect(result?.debug.extracted_filters?.period_from).toBe("2020-03-01"); + expect(result?.debug.extracted_filters?.period_to).toBe("2020-03-31"); + expect(result?.debug.reasons).toContain("as_of_date_cleared_for_history_recovery"); + expect(result?.debug.reasons).toContain("period_window_auto_broadened_to_available_data"); + 
expect(result?.debug.limitations).toContain("as_of_date_cleared_for_history_recovery"); + expect(result?.debug.limitations).toContain("period_window_auto_broadened_to_available_data"); + expect(String(result?.reply_text ?? "")).toContain("ООО \\Гамма-мебель\\"); + expect(executeAddressMcpQueryMock).toHaveBeenCalledTimes(2); + }); }); diff --git a/llm_normalizer/backend/tests/addressInventoryWarehouseAnchor.test.ts b/llm_normalizer/backend/tests/addressInventoryWarehouseAnchor.test.ts index 6c83227..650ab84 100644 --- a/llm_normalizer/backend/tests/addressInventoryWarehouseAnchor.test.ts +++ b/llm_normalizer/backend/tests/addressInventoryWarehouseAnchor.test.ts @@ -23,4 +23,15 @@ describe("inventory warehouse anchor extraction", () => { expect(filters.as_of_date).toBe("2019-03-31"); expect(filters.warehouse).toBeUndefined(); }); + it("treats 'у нас' as implicit self-scope instead of literal warehouse anchor", () => { + const result = extractAddressFilters("что на складе у нас", "inventory_on_hand_as_of_date"); + + expect(result.extracted_filters.warehouse).toBeUndefined(); + expect(result.warnings).toContain("warehouse_self_scope_detected"); + expect(result.semantic_frame?.scope_kind).toBe("implicit_self_scope"); + expect(result.semantic_frame?.anchor_kind).toBe("self_scope"); + expect(result.semantic_frame?.anchor_value).toBeNull(); + expect(result.semantic_frame?.date_scope_kind).toBe("implicit_current"); + expect(result.semantic_frame?.date_basis_hint).toBe("implicit_current_snapshot"); + }); }); diff --git a/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts index 8b8e404..051659a 100644 --- a/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts @@ -193,4 +193,42 @@ describe("assistant address attempt runtime adapter", () => { }) ); }); + + it("forwards llm 
semantic hints from address runtime into lane attempt runtime input", async () => { + const runAddressLaneAttemptRuntime = vi.fn(async () => ({ + response_type: "READY" + })); + const runAddressRuntime = vi.fn(async (input: any) => { + await input.runAddressLaneAttempt("что на складе конторы альтернатива", null, null, { + scope_target_kind: "organization", + scope_target_text: "Альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + }); + + return { + handled: false, + response: null, + addressRuntimeMetaForDeep: null + }; + }); + + await runAssistantAddressAttemptRuntime( + buildInput({ + runAddressRuntime, + runAddressLaneAttemptRuntime + }) + ); + + expect(runAddressLaneAttemptRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + messageUsed: "что на складе конторы альтернатива", + llmSemanticHints: expect.objectContaining({ + scope_target_kind: "organization", + scope_target_text: "Альтернатива" + }) + }) + ); + }); }); diff --git a/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts b/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts index 6967215..dcd14d7 100644 --- a/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts @@ -475,6 +475,10 @@ describe("assistant address follow-up carryover", () => { expect(calls[1].options?.followupContext?.previous_filters?.as_of_date).toBe("2020-06-30"); expect(calls[1].options?.followupContext?.previous_filters?.period_from).toBe("2020-06-01"); expect(calls[1].options?.followupContext?.previous_filters?.period_to).toBe("2020-06-30"); + expect(calls[1].options?.followupContext?.root_intent).toBe("inventory_on_hand_as_of_date"); + expect(calls[1].options?.followupContext?.root_filters?.organization).toBe("ООО \\Альтернатива Плюс\\"); + expect(calls[1].options?.followupContext?.root_filters?.as_of_date).toBe("2020-06-30"); + 
expect(calls[1].options?.followupContext?.current_frame_kind).toBe("inventory_root"); expect(calls[1].options?.followupContext?.previous_filters?.warehouse).toBe("Основной склад"); expect(normalizerService.normalize).not.toHaveBeenCalled(); }); diff --git a/llm_normalizer/backend/tests/assistantAddressLaneAttemptInputBuilder.test.ts b/llm_normalizer/backend/tests/assistantAddressLaneAttemptInputBuilder.test.ts index 078abbe..702edc9 100644 --- a/llm_normalizer/backend/tests/assistantAddressLaneAttemptInputBuilder.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressLaneAttemptInputBuilder.test.ts @@ -11,6 +11,7 @@ function buildInput(overrides: Record = {}) { carryMeta: { followupContext: { previous_intent: "docs_by_counterparty" } }, analysisDateHint: "2020-08-31", activeOrganization: "Org A", + knownOrganizations: ["Org A", "Org B"], mergeFollowupContextWithOrganizationScope, runAddressQueryTryHandle, ...overrides @@ -24,6 +25,7 @@ describe("assistant address lane attempt input builder", () => { expect(runtimeInput.messageUsed).toBe("Show overdue docs"); expect(runtimeInput.analysisDateHint).toBe("2020-08-31"); expect(runtimeInput.activeOrganization).toBe("Org A"); + expect(runtimeInput.knownOrganizations).toEqual(["Org A", "Org B"]); expect(runtimeInput.carryMeta).toEqual({ followupContext: { previous_intent: "docs_by_counterparty" } }); @@ -37,6 +39,7 @@ describe("assistant address lane attempt input builder", () => { carryMeta: null, analysisDateHint: null, activeOrganization: null, + knownOrganizations: [], mergeFollowupContextWithOrganizationScope, runAddressQueryTryHandle }) diff --git a/llm_normalizer/backend/tests/assistantAddressLaneAttemptQueryOptionsBuilder.test.ts b/llm_normalizer/backend/tests/assistantAddressLaneAttemptQueryOptionsBuilder.test.ts index ec05d5a..a4bc116 100644 --- a/llm_normalizer/backend/tests/assistantAddressLaneAttemptQueryOptionsBuilder.test.ts +++ 
b/llm_normalizer/backend/tests/assistantAddressLaneAttemptQueryOptionsBuilder.test.ts @@ -23,7 +23,10 @@ describe("assistant address lane attempt query options builder", () => { scopedFollowupContext: { previous_intent: "docs_by_counterparty", active_organization: "Org A" - } + }, + activeOrganization: "Org A", + knownOrganizations: ["Org A", "Org B"], + llmSemanticHints: null }); expect(options).toEqual({ @@ -31,14 +34,19 @@ describe("assistant address lane attempt query options builder", () => { previous_intent: "docs_by_counterparty", active_organization: "Org A" }, - analysisDateHint: "2020-07-31" + analysisDateHint: "2020-07-31", + activeOrganization: "Org A", + knownOrganizations: ["Org A", "Org B"] }); }); it("builds query options with only analysis date when scoped context is missing", () => { const options = buildAssistantAddressLaneAttemptQueryOptions({ analysisDateHint: null, - scopedFollowupContext: null + scopedFollowupContext: null, + activeOrganization: null, + knownOrganizations: [], + llmSemanticHints: null }); expect(options).toEqual({ diff --git a/llm_normalizer/backend/tests/assistantAddressLaneAttemptRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantAddressLaneAttemptRuntimeAdapter.test.ts index 36ce31e..edde42b 100644 --- a/llm_normalizer/backend/tests/assistantAddressLaneAttemptRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressLaneAttemptRuntimeAdapter.test.ts @@ -15,6 +15,7 @@ describe("assistant address lane attempt runtime adapter", () => { }, analysisDateHint: "2020-07-31", activeOrganization: "ООО Тест", + knownOrganizations: ["ООО Тест", "ООО Лютик"], mergeFollowupContextWithOrganizationScope: () => ({ previous_intent: "docs_by_counterparty", active_organization: "ООО Тест" @@ -27,7 +28,9 @@ describe("assistant address lane attempt runtime adapter", () => { previous_intent: "docs_by_counterparty", active_organization: "ООО Тест" }, - analysisDateHint: "2020-07-31" + analysisDateHint: "2020-07-31", + 
activeOrganization: "ООО Тест", + knownOrganizations: ["ООО Тест", "ООО Лютик"] }); expect(result).toEqual({ response_type: "READY" @@ -41,6 +44,7 @@ describe("assistant address lane attempt runtime adapter", () => { carryMeta: null, analysisDateHint: null, activeOrganization: null, + knownOrganizations: [], mergeFollowupContextWithOrganizationScope: () => null, runAddressQueryTryHandle }); @@ -49,4 +53,36 @@ describe("assistant address lane attempt runtime adapter", () => { analysisDateHint: null }); }); + + it("forwards llm semantic hints into query options", async () => { + const runAddressQueryTryHandle = vi.fn(async () => ({ + response_type: "READY" + })); + + await runAssistantAddressLaneAttemptRuntime({ + messageUsed: "что на складе конторы альтернатива", + carryMeta: null, + analysisDateHint: null, + llmSemanticHints: { + scope_target_kind: "organization", + scope_target_text: "Альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + }, + activeOrganization: null, + knownOrganizations: ["ООО Альтернатива Плюс"], + mergeFollowupContextWithOrganizationScope: () => null, + runAddressQueryTryHandle + }); + + expect(runAddressQueryTryHandle).toHaveBeenCalledWith("что на складе конторы альтернатива", { + analysisDateHint: null, + knownOrganizations: ["ООО Альтернатива Плюс"], + llmSemanticHints: expect.objectContaining({ + scope_target_kind: "organization", + scope_target_text: "Альтернатива" + }) + }); + }); }); diff --git a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts index e47a50d..518c735 100644 --- a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts @@ -164,6 +164,59 @@ describe("assistant address llm pre-decompose candidate preference", () => { const addressQueryService = { tryHandle: vi.fn(async 
(message: string) => { calls.push({ message }); + if (message === "получить остатки по складу для организации 'альтернатива'") { + return { + handled: true, + reply_text: `handled: ${message}`, + reply_type: "factual", + response_type: "FACTUAL_LIST", + debug: { + detected_mode: "address_query", + detected_mode_confidence: "high", + query_shape: "UNKNOWN", + query_shape_confidence: "low", + detected_intent: "inventory_on_hand_as_of_date", + detected_intent_confidence: "high", + extracted_filters: { + sort: "period_desc", + organization: "альтернатива", + counterparty: "альтернатива", + as_of_date: "2026-04-15" + }, + missing_required_filters: [], + selected_recipe: "address_inventory_on_hand_as_of_date_v1", + mcp_call_status_legacy: "matched_non_empty", + account_scope_mode: "strict", + account_scope_fallback_applied: false, + anchor_type: "counterparty", + anchor_value_raw: "альтернатива", + anchor_value_resolved: "ООО \\Альтернатива Плюс\\", + resolver_confidence: "medium", + ambiguity_count: 0, + match_failure_stage: "none", + match_failure_reason: null, + mcp_call_status: "matched_non_empty", + rows_fetched: 1, + raw_rows_received: 1, + rows_after_account_scope: 1, + rows_after_recipe_filter: 1, + rows_materialized: 1, + rows_matched: 1, + raw_row_keys_sample: [], + materialization_drop_reason: "none", + account_token_raw: null, + account_token_normalized: null, + account_scope_fields_checked: ["account_dt", "account_kt", "registrator", "analytics"], + account_scope_match_strategy: "account_code_regex_plus_alias_map_v1", + account_scope_drop_reason: "not_applicable", + runtime_readiness: "LIVE_QUERYABLE_WITH_LIMITS", + limited_reason_category: null, + response_type: "FACTUAL_LIST", + limitations: [], + reasons: ["inventory_on_hand_signal_detected"] + } + }; + } return buildAddressLaneResult(message); }) } as any; @@ -449,6 +502,177 @@ describe("assistant address llm pre-decompose candidate preference", () => { 
]).toContain(response.debug?.llm_decomposition_reason); }); + it("prefers raw selected-object sale follow-up when llm rewrite drifts into generic open-items intent", async () => { + const calls: Array<{ message: string }> = []; + const addressQueryService = { + tryHandle: vi.fn(async (message: string) => { + calls.push({ message }); + return buildAddressLaneResult(message); + }) + } as any; + + const normalizerService = { + normalize: vi.fn(async (payload: any) => { + if (payload?.userQuestion === "какие остатки по складу у альтернативы") { + return { + trace_id: "norm-predecompose-root-stock", + ok: true, + normalized: { + schema_version: "normalized_query_v2_0_2", + user_message_raw: "какие остатки по складу у альтернативы", + message_in_scope: true, + scope_confidence: "medium", + contains_multiple_tasks: false, + fragments: [ + { + fragment_id: "F1", + raw_fragment_text: "какие остатки по складу у альтернативы", + normalized_fragment_text: "получить остатки по складу для организации 'альтернатива'", + domain_relevance: "in_scope", + business_scope: "company_specific_accounting", + entity_hints: [], + account_hints: [], + document_hints: [], + register_hints: [], + time_scope: { + type: "missing", + value: null, + confidence: "low" + }, + flags: { + has_multi_entity_scope: false, + asks_for_chain_explanation: false, + asks_for_ranking_or_top: false, + asks_for_period_summary: false, + asks_for_rule_check: false, + asks_for_anomaly_scan: false, + asks_for_exact_object_trace: false, + asks_for_evidence: false, + mentions_period_close_context: false + }, + candidate_labels: ["simple_factual"], + confidence: "medium", + execution_readiness: "executable", + clarification_reason: null, + soft_assumption_used: [], + route_status: "routed", + no_route_reason: null + } + ], + discarded_fragments: [], + global_notes: { + needs_clarification: false, + clarification_reason: null + } + }, + raw_model_output: null, + validation: { passed: true, errors: [] }, + usage: { 
input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + latency_ms: 10, + prompt_version: "normalizer_v2_0_2", + schema_version: "v2_0_2", + request_count_for_case: 1 + }; + } + return { + trace_id: "norm-predecompose-selected-object-sale-drift", + ok: true, + normalized: { + schema_version: "normalized_query_v2_0_2", + user_message_raw: + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": кому мы это продали в итоге', + message_in_scope: true, + scope_confidence: "medium", + contains_multiple_tasks: false, + fragments: [ + { + fragment_id: "F1", + raw_fragment_text: + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": кому мы это продали в итоге', + normalized_fragment_text: + "Определить контрагента, которому была реализована позиция «Рабочая станция универсального специалиста (индивидуальное изготовление)» по выбранному объекту", + domain_relevance: "in_scope", + business_scope: "company_specific_accounting", + entity_hints: [], + account_hints: [], + document_hints: [], + register_hints: [], + time_scope: { + type: "missing", + value: null, + confidence: "low" + }, + flags: { + has_multi_entity_scope: false, + asks_for_chain_explanation: false, + asks_for_ranking_or_top: false, + asks_for_period_summary: false, + asks_for_rule_check: false, + asks_for_anomaly_scan: false, + asks_for_exact_object_trace: true, + asks_for_evidence: false, + mentions_period_close_context: false + }, + candidate_labels: ["simple_factual"], + confidence: "medium", + execution_readiness: "executable", + clarification_reason: null, + soft_assumption_used: [], + route_status: "routed", + no_route_reason: null + } + ], + discarded_fragments: [], + global_notes: { + needs_clarification: false, + clarification_reason: null + } + }, + raw_model_output: null, + validation: { passed: true, errors: [] }, + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + latency_ms: 10, + 
prompt_version: "normalizer_v2_0_2", + schema_version: "v2_0_2", + request_count_for_case: 1 + }; + }) + } as any; + + const sessions = new AssistantSessionStore(); + const service = new AssistantService( + normalizerService, + sessions as any, + {} as any, + { persistSession: vi.fn() } as any, + addressQueryService + ); + + const sessionId = `asst-predecompose-selected-object-sale-${Date.now()}`; + await service.handleMessage({ + session_id: sessionId, + user_message: "какие остатки по складу у альтернативы", + llmProvider: "local", + useMock: false + } as any); + + const response = await service.handleMessage({ + session_id: sessionId, + user_message: + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": кому мы это продали в итоге', + llmProvider: "local", + useMock: false + } as any); + + expect(response.ok).toBe(true); + expect(response.reply_type).toBe("factual"); + expect(calls).toHaveLength(2); + expect(calls[1].message).toBe( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": кому мы это продали в итоге' + ); + expect(response.debug?.llm_decomposition_reason).toBe("followup_raw_message_preferred_over_llm_rewrite"); + }); + it("does not treat service verb as counterparty anchor when llm rewrites noisy bank phrase", async () => { const calls: Array<{ message: string }> = []; const addressQueryService = { diff --git a/llm_normalizer/backend/tests/assistantAddressOrchestrationRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantAddressOrchestrationRuntimeAdapter.test.ts index 77a9cdf..8c08030 100644 --- a/llm_normalizer/backend/tests/assistantAddressOrchestrationRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressOrchestrationRuntimeAdapter.test.ts @@ -99,5 +99,142 @@ describe("assistant address orchestration runtime adapter", () => { expect(output.livingModeDecision.mode).toBe("chat"); 
expect(output.addressRuntimeMeta.toolGateDecision).toBe("skip_address_lane"); }); -}); + it("prefers raw short follow-up over unsupported llm rewrite when carryover context exists", async () => { + const resolveAddressFollowupCarryoverContext = vi.fn(() => ({ + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + organization: "ООО \\Альтернатива Плюс\\", + as_of_date: "2026-04-15" + } + } + })); + const resolveAssistantOrchestrationDecision = vi.fn(() => ({ + runAddressLane: true, + livingMode: "address_data", + livingReason: "address_lane_triggered", + toolGateDecision: "run_address_lane", + toolGateReason: "followup_context_detected", + orchestrationContract: { schema_version: "assistant_orchestration_contract_v1" } + })); + const buildAddressLlmPredecomposeContractV1 = vi.fn(({ sourceMessage, canonicalMessage }: { sourceMessage: string; canonicalMessage: string }) => ({ + schema_version: "address_llm_predecompose_contract_v1", + source_message: sourceMessage, + canonical_message: canonicalMessage, + mode: canonicalMessage === sourceMessage ? "address_query" : "unsupported", + intent: canonicalMessage === sourceMessage ? 
"inventory_on_hand_as_of_date" : "unknown" + })); + + const output = await buildAssistantAddressOrchestrationRuntime( + buildInput({ + userMessage: "ахуен а на март 2020", + runAddressLlmPreDecompose: vi.fn(async () => ({ + attempted: true, + applied: true, + effectiveMessage: "что не так в бухгалтерии за март 2020 года?", + reason: "normalized_fragment_applied", + predecomposeContract: { + mode: "unsupported", + intent: "unknown" + } + })), + buildAddressLlmPredecomposeContractV1, + resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision + }) + ); + + expect(output.addressInputMessage).toBe("ахуен а на март 2020"); + expect(output.addressPreDecompose.applied).toBe(false); + expect(output.addressPreDecompose.reason).toBe("followup_raw_message_preferred_over_llm_rewrite"); + expect(output.addressPreDecompose.predecomposeContract).toEqual( + expect.objectContaining({ + canonical_message: "ахуен а на март 2020", + mode: "address_query", + intent: "inventory_on_hand_as_of_date" + }) + ); + expect(buildAddressLlmPredecomposeContractV1).toHaveBeenCalledWith({ + sourceMessage: "ахуен а на март 2020", + canonicalMessage: "ахуен а на март 2020" + }); + expect(resolveAddressFollowupCarryoverContext).toHaveBeenCalledTimes(2); + expect(resolveAssistantOrchestrationDecision).toHaveBeenCalledWith( + expect.objectContaining({ + rawUserMessage: "ахуен а на март 2020", + effectiveAddressUserMessage: "ахуен а на март 2020" + }) + ); + }); + + it("prefers raw selected-object inventory action over generic canonical drift intent", async () => { + const resolveAddressFollowupCarryoverContext = vi.fn(() => ({ + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + organization: "ООО \\Альтернатива Плюс\\", + as_of_date: "2016-06-30", + period_from: "2016-06-01", + period_to: "2016-06-30" + } + } + })); + const resolveAssistantOrchestrationDecision = vi.fn(() => ({ + runAddressLane: true, + livingMode: "address_data", + 
livingReason: "address_lane_triggered", + toolGateDecision: "run_address_lane", + toolGateReason: "address_mode_classifier_detected", + orchestrationContract: { schema_version: "assistant_orchestration_contract_v1" } + })); + const buildAddressLlmPredecomposeContractV1 = vi.fn(({ sourceMessage, canonicalMessage }: { sourceMessage: string; canonicalMessage: string }) => ({ + schema_version: "address_llm_predecompose_contract_v1", + source_message: sourceMessage, + canonical_message: canonicalMessage, + mode: "address_query", + intent: "unknown" + })); + + const rawMessage = + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": кому мы это продали в итоге'; + + const output = await buildAssistantAddressOrchestrationRuntime( + buildInput({ + userMessage: rawMessage, + runAddressLlmPreDecompose: vi.fn(async () => ({ + attempted: true, + applied: true, + effectiveMessage: + "Определить контрагента, которому была реализована позиция «Рабочая станция универсального специалиста (индивидуальное изготовление)» по выбранному объекту", + reason: "normalized_fragment_applied", + predecomposeContract: { + mode: "address_query", + intent: "open_items_by_counterparty_or_contract", + semantics: { + selected_object_scope_detected: true + } + } + })), + buildAddressLlmPredecomposeContractV1, + resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision + }) + ); + + expect(output.addressInputMessage).toBe(rawMessage); + expect(output.addressPreDecompose.applied).toBe(false); + expect(output.addressPreDecompose.reason).toBe("followup_raw_message_preferred_over_llm_rewrite"); + expect(buildAddressLlmPredecomposeContractV1).toHaveBeenCalledWith({ + sourceMessage: rawMessage, + canonicalMessage: rawMessage + }); + expect(resolveAddressFollowupCarryoverContext).toHaveBeenCalledTimes(2); + expect(resolveAssistantOrchestrationDecision).toHaveBeenCalledWith( + expect.objectContaining({ + rawUserMessage: rawMessage, + 
effectiveAddressUserMessage: rawMessage + }) + ); + }); +}); diff --git a/llm_normalizer/backend/tests/assistantAddressRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantAddressRuntimeAdapter.test.ts index 45e45a1..27a214f 100644 --- a/llm_normalizer/backend/tests/assistantAddressRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressRuntimeAdapter.test.ts @@ -173,7 +173,7 @@ describe("assistant address runtime adapter", () => { runAddressLaneRuntime }); - expect(runAddressLaneAttempt).toHaveBeenCalledWith("canon", null, "2020-07-31"); + expect(runAddressLaneAttempt).toHaveBeenCalledWith("canon", null, "2020-07-31", null); expect(finalizeAddressLaneResponse).toHaveBeenCalledWith( { handled: true }, "canon", @@ -193,4 +193,87 @@ describe("assistant address runtime adapter", () => { } }); }); + + it("passes llm semantic hints from orchestration metadata into lane attempts", async () => { + const runAddressLaneAttempt = vi.fn(async () => ({ + handled: true + })); + + const result = await runAssistantAddressRuntime({ + featureAssistantAddressQueryV1: true, + sessionId: "asst-4", + userMessage: "что на складе конторы альтернатива", + sessionItems: [], + llmProvider: "local", + useMock: false, + featureAddressLlmPredecomposeV1: true, + runAddressLlmPreDecompose: async () => ({}), + buildAddressLlmPredecomposeContractV1: () => ({}), + sanitizeAddressMessageForFallback: (value) => value, + toNonEmptyString: (value) => (typeof value === "string" && value.trim() ? 
value.trim() : null), + resolveAddressFollowupCarryoverContext: () => null, + resolveAssistantOrchestrationDecision: () => ({}), + buildAddressDialogContinuationContractV2: () => ({}), + runtimeAnalysisContextAsOfDate: null, + payloadContextPeriodHint: null, + compactWhitespace: (value) => value.replace(/\s+/g, " ").trim(), + runAddressLaneAttempt, + isRetryableAddressLimitedResult: () => false, + finalizeAddressLaneResponse: () => ({ ok: "address" }), + tryHandleLivingChat: async () => null, + logEvent: () => {}, + nowIso: () => "2026-04-10T00:00:00.000Z", + runAddressOrchestrationRuntime: async () => ({ + addressPreDecompose: {}, + addressInputMessage: "что на складе конторы альтернатива", + carryover: null, + orchestrationDecision: { runAddressLane: true }, + addressRuntimeMeta: { + attempted: true, + semanticHints: { + scope_target_kind: "organization", + scope_target_text: "Альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + } + }, + livingModeDecision: { mode: "address_data", reason: "address_lane_triggered" } + }), + runAddressToolGateRuntime: async () => ({ + handled: false, + response: null + }), + runAddressLaneRuntime: async (input) => { + const addressLane = await input.runAddressLaneAttempt(input.addressInputMessage, null, input.llmSemanticHints ?? null); + return { + handled: true, + selection: { + addressLane: addressLane ?? 
{ handled: true }, + messageUsed: input.addressInputMessage, + carryMeta: null + }, + retryAudit: { + attempted: false, + reason: null, + initial_limited_category: null, + retry_message: null, + retry_used_followup_context: false, + retry_result_category: null + } + }; + } + }); + + expect(runAddressLaneAttempt).toHaveBeenCalledWith( + "что на складе конторы альтернатива", + null, + null, + expect.objectContaining({ + scope_target_kind: "organization", + scope_target_text: "Альтернатива" + }) + ); + expect(result.handled).toBe(true); + }); }); diff --git a/llm_normalizer/backend/tests/assistantLivingRouter.test.ts b/llm_normalizer/backend/tests/assistantLivingRouter.test.ts index 3df5e86..4dbc109 100644 --- a/llm_normalizer/backend/tests/assistantLivingRouter.test.ts +++ b/llm_normalizer/backend/tests/assistantLivingRouter.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from "vitest"; import { resolveAssistantOrchestrationDecision, resolveLivingAssistantModeDecision } from "../src/services/assistantService"; +import { + buildAddressLlmPredecomposeContractV1, + buildAddressSemanticExtractionContractV1 +} from "../src/services/address_runtime/predecomposeContract"; describe("assistant living router mode decision", () => { it("returns address_data when address lane already triggered", () => { @@ -471,7 +475,9 @@ describe("assistant orchestration contract", () => { expect(decision.livingMode).toBe("address_data"); expect(decision.toolGateDecision).toBe("run_address_lane"); - expect(["address_signal_detected", "address_intent_resolver_detected"]).toContain(String(decision.toolGateReason)); + expect(["address_signal_detected", "address_intent_resolver_detected", "address_mode_classifier_detected"]).toContain( + String(decision.toolGateReason) + ); expect(decision.livingReason).toBe("address_lane_triggered"); }); @@ -772,6 +778,95 @@ describe("assistant orchestration contract", () => { expect(decision.livingReason).toBe("address_lane_triggered"); }); + it("keeps 
slang stock-state query with organization scope in address lane instead of deep fallback", () => { + const rawUserMessage = "чекни плиз чо там на складе альтернативы происходит"; + const effectiveAddressUserMessage = "проверь, что происходит на складе у компании 'альтернатива'"; + const predecomposeContract = buildAddressLlmPredecomposeContractV1({ + sourceMessage: rawUserMessage, + canonicalMessage: effectiveAddressUserMessage, + semanticHints: { + scope_target_kind: "organization", + scope_target_text: "альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + } + }); + const semanticExtractionContract = buildAddressSemanticExtractionContractV1({ + sourceMessage: rawUserMessage, + canonicalMessage: effectiveAddressUserMessage, + predecomposeContract + }); + + const decision = resolveAssistantOrchestrationDecision({ + rawUserMessage, + effectiveAddressUserMessage, + followupContext: null, + llmPreDecomposeMeta: { + applied: true, + llmCanonicalCandidateDetected: true, + predecomposeContract, + semanticExtractionContract + } as any, + useMock: false + }); + + expect(decision.runAddressLane).toBe(true); + expect(decision.toolGateDecision).toBe("run_address_lane"); + expect(decision.livingMode).toBe("address_data"); + expect(decision.livingReason).toBe("address_lane_triggered"); + expect(decision.orchestrationContract?.unsupported_address_intent_fallback_to_deep).toBe(false); + expect(decision.orchestrationContract?.deep_analysis_signal_fallback_to_deep).toBe(false); + expect(decision.orchestrationContract?.semantic_route_arbitration?.supported_address_intent_detected).toBe(true); + }); + + it("keeps short colloquial stock query with organization scope in address lane instead of chat fallback", () => { + const rawUserMessage = "че на складах альтернативы"; + const effectiveAddressUserMessage = "что находится на складах у компании 'альтернатива'"; + const predecomposeContract = 
buildAddressLlmPredecomposeContractV1({ + sourceMessage: rawUserMessage, + canonicalMessage: effectiveAddressUserMessage, + semanticHints: { + scope_target_kind: "organization", + scope_target_text: "альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + } + }); + const semanticExtractionContract = buildAddressSemanticExtractionContractV1({ + sourceMessage: rawUserMessage, + canonicalMessage: effectiveAddressUserMessage, + predecomposeContract + }); + + const decision = resolveAssistantOrchestrationDecision({ + rawUserMessage, + effectiveAddressUserMessage, + followupContext: null, + llmPreDecomposeMeta: { + applied: true, + llmCanonicalCandidateDetected: true, + predecomposeContract, + semanticExtractionContract + } as any, + useMock: false + }); + + expect(decision.runAddressLane).toBe(true); + expect(decision.toolGateDecision).toBe("run_address_lane"); + expect([ + "address_intent_resolver_detected", + "address_mode_classifier_detected", + "llm_canonical_data_signal_detected", + "address_signal_detected" + ]).toContain( + String(decision.toolGateReason) + ); + expect(decision.livingMode).toBe("address_data"); + expect(decision.livingReason).toBe("address_lane_triggered"); + }); + it("keeps open-contracts request in address lane even with stale deep followup context when LLM contract is absent", () => { const decision = resolveAssistantOrchestrationDecision({ rawUserMessage: "Покажи незакрытые договоры на 2020-12-31", diff --git a/llm_normalizer/backend/tests/assistantOrganizationMatcher.test.ts b/llm_normalizer/backend/tests/assistantOrganizationMatcher.test.ts new file mode 100644 index 0000000..5ef5f94 --- /dev/null +++ b/llm_normalizer/backend/tests/assistantOrganizationMatcher.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "vitest"; +import { + mergeKnownOrganizations, + normalizeOrganizationScopeSearchText, + resolveOrganizationSelectionFromMessage, + 
scoreOrganizationMentionInMessage +} from "../src/services/assistantOrganizationMatcher"; + +describe("assistant organization matcher", () => { + it("deduplicates known organizations by normalized search key", () => { + expect( + mergeKnownOrganizations([ + 'ООО "Альтернатива Плюс"', + "ооо альтернатива плюс", + "ООО Лайсвуд" + ]) + ).toEqual(['ООО "Альтернатива Плюс"', "ООО Лайсвуд"]); + }); + + it("matches incomplete or reordered organization mention against live candidates", () => { + const resolved = resolveOrganizationSelectionFromMessage("дай что сегодня на складе в конторе ссыт кот", [ + "ООО КОТ ССЫТ ВО ДВОРЕ", + "ООО Альтернатива Плюс" + ]); + + expect(resolved).toBe("ООО КОТ ССЫТ ВО ДВОРЕ"); + }); + + it("scores direct and fuzzy token overlap above ambiguity threshold", () => { + const score = scoreOrganizationMentionInMessage( + normalizeOrganizationScopeSearchText("что на складе конторы альтернатива"), + 'ООО "Альтернатива Плюс"' + ); + + expect(score).toBeGreaterThanOrEqual(90); + }); +}); diff --git a/llm_normalizer/backend/tests/assistantSemanticExtractionContract.test.ts b/llm_normalizer/backend/tests/assistantSemanticExtractionContract.test.ts index 77e2feb..09fdccd 100644 --- a/llm_normalizer/backend/tests/assistantSemanticExtractionContract.test.ts +++ b/llm_normalizer/backend/tests/assistantSemanticExtractionContract.test.ts @@ -58,5 +58,71 @@ describe("address semantic extraction contract", () => { expect(semantic.apply_canonical_recommended).toBe(true); expect(["high", "medium"]).toContain(semantic.quality); }); -}); + it("marks self-scope stock snapshot wording as implicit current scope, not explicit date", () => { + const sourceMessage = "что на складе у нас"; + const predecomposeContract = buildAddressLlmPredecomposeContractV1({ + sourceMessage, + canonicalMessage: sourceMessage + }); + expect(predecomposeContract.intent).toBe("inventory_on_hand_as_of_date"); + expect(predecomposeContract.period.has_explicit_period).toBe(false); + 
expect(predecomposeContract.semantics.scope_kind).toBe("implicit_self_scope"); + expect(predecomposeContract.semantics.anchor_kind).toBe("self_scope"); + expect(predecomposeContract.semantics.date_scope_kind).toBe("implicit_current"); + expect(predecomposeContract.semantics.date_basis_hint).toBe("implicit_current_snapshot"); + }); + + it("accepts llm semantic hints for organization-scoped informal warehouse wording", () => { + const sourceMessage = "что на складе конторы альтернатива"; + const predecomposeContract = buildAddressLlmPredecomposeContractV1({ + sourceMessage, + canonicalMessage: sourceMessage, + semanticHints: { + scope_target_kind: "organization", + scope_target_text: "Альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + selected_object_scope_detected: false + } + }); + + expect(predecomposeContract.intent).toBe("inventory_on_hand_as_of_date"); + expect(predecomposeContract.entities.organization).toBe("Альтернатива"); + expect(predecomposeContract.entities.counterparty).toBeNull(); + expect(predecomposeContract.semantics.scope_kind).toBe("explicit_anchor"); + expect(predecomposeContract.semantics.anchor_kind).toBe("organization"); + expect(predecomposeContract.semantics.anchor_value).toBe("Альтернатива"); + expect(predecomposeContract.period.has_explicit_period).toBe(false); + expect(predecomposeContract.semantics.date_scope_kind).toBe("implicit_current"); + }); + + it("keeps slang stock-state rewrite as address snapshot instead of deep investigation", () => { + const sourceMessage = "чекни плиз чо там на складе альтернативы происходит"; + const canonicalMessage = "проверь, что происходит на складе у компании 'альтернатива'"; + const predecomposeContract = buildAddressLlmPredecomposeContractV1({ + sourceMessage, + canonicalMessage, + semanticHints: { + scope_target_kind: "organization", + scope_target_text: "альтернатива", + date_scope_kind: "implicit_current", + self_scope_detected: false, + 
selected_object_scope_detected: false + } + }); + const semantic = buildAddressSemanticExtractionContractV1({ + sourceMessage, + canonicalMessage, + predecomposeContract + }); + + expect(predecomposeContract.mode).toBe("address_query"); + expect(predecomposeContract.intent).toBe("inventory_on_hand_as_of_date"); + expect(predecomposeContract.entities.organization).toBe("альтернатива"); + expect(semantic.guard_hints.deep_investigation_signal_detected).toBe(false); + expect(semantic.guard_hints.canonical_data_signal_detected).toBe(true); + expect(semantic.valid).toBe(true); + expect(semantic.apply_canonical_recommended).toBe(true); + }); +}); diff --git a/llm_normalizer/prompts/developer/normalizer_v2_0_2.txt b/llm_normalizer/prompts/developer/normalizer_v2_0_2.txt index 0098030..22b7b83 100644 --- a/llm_normalizer/prompts/developer/normalizer_v2_0_2.txt +++ b/llm_normalizer/prompts/developer/normalizer_v2_0_2.txt @@ -13,7 +13,8 @@ Core behavior (v2.0.2): - soft_assumption_used - route_status - no_route_reason -5. Clarification must be rare and justified. +5. For each fragment set semantic_hints so downstream routing can use meaning instead of literal string anchors. +6. Clarification must be rare and justified. Execution-state policy: - Every in-scope fragment must produce a consistent execution state. 
@@ -53,6 +54,7 @@ Fragment required fields: - register_hints - time_scope - flags +- semantic_hints - candidate_labels - confidence - execution_readiness @@ -66,6 +68,27 @@ Soft assumptions (`soft_assumption_used`) allowed values: - company_scope_defaulted - problem_scan_mode_enabled +semantic_hints fields: +- scope_target_kind: none | self_scope | selected_object | organization | warehouse | counterparty | contract | item +- scope_target_text: short user-facing mention when scope_target_kind is organization/warehouse/counterparty/contract/item +- date_scope_kind: explicit | implicit_current | missing +- self_scope_detected: true when wording means "our own scope" or "this connected company" +- selected_object_scope_detected: true when wording refers to currently selected object/item + +Semantic-hints policy: +- Use semantic_hints to preserve meaning of colloquial or elliptical wording. +- Do not convert vague possessive wording into a fake literal anchor. +- If user means "our company / our connected base / current selected scope", prefer self_scope_detected=true and scope_target_kind=self_scope. +- If user refers to a company or organization colloquially, prefer scope_target_kind=organization, not warehouse. +- If user refers to the selected row/object/item, prefer selected_object_scope_detected=true and scope_target_kind=selected_object or item when item text is explicit. +- Do not invent exact database names. Use short text from the user in scope_target_text. + +Examples: +- "что на складе у нас" -> semantic_hints.scope_target_kind=self_scope; self_scope_detected=true; date_scope_kind=implicit_current +- "что на складе конторы альтернатива" -> semantic_hints.scope_target_kind=organization; scope_target_text="альтернатива"; date_scope_kind=implicit_current +- "по выбранному объекту ... 
кто поставщик" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true +- "по ней какие документы" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true + Global notes: - global_notes.needs_clarification should be true only when execution is truly blocked for all in-scope fragments. - global_notes.clarification_reason must explain the blocker. diff --git a/llm_normalizer/prompts/domain/normalizer_domain_v1_1.txt b/llm_normalizer/prompts/domain/normalizer_domain_v1_1.txt index 0812f90..9eede19 100644 --- a/llm_normalizer/prompts/domain/normalizer_domain_v1_1.txt +++ b/llm_normalizer/prompts/domain/normalizer_domain_v1_1.txt @@ -23,3 +23,14 @@ Важное правило: Если в одном вопросе есть и риск-лексика, и цепочка document/payment/posting, не понижать задачу до чистого `store_feature_risk`. Приоритет у causal cross-entity семантики. + +Неформальные scope-формулировки: +- "у нас", "у себя", "по нашей базе", "в нашей конторе" обычно означают self/company scope, а не буквальный якорь склада; +- "контора альтернатива", "альтернатива", "по фирме альтернатива" обычно означают organization scope, а не склад; +- "по выбранному объекту", "по ней", "по этой позиции", "по этому товару" обычно означают selected object scope. + +Для semantic_hints: +- если речь про текущую подключенную компанию/нашу базу -> scope_target_kind=self_scope; +- если речь про организацию/фирму/контору -> scope_target_kind=organization; +- если речь про выбранную позицию/объект -> scope_target_kind=selected_object; +- для складских snapshot-вопросов без даты обычно date_scope_kind=implicit_current.