ГЛОБАЛЬНЫЙ РЕФАКТОРИНГ АРХИТЕКТУРЫ - Рефакторинг этапов 3.1 - Добавлен строгий контракт семантического извлечения (валидность, качество, reason-codes, рекомендация применять canonical). Подключен semantic guard в predecompose/runtime: мягкое отклонение плохого canonical; отдельный safe-кейс для сырого полезного raw-фрагмента; fallback теперь выигрывает только когда это оправдано. Подключен semantic arbitration в tool-gate.

This commit is contained in:
dctouch 2026-04-11 16:45:43 +03:00
parent 66402439dc
commit a7ccc92a9c
18 changed files with 750 additions and 26 deletions

View File

@ -2349,6 +2349,23 @@ Plan (Stage 3):
- Coverage critic threshold before final answer.
- Reason-code taxonomy normalized.
Implemented in current pass (Stage 3.1 kick-off):
1. Added strict semantic extraction contract for address predecompose:
- `address_semantic_extraction_contract_v1` built from `source_message + canonical_message + predecompose contract`.
- Captures: extraction snapshot, guard hints, quality, validity, apply-canonical recommendation, reason codes.
2. Added semantic guard integration in predecompose runtime:
- New rejection path for low-value/unsafe canonical rewrites (`normalized_fragment_rejected_semantic_guard`).
- Fallback preference updated: deterministic fallback can win over LLM canonical when semantic contract rejects rewrite.
3. Added semantic arbitration integration in tool-gate:
- `resolveAddressToolGateDecision(...)` now honors `apply_canonical_recommended`.
- New skip reason: `llm_predecompose_semantic_guard_rejected`.
4. Extended runtime/debug observability:
- semantic contract propagated through address tool-gate, deep debug payload, and living chat debug payload.
- orchestration contract now includes semantic guard fields (`semantic_contract_valid`, `semantic_apply_canonical_recommended`, `semantic_reason_codes`).
5. Added focused regression tests:
- `assistantSemanticExtractionContract.test.ts` (new)
- Existing router/chat regressions revalidated (`assistantLivingRouter.test.ts`, `assistantLivingChatMode.test.ts`).
Acceptance (Stage 3):
1. LLM outputs strictly validated schema for extraction/decomposition (no free-form).
2. Deterministic guards can block or downgrade answers when evidence insufficient.

View File

@ -1,10 +1,54 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildAddressLlmPredecomposeContractV1 = buildAddressLlmPredecomposeContractV1;
exports.buildAddressSemanticExtractionContractV1 = buildAddressSemanticExtractionContractV1;
const addressQueryClassifier_1 = require("../addressQueryClassifier");
const addressQueryShapeClassifier_1 = require("../addressQueryShapeClassifier");
const addressIntentResolver_1 = require("../addressIntentResolver");
const addressFilterExtractor_1 = require("../addressFilterExtractor");
// Substring patterns used by the semantic guard helpers below. All four are
// case-insensitive and Unicode-aware (`iu`) and carry no `g`/`y` flag, so
// repeated `.test()` calls are stateless.

// Data terms. Escaped Cyrillic stems: "док" (doc-), "договор", "контрагент",
// "контракт", "счет"/"счёт", "сальдо", "оборот", "платеж"/"платёж", "операц",
// "период", "год"; plus English terms and Latin transliterations.
const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN = /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu;
// Business-entity terms. Escaped Cyrillic stems: "заказчик", "поставщик",
// "контрагент", "компан", "организац", "контор"; plus English equivalents.
const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu;
// Questions about the data scope itself. Escaped Cyrillic phrases:
// "какая база", "база какой контор", "по каким контор"; plus
// "which company base", "which tenant", "data scope".
const ADDRESS_SEMANTIC_SCOPE_META_PATTERN = /(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu;
// Investigation-style wording. Escaped Cyrillic: "проверь"/"проверить",
// "разбери"/"разберать", "почему", "механизм", "разрыв", "хвост"; plus
// "root cause", "trace chain", "state transition".
// NOTE(review): the "разбер(и|ать)" alternative does not cover the standard
// infinitive spelling "разбирать" — confirm whether that is intended.
const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN = /(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu;
/**
 * Converts any value to a comparison-friendly string: stringified
 * (null/undefined become ""), lower-cased, whitespace runs collapsed to a
 * single space, and trimmed at both ends.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} The compacted, lower-cased text.
 */
function normalizeCompact(value) {
    const text = String(value ?? "");
    const lowered = text.toLowerCase();
    const singleSpaced = lowered.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
/**
 * Tells whether the compacted text of `value` matches the data-signal pattern.
 *
 * @param {unknown} value - Message text (any value is stringified first).
 * @returns {boolean} True when ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN matches.
 */
function hasSemanticDataSignal(value) {
    const compactText = normalizeCompact(value);
    return ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the entity-signal pattern.
 *
 * @param {unknown} value - Message text (any value is stringified first).
 * @returns {boolean} True when ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN matches.
 */
function hasSemanticEntitySignal(value) {
    const compactText = normalizeCompact(value);
    return ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the data-scope
 * meta-question pattern.
 *
 * @param {unknown} value - Message text (any value is stringified first).
 * @returns {boolean} True when ADDRESS_SEMANTIC_SCOPE_META_PATTERN matches.
 */
function hasSemanticDataScopeMetaSignal(value) {
    const compactText = normalizeCompact(value);
    return ADDRESS_SEMANTIC_SCOPE_META_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the deep-investigation
 * pattern.
 *
 * @param {unknown} value - Message text (any value is stringified first).
 * @returns {boolean} True when ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN matches.
 */
function hasSemanticDeepInvestigationSignal(value) {
    const compactText = normalizeCompact(value);
    return ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN.test(compactText);
}
/**
 * Checks whether the contract's intent needs an entity anchor that was not
 * extracted.
 *
 * Counterparty-scoped intents need `entities.counterparty`; contract-scoped
 * intents need `entities.contract`. Every other intent has no required anchor.
 *
 * @param {object} contract - Predecompose contract exposing `intent` and `entities`.
 * @returns {boolean} True when the anchor required by the intent is empty.
 */
function requiredAnchorMissing(contract) {
    switch (contract.intent) {
        case "list_documents_by_counterparty":
        case "bank_operations_by_counterparty":
        case "list_contracts_by_counterparty":
            return !toNonEmptyString(contract.entities.counterparty);
        case "list_documents_by_contract":
        case "bank_operations_by_contract":
            return !toNonEmptyString(contract.entities.contract);
        default:
            return false;
    }
}
/**
 * Maps the guard flags to a quality grade.
 *
 * @param {{valid: boolean, unsupportedLowConfidence: boolean, requiredAnchorMissing: boolean, unknownIntentAndShape: boolean}} input
 *   Flags computed by the semantic contract builder.
 * @returns {"high"|"medium"|"low"} "low" when the contract is invalid,
 *   "medium" when valid but at least one degrading flag is set, otherwise "high".
 */
function deriveSemanticQuality(input) {
    if (!input.valid) {
        return "low";
    }
    const degraded = input.unsupportedLowConfidence ||
        input.requiredAnchorMissing ||
        input.unknownIntentAndShape;
    return degraded ? "medium" : "high";
}
function toNonEmptyString(value) {
if (value === null || value === undefined) {
return null;
@ -108,3 +152,98 @@ function buildAddressLlmPredecomposeContractV1(input) {
aggregation_profile: inferAggregationProfile(intent.intent, shape.shape)
};
}
/**
 * Builds the `address_semantic_extraction_contract_v1` payload that judges
 * whether a canonical rewrite of the user message should be applied.
 *
 * @param {object} input
 * @param {string} input.sourceMessage - Original user message; trimmed before use.
 * @param {string} input.canonicalMessage - Rewritten message; trimmed, and
 *   replaced by the source message when it is empty.
 * @param {object|null} [input.predecomposeContract] - Pre-built predecompose
 *   contract; when null/undefined one is built from the two messages.
 * @returns {object} Contract with the extraction snapshot, entities, period,
 *   guard hints, quality, validity, apply-canonical recommendation and reason codes.
 */
function buildAddressSemanticExtractionContractV1(input) {
    const sourceMessage = String(input.sourceMessage ?? "").trim();
    const trimmedCanonical = String(input.canonicalMessage ?? "").trim();
    const canonicalMessage = trimmedCanonical || sourceMessage;
    const predecomposeContract = input.predecomposeContract ??
        buildAddressLlmPredecomposeContractV1({ sourceMessage, canonicalMessage });

    // Lexical signals taken from the two message variants.
    const sourceDataSignal = hasSemanticDataSignal(sourceMessage);
    const canonicalDataSignal = hasSemanticDataSignal(canonicalMessage);
    const canonicalEntitySignal = hasSemanticEntitySignal(canonicalMessage);
    const dataScopeMetaSignal = hasSemanticDataScopeMetaSignal(sourceMessage);
    const deepInvestigationSignal = [sourceMessage, canonicalMessage]
        .some((text) => hasSemanticDeepInvestigationSignal(text));

    // Structural signals taken from the predecompose contract.
    const unsupportedLowConfidence = predecomposeContract.mode === "unsupported" &&
        ["low", "medium"].includes(predecomposeContract.mode_confidence);
    const missingRequiredAnchor = requiredAnchorMissing(predecomposeContract);
    const unknownIntentAndShape = predecomposeContract.intent === "unknown" &&
        predecomposeContract.query_shape === "UNKNOWN";

    const rewriteApplied = normalizeCompact(sourceMessage) !== normalizeCompact(canonicalMessage);
    // Drift: the source mentioned data terms but the rewrite no longer does.
    const semanticDriftSuspected = rewriteApplied && sourceDataSignal && !canonicalDataSignal;

    // Rule table keeps the reason codes in a fixed emission order.
    const reasonCodes = [
        [dataScopeMetaSignal, "data_scope_meta_query_detected"],
        [unsupportedLowConfidence, "unsupported_low_confidence_contract"],
        [missingRequiredAnchor, "required_anchor_missing_for_intent"],
        [semanticDriftSuspected, "semantic_drift_source_vs_canonical"],
        [unknownIntentAndShape && rewriteApplied, "rewrite_without_structured_gain"],
        [deepInvestigationSignal, "deep_investigation_signal_detected"]
    ]
        .filter(([active]) => active)
        .map(([, code]) => code);

    const canonicalHasNoSignal = !canonicalDataSignal && !canonicalEntitySignal;
    const unsupportedWithoutAnySignal = unsupportedLowConfidence && !sourceDataSignal && canonicalHasNoSignal;
    const rewriteWithoutAnyGain = unknownIntentAndShape && rewriteApplied && canonicalHasNoSignal;
    const valid = !(dataScopeMetaSignal ||
        semanticDriftSuspected ||
        unsupportedWithoutAnySignal ||
        rewriteWithoutAnyGain);

    // Even a valid contract should not apply a rewrite that replaces a
    // data-bearing source under an unsupported mode, or that lacks its anchor.
    const rewriteDiscouraged = rewriteApplied &&
        ((unsupportedLowConfidence && sourceDataSignal) || missingRequiredAnchor);
    const applyCanonicalRecommended = valid && !rewriteDiscouraged;

    const quality = deriveSemanticQuality({
        valid,
        unsupportedLowConfidence,
        requiredAnchorMissing: missingRequiredAnchor,
        unknownIntentAndShape
    });

    const { entities, period } = predecomposeContract;
    return {
        schema_version: "address_semantic_extraction_contract_v1",
        source_message: sourceMessage,
        canonical_message: canonicalMessage,
        canonical_rewrite_applied: rewriteApplied,
        extraction: {
            mode: predecomposeContract.mode,
            mode_confidence: predecomposeContract.mode_confidence,
            query_shape: predecomposeContract.query_shape,
            query_shape_confidence: predecomposeContract.query_shape_confidence,
            intent: predecomposeContract.intent,
            intent_confidence: predecomposeContract.intent_confidence,
            aggregation_profile: predecomposeContract.aggregation_profile
        },
        entities: {
            account: entities.account,
            counterparty: entities.counterparty,
            contract: entities.contract,
            document_type: entities.document_type,
            document_ref: entities.document_ref,
            organization: entities.organization
        },
        period: {
            scope: period.scope,
            period_from: period.period_from,
            period_to: period.period_to,
            as_of_date: period.as_of_date,
            has_explicit_period: period.has_explicit_period
        },
        guard_hints: {
            source_data_signal_detected: sourceDataSignal,
            canonical_data_signal_detected: canonicalDataSignal,
            data_scope_meta_query_detected: dataScopeMetaSignal,
            deep_investigation_signal_detected: deepInvestigationSignal,
            required_anchor_missing: missingRequiredAnchor,
            unsupported_low_confidence: unsupportedLowConfidence,
            semantic_drift_suspected: semanticDriftSuspected
        },
        quality,
        valid,
        apply_canonical_recommended: applyCanonicalRecommended,
        reason_codes: reasonCodes
    };
}

View File

@ -54,6 +54,7 @@ function normalizeLlmPreDecomposeMeta(value) {
const addressRetryAuditRaw = toRecordObject(source.addressRetryAudit);
const predecomposeContractRaw = toRecordObject(source.predecomposeContract);
const predecomposePeriodRaw = toRecordObject(predecomposeContractRaw?.period);
const semanticExtractionContractRaw = toRecordObject(source.semanticExtractionContract);
const normalized = {};
const attempted = toNullableBoolean(source.attempted);
if (attempted !== undefined)
@ -121,6 +122,24 @@ function normalizeLlmPreDecomposeMeta(value) {
};
}
}
if (semanticExtractionContractRaw) {
const valid = toNullableBoolean(semanticExtractionContractRaw.valid);
const quality = toNullableString(semanticExtractionContractRaw.quality);
const applyCanonicalRecommended = toNullableBoolean(semanticExtractionContractRaw.apply_canonical_recommended);
const reasonCodes = Array.isArray(semanticExtractionContractRaw.reason_codes)
? semanticExtractionContractRaw.reason_codes
.map((item) => toNullableString(item))
.filter((item) => Boolean(item))
: [];
if (valid !== undefined || quality || applyCanonicalRecommended !== undefined || reasonCodes.length > 0) {
normalized.semanticExtractionContract = {
valid: valid ?? null,
quality,
apply_canonical_recommended: applyCanonicalRecommended ?? null,
reason_codes: reasonCodes
};
}
}
const hasUsefulField = Object.values(normalized).some((item) => item !== undefined && item !== null);
return hasUsefulField ? normalized : null;
}

View File

@ -17,6 +17,12 @@ async function runAssistantAddressToolGateRuntime(input) {
const predecomposePeriod = predecomposeContract?.period && typeof predecomposeContract.period === "object"
? predecomposeContract.period
: null;
const semanticExtractionContract = runtimeMeta.semanticExtractionContract && typeof runtimeMeta.semanticExtractionContract === "object"
? runtimeMeta.semanticExtractionContract
: null;
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
? semanticExtractionContract?.reason_codes
: [];
input.logEvent({
timestamp: input.nowIso(),
level: "info",
@ -37,7 +43,11 @@ async function runAssistantAddressToolGateRuntime(input) {
address_tool_gate_reason: runtimeMeta.toolGateReason ?? null,
address_llm_predecompose_contract_intent: predecomposeContract?.intent ?? null,
address_llm_predecompose_contract_aggregation_profile: predecomposeContract?.aggregation_profile ?? null,
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null,
address_semantic_contract_valid: semanticExtractionContract?.valid ?? null,
address_semantic_contract_quality: semanticExtractionContract?.quality ?? null,
address_semantic_apply_canonical_recommended: semanticExtractionContract?.apply_canonical_recommended ?? null,
address_semantic_reason_codes: semanticReasonCodes
}
});
if (input.livingModeDecision?.mode === "chat") {

View File

@ -41,6 +41,12 @@ function buildAddressProcessedLogDetails(input, assistantItem) {
address_llm_predecompose_contract_intent: llmMeta?.predecomposeContract?.intent ?? null,
address_llm_predecompose_contract_aggregation_profile: llmMeta?.predecomposeContract?.aggregation_profile ?? null,
address_llm_predecompose_contract_period_scope: llmMeta?.predecomposeContract?.period?.scope ?? null,
address_semantic_contract_valid: llmMeta?.semanticExtractionContract?.valid ?? null,
address_semantic_contract_quality: llmMeta?.semanticExtractionContract?.quality ?? null,
address_semantic_apply_canonical_recommended: llmMeta?.semanticExtractionContract?.apply_canonical_recommended ?? null,
address_semantic_reason_codes: Array.isArray(llmMeta?.semanticExtractionContract?.reason_codes)
? llmMeta.semanticExtractionContract?.reason_codes
: [],
detected_mode: laneDebug.detected_mode,
query_shape: laneDebug.query_shape,
detected_intent: laneDebug.detected_intent,

View File

@ -82,6 +82,7 @@ function buildDeepAnalysisDebugPayload(input) {
address_tool_gate_decision: input.addressRuntimeMetaForDeep?.toolGateDecision ?? null,
address_tool_gate_reason: input.addressRuntimeMetaForDeep?.toolGateReason ?? null,
address_llm_predecompose_contract: input.addressRuntimeMetaForDeep?.predecomposeContract ?? null,
address_semantic_extraction_contract: input.addressRuntimeMetaForDeep?.semanticExtractionContract ?? null,
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
assistant_outcome_class_v1: input.outcomeClassV1,
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,

View File

@ -99,6 +99,9 @@ async function runAssistantLivingChatRuntime(input) {
const predecomposeContract = addressRuntimeMeta.predecomposeContract && typeof addressRuntimeMeta.predecomposeContract === "object"
? addressRuntimeMeta.predecomposeContract
: null;
const semanticExtractionContract = addressRuntimeMeta.semanticExtractionContract && typeof addressRuntimeMeta.semanticExtractionContract === "object"
? addressRuntimeMeta.semanticExtractionContract
: null;
const debug = {
trace_id: input.traceIdFactory(),
prompt_version: "living_chat_router_v1",
@ -130,6 +133,7 @@ async function runAssistantLivingChatRuntime(input) {
address_llm_predecompose_applied: Boolean(addressRuntimeMeta.applied),
address_llm_predecompose_reason: addressRuntimeMeta.reason ?? null,
address_llm_predecompose_contract: predecomposeContract,
address_semantic_extraction_contract: semanticExtractionContract,
orchestration_contract_v1: addressRuntimeMeta.orchestrationContract ?? null,
tool_gate_decision: addressRuntimeMeta.toolGateDecision ?? null,
tool_gate_reason: addressRuntimeMeta.toolGateReason ?? null,

View File

@ -2758,12 +2758,19 @@ function hasSameDateAccountFollowupSignalForPredecompose(text) {
}
function attachAddressPredecomposeContract(meta, sourceMessage) {
const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? String(sourceMessage ?? "");
return {
...meta,
predecomposeContract: (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({
const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({
sourceMessage: String(sourceMessage ?? ""),
canonicalMessage
})
});
const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
sourceMessage: String(sourceMessage ?? ""),
canonicalMessage,
predecomposeContract
});
return {
...meta,
predecomposeContract,
semanticExtractionContract
};
}
async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) {
@ -2954,6 +2961,63 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
}, userMessage);
}
}
const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
sourceMessage: String(userMessage ?? ""),
canonicalMessage: candidate
});
if (!semanticContractForCandidate.apply_canonical_recommended) {
const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected);
const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected &&
candidateFromNormalized &&
candidateFromNormalized === candidate &&
toNonEmptyString(candidate));
if (rawFragmentCandidatePreferred) {
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: true,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: candidate,
reason: "normalized_fragment_semantic_guard_raw_fragment_preferred",
fallbackRuleHit: null,
sanitizedUserMessage
}, userMessage);
}
if (fallbackCandidate) {
const fallbackSemanticContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
sourceMessage: String(userMessage ?? ""),
canonicalMessage: String(fallbackCandidate.candidate ?? "")
});
const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase());
const sourceCompactForFallback = compactWhitespace(String(userMessage ?? "").toLowerCase());
const fallbackApplied = fallbackCompact.length > 0 && fallbackCompact !== sourceCompactForFallback;
if (fallbackApplied && fallbackSemanticContract.apply_canonical_recommended && !sourceDataSignalDetected) {
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: true,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: String(fallbackCandidate.candidate ?? ""),
reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard",
fallbackRuleHit: fallbackCandidate.rule,
sanitizedUserMessage
}, userMessage);
}
}
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: false,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: userMessage,
reason: "normalized_fragment_rejected_semantic_guard",
fallbackRuleHit: null,
sanitizedUserMessage
}, userMessage);
}
const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase());
const candidateCompact = compactWhitespace(candidate.toLowerCase());
const applied = sourceCompact !== candidateCompact;
@ -3030,13 +3094,20 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
const llmContractMode = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode);
const llmContractModeConfidence = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode_confidence);
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
? llmPreDecomposeMeta.semanticExtractionContract
: null;
const semanticCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
const llmCanonicalEntitySignal = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|customer|supplier|counterparty|company|vendor|client)/iu.test(compactWhitespace(repairedInputMessage.toLowerCase()));
const llmCanonicalAppliedSignal = Boolean(llmPreDecomposeMeta?.applied) && llmContractMode !== "deep_analysis";
const hasLlmCanonicalSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
const hasLlmCanonicalSignal = semanticCanonicalRecommended &&
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
((llmContractMode === "address_query" && llmContractModeConfidence !== "low") ||
(llmCanonicalAppliedSignal &&
(hasStrongDataIntentSignal(repairedInputMessage) || llmCanonicalEntitySignal)));
const hasLlmCanonicalDataSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
const hasLlmCanonicalDataSignal = semanticCanonicalRecommended &&
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
Boolean(llmPreDecomposeMeta?.applied) &&
(llmContractMode === "address_query" || llmContractMode === "unsupported" || llmContractMode === null) &&
hasStrongDataIntentSignal(repairedInputMessage);
@ -3057,6 +3128,17 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
const strongDataSignalFromEffectiveMessage = hasStrongDataIntentSignal(repairedInputMessage) ||
hasAccountingSignal(repairedInputMessage) ||
hasDataRetrievalRequestSignal(repairedInputMessage);
if (!semanticCanonicalRecommended &&
llmContractIntent === "unknown" &&
!followupContext &&
!hasClassifierSignal &&
!strongDataSignalFromRawMessage) {
return {
runAddressLane: false,
decision: "skip_address_lane",
reason: "llm_predecompose_semantic_guard_rejected"
};
}
if (hasUnsupportedLowConfidencePredecomposeSignal && !followupContext &&
!hasAnyAddressSignal &&
!strongDataSignalFromRawMessage &&
@ -3242,11 +3324,21 @@ function resolveAssistantOrchestrationDecision(input) {
const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample);
const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample);
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
? llmPreDecomposeMeta.semanticExtractionContract
: null;
const semanticContractValid = semanticExtractionContract?.valid !== false;
const semanticApplyCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
? semanticExtractionContract.reason_codes
: [];
const strictDeepInvestigationCueDetected = hasStrictDeepInvestigationCue(rawUserMessage) ||
hasStrictDeepInvestigationCue(repairedRawUserMessage) ||
hasStrictDeepInvestigationCue(effectiveAddressUserMessage) ||
hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage);
const keepAddressLaneByIntent = Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
const keepAddressLaneByIntent = semanticApplyCanonicalRecommended &&
Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
(llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent))) &&
!strictDeepInvestigationCueDetected;
const strongDataSignal = hasStrongDataIntentSignal(rawUserMessage) ||
@ -3446,6 +3538,9 @@ function resolveAssistantOrchestrationDecision(input) {
address_intent_confidence: intentResolution.confidence,
strong_data_signal_detected: strongDataSignal,
data_retrieval_signal_detected: dataRetrievalSignal,
semantic_contract_valid: semanticContractValid,
semantic_apply_canonical_recommended: semanticApplyCanonicalRecommended,
semantic_reason_codes: semanticReasonCodes,
followup_context_detected: Boolean(followupContext),
unsupported_address_intent_fallback_to_deep: unsupportedAddressIntentFallbackToDeep,
deep_analysis_signal_fallback_to_deep: deepAnalysisSignalFallbackToDeep,

View File

@ -13,6 +13,8 @@ export type AddressPredecomposeAggregationProfile =
| "open_items"
| "unknown";
/** Quality grade of a semantic extraction contract, as produced by `deriveSemanticQuality`. */
export type AddressSemanticExtractionQuality = "high" | "medium" | "low";
export interface AddressLlmPredecomposeContractV1 {
schema_version: "address_llm_predecompose_contract_v1";
source_message: string;
@ -41,6 +43,102 @@ export interface AddressLlmPredecomposeContractV1 {
aggregation_profile: AddressPredecomposeAggregationProfile;
}
/**
 * Semantic guard verdict for a (source message, canonical message) pair,
 * produced by `buildAddressSemanticExtractionContractV1`.
 */
export interface AddressSemanticExtractionContractV1 {
schema_version: "address_semantic_extraction_contract_v1";
/** Trimmed original message. */
source_message: string;
/** Trimmed canonical message; equals `source_message` when the canonical text is empty. */
canonical_message: string;
/** True when the two messages differ after lower-casing and whitespace compaction. */
canonical_rewrite_applied: boolean;
/** Classification snapshot copied from the predecompose contract. */
extraction: {
mode: AddressQuestionMode;
mode_confidence: "high" | "medium" | "low";
query_shape: AddressQueryShape;
query_shape_confidence: "high" | "medium" | "low";
intent: AddressIntent;
intent_confidence: "high" | "medium" | "low";
aggregation_profile: AddressPredecomposeAggregationProfile;
};
/** Entity fields copied from the predecompose contract. */
entities: AddressLlmPredecomposeContractV1["entities"];
/** Period fields copied from the predecompose contract. */
period: AddressLlmPredecomposeContractV1["period"];
/** Raw boolean signals the verdict is derived from. */
guard_hints: {
/** Data-signal pattern matched the source message. */
source_data_signal_detected: boolean;
/** Data-signal pattern matched the canonical message. */
canonical_data_signal_detected: boolean;
/** Scope-meta pattern matched the source message. */
data_scope_meta_query_detected: boolean;
/** Deep-investigation pattern matched the source or the canonical message. */
deep_investigation_signal_detected: boolean;
/** The intent requires a counterparty/contract anchor that is empty. */
required_anchor_missing: boolean;
/** Mode is "unsupported" with "low" or "medium" mode confidence. */
unsupported_low_confidence: boolean;
/** A rewrite was applied and the data signal present in the source is absent from the canonical text. */
semantic_drift_suspected: boolean;
};
quality: AddressSemanticExtractionQuality;
valid: boolean;
/** Always false when `valid` is false; may also be false for a valid contract. */
apply_canonical_recommended: boolean;
/** Machine-readable codes for the guard conditions that fired. */
reason_codes: string[];
}
// Substring patterns used by the semantic guard helpers below. All four are
// case-insensitive and Unicode-aware (`iu`) and carry no `g`/`y` flag, so
// repeated `.test()` calls are stateless.

// Data terms. Escaped Cyrillic stems: "док" (doc-), "договор", "контрагент",
// "контракт", "счет"/"счёт", "сальдо", "оборот", "платеж"/"платёж", "операц",
// "период", "год"; plus English terms and Latin transliterations.
const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN =
/(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu;
// Business-entity terms. Escaped Cyrillic stems: "заказчик", "поставщик",
// "контрагент", "компан", "организац", "контор"; plus English equivalents.
const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN =
/(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu;
// Questions about the data scope itself. Escaped Cyrillic phrases:
// "какая база", "база какой контор", "по каким контор"; plus
// "which company base", "which tenant", "data scope".
const ADDRESS_SEMANTIC_SCOPE_META_PATTERN =
/(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu;
// Investigation-style wording. Escaped Cyrillic: "проверь"/"проверить",
// "разбери"/"разберать", "почему", "механизм", "разрыв", "хвост"; plus
// "root cause", "trace chain", "state transition".
// NOTE(review): the "разбер(и|ать)" alternative does not cover the standard
// infinitive spelling "разбирать" — confirm whether that is intended.
const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN =
/(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu;
/**
 * Converts any value to a comparison-friendly string: stringified
 * (null/undefined become ""), lower-cased, whitespace runs collapsed to a
 * single space, and trimmed at both ends.
 *
 * @param value - Value to normalize.
 * @returns The compacted, lower-cased text.
 */
function normalizeCompact(value: unknown): string {
  const text = String(value ?? "");
  const lowered = text.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Tells whether the compacted text of `value` matches the data-signal pattern.
 *
 * @param value - Message text (any value is stringified first).
 * @returns True when ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN matches.
 */
function hasSemanticDataSignal(value: unknown): boolean {
  const compactText = normalizeCompact(value);
  return ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the entity-signal pattern.
 *
 * @param value - Message text (any value is stringified first).
 * @returns True when ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN matches.
 */
function hasSemanticEntitySignal(value: unknown): boolean {
  const compactText = normalizeCompact(value);
  return ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the data-scope
 * meta-question pattern.
 *
 * @param value - Message text (any value is stringified first).
 * @returns True when ADDRESS_SEMANTIC_SCOPE_META_PATTERN matches.
 */
function hasSemanticDataScopeMetaSignal(value: unknown): boolean {
  const compactText = normalizeCompact(value);
  return ADDRESS_SEMANTIC_SCOPE_META_PATTERN.test(compactText);
}
/**
 * Tells whether the compacted text of `value` matches the deep-investigation
 * pattern.
 *
 * @param value - Message text (any value is stringified first).
 * @returns True when ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN matches.
 */
function hasSemanticDeepInvestigationSignal(value: unknown): boolean {
  const compactText = normalizeCompact(value);
  return ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN.test(compactText);
}
/**
 * Checks whether the contract's intent needs an entity anchor that was not
 * extracted.
 *
 * Counterparty-scoped intents need `entities.counterparty`; contract-scoped
 * intents need `entities.contract`. Every other intent has no required anchor.
 *
 * @param contract - Predecompose contract to inspect.
 * @returns True when the anchor required by the intent is empty.
 */
function requiredAnchorMissing(contract: AddressLlmPredecomposeContractV1): boolean {
  switch (contract.intent) {
    case "list_documents_by_counterparty":
    case "bank_operations_by_counterparty":
    case "list_contracts_by_counterparty":
      return !toNonEmptyString(contract.entities.counterparty);
    case "list_documents_by_contract":
    case "bank_operations_by_contract":
      return !toNonEmptyString(contract.entities.contract);
    default:
      return false;
  }
}
/**
 * Maps the guard flags to a quality grade.
 *
 * @param input - Flags computed by the semantic contract builder.
 * @returns "low" when the contract is invalid, "medium" when valid but at
 *   least one degrading flag is set, otherwise "high".
 */
function deriveSemanticQuality(input: {
  valid: boolean;
  unsupportedLowConfidence: boolean;
  requiredAnchorMissing: boolean;
  unknownIntentAndShape: boolean;
}): AddressSemanticExtractionQuality {
  if (!input.valid) {
    return "low";
  }
  const degraded =
    input.unsupportedLowConfidence || input.requiredAnchorMissing || input.unknownIntentAndShape;
  return degraded ? "medium" : "high";
}
function toNonEmptyString(value: unknown): string | null {
if (value === null || value === undefined) {
return null;
@ -167,3 +265,115 @@ export function buildAddressLlmPredecomposeContractV1(input: {
aggregation_profile: inferAggregationProfile(intent.intent, shape.shape)
};
}
/**
 * Builds the `address_semantic_extraction_contract_v1` payload that judges
 * whether a canonical rewrite of the user message should be applied.
 *
 * @param input.sourceMessage - Original user message; trimmed before use.
 * @param input.canonicalMessage - Rewritten message; trimmed, and replaced by
 *   the source message when it is empty.
 * @param input.predecomposeContract - Optional pre-built predecompose
 *   contract; when null/undefined one is built from the two messages.
 * @returns Contract with the extraction snapshot, entities, period, guard
 *   hints, quality, validity, apply-canonical recommendation and reason codes.
 */
export function buildAddressSemanticExtractionContractV1(input: {
  sourceMessage: string;
  canonicalMessage: string;
  predecomposeContract?: AddressLlmPredecomposeContractV1 | null;
}): AddressSemanticExtractionContractV1 {
  const sourceMessage = String(input.sourceMessage ?? "").trim();
  const trimmedCanonical = String(input.canonicalMessage ?? "").trim();
  const canonicalMessage = trimmedCanonical || sourceMessage;
  const predecomposeContract =
    input.predecomposeContract ??
    buildAddressLlmPredecomposeContractV1({ sourceMessage, canonicalMessage });

  // Lexical signals taken from the two message variants.
  const sourceDataSignal = hasSemanticDataSignal(sourceMessage);
  const canonicalDataSignal = hasSemanticDataSignal(canonicalMessage);
  const canonicalEntitySignal = hasSemanticEntitySignal(canonicalMessage);
  const dataScopeMetaSignal = hasSemanticDataScopeMetaSignal(sourceMessage);
  const deepInvestigationSignal = [sourceMessage, canonicalMessage].some((text) =>
    hasSemanticDeepInvestigationSignal(text)
  );

  // Structural signals taken from the predecompose contract.
  const modeConfidence = predecomposeContract.mode_confidence;
  const confidenceNotHigh = modeConfidence === "low" || modeConfidence === "medium";
  const unsupportedLowConfidence = predecomposeContract.mode === "unsupported" && confidenceNotHigh;
  const missingRequiredAnchor = requiredAnchorMissing(predecomposeContract);
  const unknownIntentAndShape =
    predecomposeContract.intent === "unknown" && predecomposeContract.query_shape === "UNKNOWN";

  const rewriteApplied = normalizeCompact(sourceMessage) !== normalizeCompact(canonicalMessage);
  // Drift: the source mentioned data terms but the rewrite no longer does.
  const semanticDriftSuspected = rewriteApplied && sourceDataSignal && !canonicalDataSignal;

  // Rule table keeps the reason codes in a fixed emission order.
  const reasonRules: Array<[boolean, string]> = [
    [dataScopeMetaSignal, "data_scope_meta_query_detected"],
    [unsupportedLowConfidence, "unsupported_low_confidence_contract"],
    [missingRequiredAnchor, "required_anchor_missing_for_intent"],
    [semanticDriftSuspected, "semantic_drift_source_vs_canonical"],
    [unknownIntentAndShape && rewriteApplied, "rewrite_without_structured_gain"],
    [deepInvestigationSignal, "deep_investigation_signal_detected"]
  ];
  const reasonCodes: string[] = reasonRules.filter(([active]) => active).map(([, code]) => code);

  const canonicalHasNoSignal = !canonicalDataSignal && !canonicalEntitySignal;
  const unsupportedWithoutAnySignal =
    unsupportedLowConfidence && !sourceDataSignal && canonicalHasNoSignal;
  const rewriteWithoutAnyGain = unknownIntentAndShape && rewriteApplied && canonicalHasNoSignal;
  const valid = !(
    dataScopeMetaSignal ||
    semanticDriftSuspected ||
    unsupportedWithoutAnySignal ||
    rewriteWithoutAnyGain
  );

  // Even a valid contract should not apply a rewrite that replaces a
  // data-bearing source under an unsupported mode, or that lacks its anchor.
  const rewriteDiscouraged =
    rewriteApplied && ((unsupportedLowConfidence && sourceDataSignal) || missingRequiredAnchor);
  const applyCanonicalRecommended = valid && !rewriteDiscouraged;

  const quality = deriveSemanticQuality({
    valid,
    unsupportedLowConfidence,
    requiredAnchorMissing: missingRequiredAnchor,
    unknownIntentAndShape
  });

  const { entities, period } = predecomposeContract;
  return {
    schema_version: "address_semantic_extraction_contract_v1",
    source_message: sourceMessage,
    canonical_message: canonicalMessage,
    canonical_rewrite_applied: rewriteApplied,
    extraction: {
      mode: predecomposeContract.mode,
      mode_confidence: predecomposeContract.mode_confidence,
      query_shape: predecomposeContract.query_shape,
      query_shape_confidence: predecomposeContract.query_shape_confidence,
      intent: predecomposeContract.intent,
      intent_confidence: predecomposeContract.intent_confidence,
      aggregation_profile: predecomposeContract.aggregation_profile
    },
    entities: {
      account: entities.account,
      counterparty: entities.counterparty,
      contract: entities.contract,
      document_type: entities.document_type,
      document_ref: entities.document_ref,
      organization: entities.organization
    },
    period: {
      scope: period.scope,
      period_from: period.period_from,
      period_to: period.period_to,
      as_of_date: period.as_of_date,
      has_explicit_period: period.has_explicit_period
    },
    guard_hints: {
      source_data_signal_detected: sourceDataSignal,
      canonical_data_signal_detected: canonicalDataSignal,
      data_scope_meta_query_detected: dataScopeMetaSignal,
      deep_investigation_signal_detected: deepInvestigationSignal,
      required_anchor_missing: missingRequiredAnchor,
      unsupported_low_confidence: unsupportedLowConfidence,
      semantic_drift_suspected: semanticDriftSuspected
    },
    quality,
    valid,
    apply_canonical_recommended: applyCanonicalRecommended,
    reason_codes: reasonCodes
  };
}

View File

@ -104,6 +104,7 @@ function normalizeLlmPreDecomposeMeta(value: unknown): AddressLlmPreDecomposeMet
const addressRetryAuditRaw = toRecordObject(source.addressRetryAudit);
const predecomposeContractRaw = toRecordObject(source.predecomposeContract);
const predecomposePeriodRaw = toRecordObject(predecomposeContractRaw?.period);
const semanticExtractionContractRaw = toRecordObject(source.semanticExtractionContract);
const normalized: AddressLlmPreDecomposeMetaLogInput = {};
@ -170,6 +171,27 @@ function normalizeLlmPreDecomposeMeta(value: unknown): AddressLlmPreDecomposeMet
}
}
if (semanticExtractionContractRaw) {
const valid = toNullableBoolean(semanticExtractionContractRaw.valid);
const quality = toNullableString(semanticExtractionContractRaw.quality);
const applyCanonicalRecommended = toNullableBoolean(
semanticExtractionContractRaw.apply_canonical_recommended
);
const reasonCodes = Array.isArray(semanticExtractionContractRaw.reason_codes)
? semanticExtractionContractRaw.reason_codes
.map((item) => toNullableString(item))
.filter((item): item is string => Boolean(item))
: [];
if (valid !== undefined || quality || applyCanonicalRecommended !== undefined || reasonCodes.length > 0) {
normalized.semanticExtractionContract = {
valid: valid ?? null,
quality,
apply_canonical_recommended: applyCanonicalRecommended ?? null,
reason_codes: reasonCodes
};
}
}
const hasUsefulField = Object.values(normalized).some((item) => item !== undefined && item !== null);
return hasUsefulField ? normalized : null;
}

View File

@ -26,6 +26,12 @@ export interface AssistantAddressToolGateRuntimeInput<ResponseType = unknown> {
scope?: unknown;
} | null;
} | null;
semanticExtractionContract?: {
valid?: unknown;
quality?: unknown;
apply_canonical_recommended?: unknown;
reason_codes?: unknown;
} | null;
[key: string]: unknown;
} | null;
logEvent: (payload: Record<string, unknown>) => void;
@ -67,6 +73,18 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
predecomposeContract?.period && typeof predecomposeContract.period === "object"
? predecomposeContract.period
: null;
const semanticExtractionContract =
runtimeMeta.semanticExtractionContract && typeof runtimeMeta.semanticExtractionContract === "object"
? (runtimeMeta.semanticExtractionContract as {
valid?: unknown;
quality?: unknown;
apply_canonical_recommended?: unknown;
reason_codes?: unknown;
})
: null;
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
? semanticExtractionContract?.reason_codes
: [];
input.logEvent({
timestamp: input.nowIso(),
@ -88,7 +106,12 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
address_tool_gate_reason: runtimeMeta.toolGateReason ?? null,
address_llm_predecompose_contract_intent: predecomposeContract?.intent ?? null,
address_llm_predecompose_contract_aggregation_profile: predecomposeContract?.aggregation_profile ?? null,
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null,
address_semantic_contract_valid: semanticExtractionContract?.valid ?? null,
address_semantic_contract_quality: semanticExtractionContract?.quality ?? null,
address_semantic_apply_canonical_recommended:
semanticExtractionContract?.apply_canonical_recommended ?? null,
address_semantic_reason_codes: semanticReasonCodes
}
});
@ -107,4 +130,3 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
response: null
};
}

View File

@ -41,6 +41,12 @@ export interface AddressLlmPreDecomposeMetaLogInput {
scope?: string | null;
} | null;
} | null;
semanticExtractionContract?: {
valid?: boolean | null;
quality?: string | null;
apply_canonical_recommended?: boolean | null;
reason_codes?: string[] | null;
} | null;
}
export interface FinalizeAssistantAddressTurnInput {
@ -109,6 +115,13 @@ function buildAddressProcessedLogDetails(input: FinalizeAssistantAddressTurnInpu
address_llm_predecompose_contract_intent: llmMeta?.predecomposeContract?.intent ?? null,
address_llm_predecompose_contract_aggregation_profile: llmMeta?.predecomposeContract?.aggregation_profile ?? null,
address_llm_predecompose_contract_period_scope: llmMeta?.predecomposeContract?.period?.scope ?? null,
address_semantic_contract_valid: llmMeta?.semanticExtractionContract?.valid ?? null,
address_semantic_contract_quality: llmMeta?.semanticExtractionContract?.quality ?? null,
address_semantic_apply_canonical_recommended:
llmMeta?.semanticExtractionContract?.apply_canonical_recommended ?? null,
address_semantic_reason_codes: Array.isArray(llmMeta?.semanticExtractionContract?.reason_codes)
? llmMeta.semanticExtractionContract?.reason_codes
: [],
detected_mode: laneDebug.detected_mode,
query_shape: laneDebug.query_shape,
detected_intent: laneDebug.detected_intent,

View File

@ -161,6 +161,7 @@ export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInp
address_tool_gate_decision: input.addressRuntimeMetaForDeep?.toolGateDecision ?? null,
address_tool_gate_reason: input.addressRuntimeMetaForDeep?.toolGateReason ?? null,
address_llm_predecompose_contract: input.addressRuntimeMetaForDeep?.predecomposeContract ?? null,
address_semantic_extraction_contract: input.addressRuntimeMetaForDeep?.semanticExtractionContract ?? null,
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
assistant_outcome_class_v1: input.outcomeClassV1,
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,

View File

@ -159,6 +159,10 @@ export async function runAssistantLivingChatRuntime(
addressRuntimeMeta.predecomposeContract && typeof addressRuntimeMeta.predecomposeContract === "object"
? (addressRuntimeMeta.predecomposeContract as Record<string, unknown>)
: null;
const semanticExtractionContract =
addressRuntimeMeta.semanticExtractionContract && typeof addressRuntimeMeta.semanticExtractionContract === "object"
? (addressRuntimeMeta.semanticExtractionContract as Record<string, unknown>)
: null;
const debug: Record<string, unknown> = {
trace_id: input.traceIdFactory(),
@ -191,6 +195,7 @@ export async function runAssistantLivingChatRuntime(
address_llm_predecompose_applied: Boolean(addressRuntimeMeta.applied),
address_llm_predecompose_reason: addressRuntimeMeta.reason ?? null,
address_llm_predecompose_contract: predecomposeContract,
address_semantic_extraction_contract: semanticExtractionContract,
orchestration_contract_v1: addressRuntimeMeta.orchestrationContract ?? null,
tool_gate_decision: addressRuntimeMeta.toolGateDecision ?? null,
tool_gate_reason: addressRuntimeMeta.toolGateReason ?? null,

View File

@ -2714,12 +2714,19 @@ function hasSameDateAccountFollowupSignalForPredecompose(text) {
}
/**
 * Returns a shallow copy of the pre-decompose meta extended with two derived contracts.
 *
 * The canonical message is `meta.effectiveMessage` when `toNonEmptyString` yields a value
 * for it; otherwise the stringified source message is used, so both contracts are always
 * built from a string pair.
 *
 * @param {object | null | undefined} meta - Pre-decompose meta; its `effectiveMessage`
 *   (if any) is treated as the canonical candidate. All own fields are copied to the result.
 * @param {unknown} sourceMessage - Original user message; `null`/`undefined` become "".
 * @returns {object} `meta` fields plus `predecomposeContract` and `semanticExtractionContract`.
 */
function attachAddressPredecomposeContract(meta, sourceMessage) {
    // Normalize once so both contract builders receive the exact same source string.
    const normalizedSourceMessage = String(sourceMessage ?? "");
    const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? normalizedSourceMessage;
    const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({
        sourceMessage: normalizedSourceMessage,
        canonicalMessage
    });
    // The semantic contract is derived from the same message pair and reuses the
    // predecompose contract built above instead of letting the builder recompute it.
    const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
        sourceMessage: normalizedSourceMessage,
        canonicalMessage,
        predecomposeContract
    });
    return {
        ...meta,
        predecomposeContract,
        semanticExtractionContract
    };
}
async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) {
@ -2910,6 +2917,63 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
}, userMessage);
}
}
const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
sourceMessage: String(userMessage ?? ""),
canonicalMessage: candidate
});
if (!semanticContractForCandidate.apply_canonical_recommended) {
const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected);
const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected &&
candidateFromNormalized &&
candidateFromNormalized === candidate &&
toNonEmptyString(candidate));
if (rawFragmentCandidatePreferred) {
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: true,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: candidate,
reason: "normalized_fragment_semantic_guard_raw_fragment_preferred",
fallbackRuleHit: null,
sanitizedUserMessage
}, userMessage);
}
if (fallbackCandidate) {
const fallbackSemanticContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
sourceMessage: String(userMessage ?? ""),
canonicalMessage: String(fallbackCandidate.candidate ?? "")
});
const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase());
const sourceCompactForFallback = compactWhitespace(String(userMessage ?? "").toLowerCase());
const fallbackApplied = fallbackCompact.length > 0 && fallbackCompact !== sourceCompactForFallback;
if (fallbackApplied && fallbackSemanticContract.apply_canonical_recommended && !sourceDataSignalDetected) {
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: true,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: String(fallbackCandidate.candidate ?? ""),
reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard",
fallbackRuleHit: fallbackCandidate.rule,
sanitizedUserMessage
}, userMessage);
}
}
return attachAddressPredecomposeContract({
...baseMeta,
attempted: true,
applied: false,
traceId: normalized?.trace_id ?? null,
llmCanonicalCandidateDetected: true,
effectiveMessage: userMessage,
reason: "normalized_fragment_rejected_semantic_guard",
fallbackRuleHit: null,
sanitizedUserMessage
}, userMessage);
}
const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase());
const candidateCompact = compactWhitespace(candidate.toLowerCase());
const applied = sourceCompact !== candidateCompact;
@ -2986,13 +3050,20 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
const llmContractMode = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode);
const llmContractModeConfidence = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode_confidence);
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
? llmPreDecomposeMeta.semanticExtractionContract
: null;
const semanticCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
const llmCanonicalEntitySignal = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|customer|supplier|counterparty|company|vendor|client)/iu.test(compactWhitespace(repairedInputMessage.toLowerCase()));
const llmCanonicalAppliedSignal = Boolean(llmPreDecomposeMeta?.applied) && llmContractMode !== "deep_analysis";
const hasLlmCanonicalSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
const hasLlmCanonicalSignal = semanticCanonicalRecommended &&
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
((llmContractMode === "address_query" && llmContractModeConfidence !== "low") ||
(llmCanonicalAppliedSignal &&
(hasStrongDataIntentSignal(repairedInputMessage) || llmCanonicalEntitySignal)));
const hasLlmCanonicalDataSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
const hasLlmCanonicalDataSignal = semanticCanonicalRecommended &&
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
Boolean(llmPreDecomposeMeta?.applied) &&
(llmContractMode === "address_query" || llmContractMode === "unsupported" || llmContractMode === null) &&
hasStrongDataIntentSignal(repairedInputMessage);
@ -3013,6 +3084,17 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
const strongDataSignalFromEffectiveMessage = hasStrongDataIntentSignal(repairedInputMessage) ||
hasAccountingSignal(repairedInputMessage) ||
hasDataRetrievalRequestSignal(repairedInputMessage);
if (!semanticCanonicalRecommended &&
llmContractIntent === "unknown" &&
!followupContext &&
!hasClassifierSignal &&
!strongDataSignalFromRawMessage) {
return {
runAddressLane: false,
decision: "skip_address_lane",
reason: "llm_predecompose_semantic_guard_rejected"
};
}
if (hasUnsupportedLowConfidencePredecomposeSignal && !followupContext &&
!hasAnyAddressSignal &&
!strongDataSignalFromRawMessage &&
@ -3198,11 +3280,21 @@ export function resolveAssistantOrchestrationDecision(input) {
const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample);
const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample);
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
? llmPreDecomposeMeta.semanticExtractionContract
: null;
const semanticContractValid = semanticExtractionContract?.valid !== false;
const semanticApplyCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
? semanticExtractionContract.reason_codes
: [];
const strictDeepInvestigationCueDetected = hasStrictDeepInvestigationCue(rawUserMessage) ||
hasStrictDeepInvestigationCue(repairedRawUserMessage) ||
hasStrictDeepInvestigationCue(effectiveAddressUserMessage) ||
hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage);
const keepAddressLaneByIntent = Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
const keepAddressLaneByIntent = semanticApplyCanonicalRecommended &&
Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
(llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent))) &&
!strictDeepInvestigationCueDetected;
const strongDataSignal = hasStrongDataIntentSignal(rawUserMessage) ||
@ -3402,6 +3494,9 @@ export function resolveAssistantOrchestrationDecision(input) {
address_intent_confidence: intentResolution.confidence,
strong_data_signal_detected: strongDataSignal,
data_retrieval_signal_detected: dataRetrievalSignal,
semantic_contract_valid: semanticContractValid,
semantic_apply_canonical_recommended: semanticApplyCanonicalRecommended,
semantic_reason_codes: semanticReasonCodes,
followup_context_detected: Boolean(followupContext),
unsupported_address_intent_fallback_to_deep: unsupportedAddressIntentFallbackToDeep,
deep_analysis_signal_fallback_to_deep: deepAnalysisSignalFallbackToDeep,

View File

@ -70,6 +70,7 @@ export interface AssistantAddressRuntimeMetaForDeep {
toolGateDecision?: string | null;
toolGateReason?: string | null;
predecomposeContract?: Record<string, unknown> | null;
semanticExtractionContract?: Record<string, unknown> | null;
orchestrationContract?: Record<string, unknown> | null;
}

View File

@ -1047,14 +1047,16 @@ describe("assistant address llm pre-decompose candidate preference", () => {
} as any);
expect(response.ok).toBe(true);
expect(response.reply_type).toBe("factual");
expect(calls).toHaveLength(1);
expect(calls[0].message).toBe("заказчики компании svk");
expect(response.debug?.llm_decomposition_attempted).toBe(true);
expect(response.debug?.llm_decomposition_applied).toBe(true);
expect(response.debug?.llm_canonical_candidate_detected).toBe(true);
expect(response.debug?.tool_gate_decision).toBe("run_address_lane");
expect(["llm_canonical_candidate_detected", "llm_canonical_data_signal_detected", "address_mode_classifier_detected"]).toContain(response.debug?.tool_gate_reason);
expect(response.reply_type).toBe("clarification_required");
expect(calls).toHaveLength(0);
expect(response.debug?.address_tool_gate_decision).toBe("skip_address_lane");
expect(
[
"llm_predecompose_semantic_guard_rejected",
"llm_predecompose_unsupported_mode",
"address_signal_unsupported_intent_fallback_to_deep"
]
).toContain(response.debug?.address_tool_gate_reason);
});
it("normalizes short ordinal year like '20й' in noisy docs phrasing", async () => {

View File

@ -0,0 +1,62 @@
import { describe, expect, it } from "vitest";
import {
buildAddressLlmPredecomposeContractV1,
buildAddressSemanticExtractionContractV1
} from "../src/services/address_runtime/predecomposeContract";
// Regression suite for the semantic extraction contract built on top of the
// address predecompose contract: one rejection case, one drift case, one accept case.
describe("address semantic extraction contract", () => {
  it("rejects low-confidence unsupported rewrite without data signal", () => {
    // A meaningless source and an equally meaningless rewrite; the predecompose
    // contract is built explicitly here and handed to the semantic builder.
    const messages = { sourceMessage: "yo", canonicalMessage: "yoft" };
    const contract = buildAddressLlmPredecomposeContractV1(messages);
    const result = buildAddressSemanticExtractionContractV1({
      ...messages,
      predecomposeContract: contract
    });
    const { guard_hints: hints } = result;

    expect(result.schema_version).toBe("address_semantic_extraction_contract_v1");
    expect(hints.source_data_signal_detected).toBe(false);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.unsupported_low_confidence).toBe(true);
    expect(result.valid).toBe(false);
    expect(result.apply_canonical_recommended).toBe(false);
    expect(result.reason_codes).toContain("unsupported_low_confidence_contract");
  });

  it("flags semantic drift when canonical loses data intent", () => {
    // The source asks for data, the rewrite does not; no predecompose contract
    // is supplied, only the message pair.
    const messages = {
      sourceMessage: "покажи документы по договору 12",
      canonicalMessage: "помоги разобраться"
    };
    const result = buildAddressSemanticExtractionContractV1(messages);
    const { guard_hints: hints } = result;

    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.semantic_drift_suspected).toBe(true);
    expect(result.valid).toBe(false);
    expect(result.apply_canonical_recommended).toBe(false);
    expect(result.reason_codes).toContain("semantic_drift_source_vs_canonical");
  });

  it("keeps canonical rewrite when semantic contract remains coherent", () => {
    // Both the source and the rewrite carry the data request, so the rewrite
    // is expected to stay recommended.
    const messages = {
      sourceMessage: "Покажи незакрытые договоры на 2020-12-31",
      canonicalMessage: "Показать незакрытые договоры по состоянию на конец декабря 2020 года."
    };
    const result = buildAddressSemanticExtractionContractV1(messages);
    const { guard_hints: hints } = result;
    const acceptableQualities = ["high", "medium"];

    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(true);
    expect(result.valid).toBe(true);
    expect(result.apply_canonical_recommended).toBe(true);
    expect(acceptableQualities).toContain(result.quality);
  });
});