ГЛОБАЛЬНЫЙ РЕФАКТОРИНГ АРХИТЕКТУРЫ - Рефакторинг этапов 3.1 - Добавлен строгий контракт семантического извлечения (валидность, качество, reason-codes, рекомендация применять canonical). Подключен semantic guard в predecompose/runtime: мягкое отклонение плохого canonical, отдельный safe-кейс для сырого полезного raw-фрагмента; fallback теперь выигрывает только тогда, когда это оправдано. Подключен semantic arbitration в tool-gate.
This commit is contained in:
parent
66402439dc
commit
a7ccc92a9c
|
|
@ -2349,6 +2349,23 @@ Plan (Stage 3):
|
|||
- Coverage critic threshold before final answer.
|
||||
- Reason-code taxonomy normalized.
|
||||
|
||||
Implemented in current pass (Stage 3.1 kick-off):
|
||||
1. Added strict semantic extraction contract for address predecompose:
|
||||
- `address_semantic_extraction_contract_v1` built from `source_message + canonical_message + predecompose contract`.
|
||||
- Captures: extraction snapshot, guard hints, quality, validity, apply-canonical recommendation, reason codes.
|
||||
2. Added semantic guard integration in predecompose runtime:
|
||||
- New rejection path for low-value/unsafe canonical rewrites (`normalized_fragment_rejected_semantic_guard`).
|
||||
- Fallback preference updated: deterministic fallback can win over LLM canonical when semantic contract rejects rewrite.
|
||||
3. Added semantic arbitration integration in tool-gate:
|
||||
- `resolveAddressToolGateDecision(...)` now honors `apply_canonical_recommended`.
|
||||
- New skip reason: `llm_predecompose_semantic_guard_rejected`.
|
||||
4. Extended runtime/debug observability:
|
||||
- semantic contract propagated through address tool-gate, deep debug payload, and living chat debug payload.
|
||||
- orchestration contract now includes semantic guard fields (`semantic_contract_valid`, `semantic_apply_canonical_recommended`, `semantic_reason_codes`).
|
||||
5. Added focused regression tests:
|
||||
- `assistantSemanticExtractionContract.test.ts` (new)
|
||||
- Existing router/chat regressions revalidated (`assistantLivingRouter.test.ts`, `assistantLivingChatMode.test.ts`).
|
||||
|
||||
Acceptance (Stage 3):
|
||||
1. LLM outputs strictly validated schema for extraction/decomposition (no free-form).
|
||||
2. Deterministic guards can block or downgrade answers when evidence insufficient.
|
||||
|
|
|
|||
|
|
@ -1,10 +1,54 @@
|
|||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.buildAddressLlmPredecomposeContractV1 = buildAddressLlmPredecomposeContractV1;
|
||||
exports.buildAddressSemanticExtractionContractV1 = buildAddressSemanticExtractionContractV1;
|
||||
const addressQueryClassifier_1 = require("../addressQueryClassifier");
|
||||
const addressQueryShapeClassifier_1 = require("../addressQueryShapeClassifier");
|
||||
const addressIntentResolver_1 = require("../addressIntentResolver");
|
||||
const addressFilterExtractor_1 = require("../addressFilterExtractor");
|
||||
// Keyword patterns used by the semantic extraction guard. All of them are
// case-insensitive, Unicode-aware and unanchored (substring matches), and none
// carries the `g` flag, so `.test()` keeps no state between calls.

// Data-retrieval vocabulary: escaped Cyrillic stems plus English and
// transliterated keywords (documents, contracts, accounts, balances, ...).
const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN = /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu;

// Business-entity vocabulary (customer, supplier, counterparty, company, ...).
const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu;

// Meta questions about which database / tenant / data scope is in use.
const ADDRESS_SEMANTIC_SCOPE_META_PATTERN = /(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu;

// Investigation cues (verify, analyse, why, mechanism, root cause, ...).
const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN = /(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu;
|
||||
/**
 * Normalizes arbitrary input to a comparison-friendly string.
 *
 * `null`/`undefined` become the empty string; anything else is stringified,
 * lower-cased, has every whitespace run collapsed to a single space, and is
 * trimmed.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} The normalized text.
 */
function normalizeCompact(value) {
    return String(value ?? "")
        .toLowerCase()
        .replace(/\s+/g, " ")
        .trim();
}
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN`.
 *
 * @param {unknown} value - Message text (normalized via `normalizeCompact`).
 * @returns {boolean} `true` when the data-signal pattern matches.
 */
function hasSemanticDataSignal(value) {
    return ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN.test(normalizeCompact(value));
}
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN`.
 *
 * @param {unknown} value - Message text (normalized via `normalizeCompact`).
 * @returns {boolean} `true` when the entity-signal pattern matches.
 */
function hasSemanticEntitySignal(value) {
    return ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN.test(normalizeCompact(value));
}
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_SCOPE_META_PATTERN`.
 *
 * @param {unknown} value - Message text (normalized via `normalizeCompact`).
 * @returns {boolean} `true` when the data-scope meta pattern matches.
 */
function hasSemanticDataScopeMetaSignal(value) {
    return ADDRESS_SEMANTIC_SCOPE_META_PATTERN.test(normalizeCompact(value));
}
|
||||
/**
 * Tests the normalized text against
 * `ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN`.
 *
 * @param {unknown} value - Message text (normalized via `normalizeCompact`).
 * @returns {boolean} `true` when the deep-investigation pattern matches.
 */
function hasSemanticDeepInvestigationSignal(value) {
    return ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN.test(normalizeCompact(value));
}
|
||||
/**
 * Reports whether the contract's intent needs an entity anchor that was not
 * extracted.
 *
 * The three `*_by_counterparty` intents need `entities.counterparty`; the two
 * `*_by_contract` intents need `entities.contract`. Emptiness is decided by
 * `toNonEmptyString`. Every other intent has no required anchor.
 *
 * @param {object} contract - Predecompose contract exposing `intent` and `entities`.
 * @returns {boolean} `true` when a required anchor is absent.
 */
function requiredAnchorMissing(contract) {
    const intent = contract.intent;
    if (intent === "list_documents_by_counterparty" ||
        intent === "bank_operations_by_counterparty" ||
        intent === "list_contracts_by_counterparty") {
        return !toNonEmptyString(contract.entities.counterparty);
    }
    if (intent === "list_documents_by_contract" || intent === "bank_operations_by_contract") {
        return !toNonEmptyString(contract.entities.contract);
    }
    return false;
}
|
||||
/**
 * Grades a semantic extraction as `"low"`, `"medium"` or `"high"`.
 *
 * An invalid extraction is always `"low"`. A valid one is downgraded to
 * `"medium"` when any of the three weakness flags is set; otherwise it is
 * `"high"`.
 *
 * @param {{valid: boolean, unsupportedLowConfidence: boolean, requiredAnchorMissing: boolean, unknownIntentAndShape: boolean}} input
 *   Validity verdict plus the weakness flags.
 * @returns {"high" | "medium" | "low"} The quality grade.
 */
function deriveSemanticQuality(input) {
    if (!input.valid) {
        return "low";
    }
    if (input.unsupportedLowConfidence || input.requiredAnchorMissing || input.unknownIntentAndShape) {
        return "medium";
    }
    return "high";
}
|
||||
function toNonEmptyString(value) {
|
||||
if (value === null || value === undefined) {
|
||||
return null;
|
||||
|
|
@ -108,3 +152,98 @@ function buildAddressLlmPredecomposeContractV1(input) {
|
|||
aggregation_profile: inferAggregationProfile(intent.intent, shape.shape)
|
||||
};
|
||||
}
|
||||
/**
 * Builds the `address_semantic_extraction_contract_v1` object that judges
 * whether a canonical rewrite of a user message should be applied.
 *
 * The contract combines keyword signals detected in the source and canonical
 * messages with the predecompose contract (built here when the caller does not
 * supply one) and reports validity, a quality grade, an apply-canonical
 * recommendation and the reason codes behind them.
 *
 * @param {object} input
 * @param {string} input.sourceMessage - Original user message.
 * @param {string} input.canonicalMessage - Rewritten message; when blank the
 *   source message is used instead.
 * @param {object|null} [input.predecomposeContract] - Pre-built predecompose
 *   contract; built from the two messages when nullish.
 * @returns {object} The semantic extraction contract.
 */
function buildAddressSemanticExtractionContractV1(input) {
    const sourceMessage = String(input.sourceMessage ?? "").trim();
    // A blank canonical message degrades to "no rewrite" rather than to an empty query.
    const canonicalMessage = String(input.canonicalMessage ?? "").trim() || sourceMessage;
    const predecomposeContract = input.predecomposeContract ??
        buildAddressLlmPredecomposeContractV1({
            sourceMessage,
            canonicalMessage
        });

    // Keyword signals on both sides of the rewrite.
    const sourceDataSignal = hasSemanticDataSignal(sourceMessage);
    const canonicalDataSignal = hasSemanticDataSignal(canonicalMessage);
    const canonicalEntitySignal = hasSemanticEntitySignal(canonicalMessage);
    const dataScopeMetaSignal = hasSemanticDataScopeMetaSignal(sourceMessage);
    const deepInvestigationSignal = hasSemanticDeepInvestigationSignal(sourceMessage) || hasSemanticDeepInvestigationSignal(canonicalMessage);

    // Structural weaknesses of the predecompose contract.
    // Note: despite the name, "medium" mode confidence also counts here.
    const unsupportedLowConfidence = predecomposeContract.mode === "unsupported" &&
        (predecomposeContract.mode_confidence === "low" || predecomposeContract.mode_confidence === "medium");
    const missingRequiredAnchor = requiredAnchorMissing(predecomposeContract);
    const unknownIntentAndShape = predecomposeContract.intent === "unknown" && predecomposeContract.query_shape === "UNKNOWN";

    const rewriteApplied = normalizeCompact(sourceMessage) !== normalizeCompact(canonicalMessage);
    // Drift: the rewrite dropped every data keyword the source message carried.
    const semanticDriftSuspected = rewriteApplied && sourceDataSignal && !canonicalDataSignal;

    const reasonCodes = [];
    if (dataScopeMetaSignal) {
        reasonCodes.push("data_scope_meta_query_detected");
    }
    if (unsupportedLowConfidence) {
        reasonCodes.push("unsupported_low_confidence_contract");
    }
    if (missingRequiredAnchor) {
        reasonCodes.push("required_anchor_missing_for_intent");
    }
    if (semanticDriftSuspected) {
        reasonCodes.push("semantic_drift_source_vs_canonical");
    }
    if (unknownIntentAndShape && rewriteApplied) {
        reasonCodes.push("rewrite_without_structured_gain");
    }
    if (deepInvestigationSignal) {
        reasonCodes.push("deep_investigation_signal_detected");
    }

    const valid = !dataScopeMetaSignal &&
        !semanticDriftSuspected &&
        !(unsupportedLowConfidence && !canonicalDataSignal && !sourceDataSignal && !canonicalEntitySignal) &&
        !(unknownIntentAndShape && rewriteApplied && !canonicalDataSignal && !canonicalEntitySignal);
    // A valid contract can still advise against applying the rewrite.
    const applyCanonicalRecommended = valid &&
        !(unsupportedLowConfidence && sourceDataSignal && rewriteApplied) &&
        !(missingRequiredAnchor && rewriteApplied);
    const quality = deriveSemanticQuality({
        valid,
        unsupportedLowConfidence,
        requiredAnchorMissing: missingRequiredAnchor,
        unknownIntentAndShape
    });

    return {
        schema_version: "address_semantic_extraction_contract_v1",
        source_message: sourceMessage,
        canonical_message: canonicalMessage,
        canonical_rewrite_applied: rewriteApplied,
        extraction: {
            mode: predecomposeContract.mode,
            mode_confidence: predecomposeContract.mode_confidence,
            query_shape: predecomposeContract.query_shape,
            query_shape_confidence: predecomposeContract.query_shape_confidence,
            intent: predecomposeContract.intent,
            intent_confidence: predecomposeContract.intent_confidence,
            aggregation_profile: predecomposeContract.aggregation_profile
        },
        // Fields are copied one by one so only the known keys are exposed.
        entities: {
            account: predecomposeContract.entities.account,
            counterparty: predecomposeContract.entities.counterparty,
            contract: predecomposeContract.entities.contract,
            document_type: predecomposeContract.entities.document_type,
            document_ref: predecomposeContract.entities.document_ref,
            organization: predecomposeContract.entities.organization
        },
        period: {
            scope: predecomposeContract.period.scope,
            period_from: predecomposeContract.period.period_from,
            period_to: predecomposeContract.period.period_to,
            as_of_date: predecomposeContract.period.as_of_date,
            has_explicit_period: predecomposeContract.period.has_explicit_period
        },
        guard_hints: {
            source_data_signal_detected: sourceDataSignal,
            canonical_data_signal_detected: canonicalDataSignal,
            data_scope_meta_query_detected: dataScopeMetaSignal,
            deep_investigation_signal_detected: deepInvestigationSignal,
            required_anchor_missing: missingRequiredAnchor,
            unsupported_low_confidence: unsupportedLowConfidence,
            semantic_drift_suspected: semanticDriftSuspected
        },
        quality,
        valid,
        apply_canonical_recommended: applyCanonicalRecommended,
        reason_codes: reasonCodes
    };
}
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ function normalizeLlmPreDecomposeMeta(value) {
|
|||
const addressRetryAuditRaw = toRecordObject(source.addressRetryAudit);
|
||||
const predecomposeContractRaw = toRecordObject(source.predecomposeContract);
|
||||
const predecomposePeriodRaw = toRecordObject(predecomposeContractRaw?.period);
|
||||
const semanticExtractionContractRaw = toRecordObject(source.semanticExtractionContract);
|
||||
const normalized = {};
|
||||
const attempted = toNullableBoolean(source.attempted);
|
||||
if (attempted !== undefined)
|
||||
|
|
@ -121,6 +122,24 @@ function normalizeLlmPreDecomposeMeta(value) {
|
|||
};
|
||||
}
|
||||
}
|
||||
if (semanticExtractionContractRaw) {
|
||||
const valid = toNullableBoolean(semanticExtractionContractRaw.valid);
|
||||
const quality = toNullableString(semanticExtractionContractRaw.quality);
|
||||
const applyCanonicalRecommended = toNullableBoolean(semanticExtractionContractRaw.apply_canonical_recommended);
|
||||
const reasonCodes = Array.isArray(semanticExtractionContractRaw.reason_codes)
|
||||
? semanticExtractionContractRaw.reason_codes
|
||||
.map((item) => toNullableString(item))
|
||||
.filter((item) => Boolean(item))
|
||||
: [];
|
||||
if (valid !== undefined || quality || applyCanonicalRecommended !== undefined || reasonCodes.length > 0) {
|
||||
normalized.semanticExtractionContract = {
|
||||
valid: valid ?? null,
|
||||
quality,
|
||||
apply_canonical_recommended: applyCanonicalRecommended ?? null,
|
||||
reason_codes: reasonCodes
|
||||
};
|
||||
}
|
||||
}
|
||||
const hasUsefulField = Object.values(normalized).some((item) => item !== undefined && item !== null);
|
||||
return hasUsefulField ? normalized : null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,12 @@ async function runAssistantAddressToolGateRuntime(input) {
|
|||
const predecomposePeriod = predecomposeContract?.period && typeof predecomposeContract.period === "object"
|
||||
? predecomposeContract.period
|
||||
: null;
|
||||
const semanticExtractionContract = runtimeMeta.semanticExtractionContract && typeof runtimeMeta.semanticExtractionContract === "object"
|
||||
? runtimeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
|
||||
? semanticExtractionContract?.reason_codes
|
||||
: [];
|
||||
input.logEvent({
|
||||
timestamp: input.nowIso(),
|
||||
level: "info",
|
||||
|
|
@ -37,7 +43,11 @@ async function runAssistantAddressToolGateRuntime(input) {
|
|||
address_tool_gate_reason: runtimeMeta.toolGateReason ?? null,
|
||||
address_llm_predecompose_contract_intent: predecomposeContract?.intent ?? null,
|
||||
address_llm_predecompose_contract_aggregation_profile: predecomposeContract?.aggregation_profile ?? null,
|
||||
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null
|
||||
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null,
|
||||
address_semantic_contract_valid: semanticExtractionContract?.valid ?? null,
|
||||
address_semantic_contract_quality: semanticExtractionContract?.quality ?? null,
|
||||
address_semantic_apply_canonical_recommended: semanticExtractionContract?.apply_canonical_recommended ?? null,
|
||||
address_semantic_reason_codes: semanticReasonCodes
|
||||
}
|
||||
});
|
||||
if (input.livingModeDecision?.mode === "chat") {
|
||||
|
|
|
|||
|
|
@ -41,6 +41,12 @@ function buildAddressProcessedLogDetails(input, assistantItem) {
|
|||
address_llm_predecompose_contract_intent: llmMeta?.predecomposeContract?.intent ?? null,
|
||||
address_llm_predecompose_contract_aggregation_profile: llmMeta?.predecomposeContract?.aggregation_profile ?? null,
|
||||
address_llm_predecompose_contract_period_scope: llmMeta?.predecomposeContract?.period?.scope ?? null,
|
||||
address_semantic_contract_valid: llmMeta?.semanticExtractionContract?.valid ?? null,
|
||||
address_semantic_contract_quality: llmMeta?.semanticExtractionContract?.quality ?? null,
|
||||
address_semantic_apply_canonical_recommended: llmMeta?.semanticExtractionContract?.apply_canonical_recommended ?? null,
|
||||
address_semantic_reason_codes: Array.isArray(llmMeta?.semanticExtractionContract?.reason_codes)
|
||||
? llmMeta.semanticExtractionContract?.reason_codes
|
||||
: [],
|
||||
detected_mode: laneDebug.detected_mode,
|
||||
query_shape: laneDebug.query_shape,
|
||||
detected_intent: laneDebug.detected_intent,
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ function buildDeepAnalysisDebugPayload(input) {
|
|||
address_tool_gate_decision: input.addressRuntimeMetaForDeep?.toolGateDecision ?? null,
|
||||
address_tool_gate_reason: input.addressRuntimeMetaForDeep?.toolGateReason ?? null,
|
||||
address_llm_predecompose_contract: input.addressRuntimeMetaForDeep?.predecomposeContract ?? null,
|
||||
address_semantic_extraction_contract: input.addressRuntimeMetaForDeep?.semanticExtractionContract ?? null,
|
||||
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
|
|
|
|||
|
|
@ -99,6 +99,9 @@ async function runAssistantLivingChatRuntime(input) {
|
|||
const predecomposeContract = addressRuntimeMeta.predecomposeContract && typeof addressRuntimeMeta.predecomposeContract === "object"
|
||||
? addressRuntimeMeta.predecomposeContract
|
||||
: null;
|
||||
const semanticExtractionContract = addressRuntimeMeta.semanticExtractionContract && typeof addressRuntimeMeta.semanticExtractionContract === "object"
|
||||
? addressRuntimeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const debug = {
|
||||
trace_id: input.traceIdFactory(),
|
||||
prompt_version: "living_chat_router_v1",
|
||||
|
|
@ -130,6 +133,7 @@ async function runAssistantLivingChatRuntime(input) {
|
|||
address_llm_predecompose_applied: Boolean(addressRuntimeMeta.applied),
|
||||
address_llm_predecompose_reason: addressRuntimeMeta.reason ?? null,
|
||||
address_llm_predecompose_contract: predecomposeContract,
|
||||
address_semantic_extraction_contract: semanticExtractionContract,
|
||||
orchestration_contract_v1: addressRuntimeMeta.orchestrationContract ?? null,
|
||||
tool_gate_decision: addressRuntimeMeta.toolGateDecision ?? null,
|
||||
tool_gate_reason: addressRuntimeMeta.toolGateReason ?? null,
|
||||
|
|
|
|||
|
|
@ -2758,12 +2758,19 @@ function hasSameDateAccountFollowupSignalForPredecompose(text) {
|
|||
}
|
||||
/**
 * Returns a copy of `meta` extended with the predecompose contract and the
 * semantic extraction contract for the given message pair.
 *
 * The canonical message is `meta.effectiveMessage` when `toNonEmptyString`
 * accepts it, otherwise the stringified source message. The predecompose
 * contract is built once and passed to the semantic contract builder.
 *
 * @param {object|null|undefined} meta - Pre-decompose metadata to extend.
 * @param {unknown} sourceMessage - Original user message.
 * @returns {object} `meta` fields plus `predecomposeContract` and
 *   `semanticExtractionContract`.
 */
function attachAddressPredecomposeContract(meta, sourceMessage) {
    const sourceText = String(sourceMessage ?? "");
    const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? sourceText;
    const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({
        sourceMessage: sourceText,
        canonicalMessage
    });
    const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
        sourceMessage: sourceText,
        canonicalMessage,
        predecomposeContract
    });
    return {
        ...meta,
        predecomposeContract,
        semanticExtractionContract
    };
}
|
||||
async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) {
|
||||
|
|
@ -2954,6 +2961,63 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
|
|||
}, userMessage);
|
||||
}
|
||||
}
|
||||
const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
|
||||
sourceMessage: String(userMessage ?? ""),
|
||||
canonicalMessage: candidate
|
||||
});
|
||||
if (!semanticContractForCandidate.apply_canonical_recommended) {
|
||||
const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected);
|
||||
const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected &&
|
||||
candidateFromNormalized &&
|
||||
candidateFromNormalized === candidate &&
|
||||
toNonEmptyString(candidate));
|
||||
if (rawFragmentCandidatePreferred) {
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: true,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: candidate,
|
||||
reason: "normalized_fragment_semantic_guard_raw_fragment_preferred",
|
||||
fallbackRuleHit: null,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
if (fallbackCandidate) {
|
||||
const fallbackSemanticContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
|
||||
sourceMessage: String(userMessage ?? ""),
|
||||
canonicalMessage: String(fallbackCandidate.candidate ?? "")
|
||||
});
|
||||
const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase());
|
||||
const sourceCompactForFallback = compactWhitespace(String(userMessage ?? "").toLowerCase());
|
||||
const fallbackApplied = fallbackCompact.length > 0 && fallbackCompact !== sourceCompactForFallback;
|
||||
if (fallbackApplied && fallbackSemanticContract.apply_canonical_recommended && !sourceDataSignalDetected) {
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: true,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: String(fallbackCandidate.candidate ?? ""),
|
||||
reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard",
|
||||
fallbackRuleHit: fallbackCandidate.rule,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
}
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: false,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: userMessage,
|
||||
reason: "normalized_fragment_rejected_semantic_guard",
|
||||
fallbackRuleHit: null,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase());
|
||||
const candidateCompact = compactWhitespace(candidate.toLowerCase());
|
||||
const applied = sourceCompact !== candidateCompact;
|
||||
|
|
@ -3030,13 +3094,20 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
|
|||
const llmContractMode = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode);
|
||||
const llmContractModeConfidence = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode_confidence);
|
||||
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
|
||||
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
|
||||
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
|
||||
? llmPreDecomposeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const semanticCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
|
||||
const llmCanonicalEntitySignal = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|customer|supplier|counterparty|company|vendor|client)/iu.test(compactWhitespace(repairedInputMessage.toLowerCase()));
|
||||
const llmCanonicalAppliedSignal = Boolean(llmPreDecomposeMeta?.applied) && llmContractMode !== "deep_analysis";
|
||||
const hasLlmCanonicalSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
const hasLlmCanonicalSignal = semanticCanonicalRecommended &&
|
||||
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
((llmContractMode === "address_query" && llmContractModeConfidence !== "low") ||
|
||||
(llmCanonicalAppliedSignal &&
|
||||
(hasStrongDataIntentSignal(repairedInputMessage) || llmCanonicalEntitySignal)));
|
||||
const hasLlmCanonicalDataSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
const hasLlmCanonicalDataSignal = semanticCanonicalRecommended &&
|
||||
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
Boolean(llmPreDecomposeMeta?.applied) &&
|
||||
(llmContractMode === "address_query" || llmContractMode === "unsupported" || llmContractMode === null) &&
|
||||
hasStrongDataIntentSignal(repairedInputMessage);
|
||||
|
|
@ -3057,6 +3128,17 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
|
|||
const strongDataSignalFromEffectiveMessage = hasStrongDataIntentSignal(repairedInputMessage) ||
|
||||
hasAccountingSignal(repairedInputMessage) ||
|
||||
hasDataRetrievalRequestSignal(repairedInputMessage);
|
||||
if (!semanticCanonicalRecommended &&
|
||||
llmContractIntent === "unknown" &&
|
||||
!followupContext &&
|
||||
!hasClassifierSignal &&
|
||||
!strongDataSignalFromRawMessage) {
|
||||
return {
|
||||
runAddressLane: false,
|
||||
decision: "skip_address_lane",
|
||||
reason: "llm_predecompose_semantic_guard_rejected"
|
||||
};
|
||||
}
|
||||
if (hasUnsupportedLowConfidencePredecomposeSignal && !followupContext &&
|
||||
!hasAnyAddressSignal &&
|
||||
!strongDataSignalFromRawMessage &&
|
||||
|
|
@ -3242,12 +3324,22 @@ function resolveAssistantOrchestrationDecision(input) {
|
|||
const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample);
|
||||
const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample);
|
||||
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
|
||||
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
|
||||
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
|
||||
? llmPreDecomposeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const semanticContractValid = semanticExtractionContract?.valid !== false;
|
||||
const semanticApplyCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
|
||||
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
|
||||
? semanticExtractionContract.reason_codes
|
||||
: [];
|
||||
const strictDeepInvestigationCueDetected = hasStrictDeepInvestigationCue(rawUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(repairedRawUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(effectiveAddressUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage);
|
||||
const keepAddressLaneByIntent = Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
|
||||
(llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent))) &&
|
||||
const keepAddressLaneByIntent = semanticApplyCanonicalRecommended &&
|
||||
Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
|
||||
(llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent))) &&
|
||||
!strictDeepInvestigationCueDetected;
|
||||
const strongDataSignal = hasStrongDataIntentSignal(rawUserMessage) ||
|
||||
hasStrongDataIntentSignal(repairedRawUserMessage) ||
|
||||
|
|
@ -3446,6 +3538,9 @@ function resolveAssistantOrchestrationDecision(input) {
|
|||
address_intent_confidence: intentResolution.confidence,
|
||||
strong_data_signal_detected: strongDataSignal,
|
||||
data_retrieval_signal_detected: dataRetrievalSignal,
|
||||
semantic_contract_valid: semanticContractValid,
|
||||
semantic_apply_canonical_recommended: semanticApplyCanonicalRecommended,
|
||||
semantic_reason_codes: semanticReasonCodes,
|
||||
followup_context_detected: Boolean(followupContext),
|
||||
unsupported_address_intent_fallback_to_deep: unsupportedAddressIntentFallbackToDeep,
|
||||
deep_analysis_signal_fallback_to_deep: deepAnalysisSignalFallbackToDeep,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ export type AddressPredecomposeAggregationProfile =
|
|||
| "open_items"
|
||||
| "unknown";
|
||||
|
||||
/** Quality grade assigned to a semantic extraction contract. */
export type AddressSemanticExtractionQuality = "high" | "medium" | "low";
|
||||
|
||||
export interface AddressLlmPredecomposeContractV1 {
|
||||
schema_version: "address_llm_predecompose_contract_v1";
|
||||
source_message: string;
|
||||
|
|
@ -41,6 +43,102 @@ export interface AddressLlmPredecomposeContractV1 {
|
|||
aggregation_profile: AddressPredecomposeAggregationProfile;
|
||||
}
|
||||
|
||||
/**
 * Result of validating a canonical rewrite of a user message against the
 * predecompose contract and keyword-based guard signals.
 */
export interface AddressSemanticExtractionContractV1 {
  schema_version: "address_semantic_extraction_contract_v1";
  /** Original user message. */
  source_message: string;
  /** Rewritten message that was evaluated. */
  canonical_message: string;
  /** Whether source and canonical messages differ. */
  canonical_rewrite_applied: boolean;
  /** Classification snapshot taken from the predecompose contract. */
  extraction: {
    mode: AddressQuestionMode;
    mode_confidence: "high" | "medium" | "low";
    query_shape: AddressQueryShape;
    query_shape_confidence: "high" | "medium" | "low";
    intent: AddressIntent;
    intent_confidence: "high" | "medium" | "low";
    aggregation_profile: AddressPredecomposeAggregationProfile;
  };
  entities: AddressLlmPredecomposeContractV1["entities"];
  period: AddressLlmPredecomposeContractV1["period"];
  /** Individual guard signals behind `valid` and `apply_canonical_recommended`. */
  guard_hints: {
    source_data_signal_detected: boolean;
    canonical_data_signal_detected: boolean;
    data_scope_meta_query_detected: boolean;
    deep_investigation_signal_detected: boolean;
    required_anchor_missing: boolean;
    unsupported_low_confidence: boolean;
    semantic_drift_suspected: boolean;
  };
  quality: AddressSemanticExtractionQuality;
  valid: boolean;
  /** Whether the canonical rewrite is recommended for use. */
  apply_canonical_recommended: boolean;
  /** Codes naming the guard conditions that fired. */
  reason_codes: string[];
}
|
||||
|
||||
// Keyword patterns used by the semantic extraction guard. All of them are
// case-insensitive, Unicode-aware and unanchored (substring matches), and none
// carries the `g` flag, so `.test()` keeps no state between calls.

// Data-retrieval vocabulary: escaped Cyrillic stems plus English and
// transliterated keywords (documents, contracts, accounts, balances, ...).
const ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN =
  /(?:\u0434\u043e\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0441\u0447(?:\u0435|\u0451)\u0442|\u0441\u0430\u043b\u044c\u0434\u043e|\u043e\u0431\u043e\u0440\u043e\u0442|\u043f\u043b\u0430\u0442(?:\u0435|\u0451)\u0436|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u0435\u0440\u0438\u043e\u0434|\u0433\u043e\u0434|counterparty|contract|document|account|balance|turnover|operations?|doki|doky|dokument|dogovor|kontragent|schet|saldo|platezh|oplata)/iu;

// Business-entity vocabulary (customer, supplier, counterparty, company, ...).
const ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN =
  /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u043e\u0440|customer|supplier|counterparty|company|vendor|client)/iu;

// Meta questions about which database / tenant / data scope is in use.
const ADDRESS_SEMANTIC_SCOPE_META_PATTERN =
  /(?:\u043a\u0430\u043a\u0430\u044f\s+\u0431\u0430\u0437\u0430|\u0431\u0430\u0437\u0430\s+\u043a\u0430\u043a\u043e\u0439\s+\u043a\u043e\u043d\u0442\u043e\u0440|\u043f\u043e\s+\u043a\u0430\u043a\u0438\u043c\s+\u043a\u043e\u043d\u0442\u043e\u0440|which\s+company\s+base|which\s+tenant|data\s+scope)/iu;

// Investigation cues (verify, analyse, why, mechanism, root cause, ...).
const ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN =
  /(?:\u043f\u0440\u043e\u0432\u0435\u0440(?:\u044c|\u0438\u0442\u044c)|\u0440\u0430\u0437\u0431\u0435\u0440(?:\u0438|\u0430\u0442\u044c)|\u043f\u043e\u0447\u0435\u043c\u0443|\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c|\u0440\u0430\u0437\u0440\u044b\u0432|\u0445\u0432\u043e\u0441\u0442|root\s*cause|trace\s*chain|state\s+transition)/iu;
|
||||
|
||||
/**
 * Normalizes arbitrary input to a comparison-friendly string.
 *
 * `null`/`undefined` become the empty string; anything else is stringified,
 * lower-cased, has every whitespace run collapsed to a single space, and is
 * trimmed.
 *
 * @param value - Value to normalize.
 * @returns The normalized text.
 */
function normalizeCompact(value: unknown): string {
  return String(value ?? "")
    .toLowerCase()
    .replace(/\s+/g, " ")
    .trim();
}
|
||||
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN`.
 *
 * @param value - Message text (normalized via `normalizeCompact`).
 * @returns `true` when the data-signal pattern matches.
 */
function hasSemanticDataSignal(value: unknown): boolean {
  return ADDRESS_SEMANTIC_DATA_SIGNAL_PATTERN.test(normalizeCompact(value));
}
|
||||
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN`.
 *
 * @param value - Message text (normalized via `normalizeCompact`).
 * @returns `true` when the entity-signal pattern matches.
 */
function hasSemanticEntitySignal(value: unknown): boolean {
  return ADDRESS_SEMANTIC_ENTITY_SIGNAL_PATTERN.test(normalizeCompact(value));
}
|
||||
|
||||
/**
 * Tests the normalized text against `ADDRESS_SEMANTIC_SCOPE_META_PATTERN`.
 *
 * @param value - Message text (normalized via `normalizeCompact`).
 * @returns `true` when the data-scope meta pattern matches.
 */
function hasSemanticDataScopeMetaSignal(value: unknown): boolean {
  return ADDRESS_SEMANTIC_SCOPE_META_PATTERN.test(normalizeCompact(value));
}
|
||||
|
||||
/**
 * Tests the normalized text against
 * `ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN`.
 *
 * @param value - Message text (normalized via `normalizeCompact`).
 * @returns `true` when the deep-investigation pattern matches.
 */
function hasSemanticDeepInvestigationSignal(value: unknown): boolean {
  return ADDRESS_SEMANTIC_DEEP_INVESTIGATION_PATTERN.test(normalizeCompact(value));
}
|
||||
|
||||
/**
 * Reports whether the contract's intent names an entity that the contract does not carry.
 *
 * - `*_by_counterparty` intents (documents, bank operations, contracts) need
 *   `entities.counterparty`.
 * - `*_by_contract` intents (documents, bank operations) need `entities.contract`.
 * - Every other intent has no required anchor here.
 *
 * @param contract - Predecompose contract to inspect.
 * @returns `true` when the entity required by the intent is absent or empty
 *          (as judged by `toNonEmptyString`), otherwise `false`.
 */
function requiredAnchorMissing(contract: AddressLlmPredecomposeContractV1): boolean {
  switch (contract.intent) {
    case "list_documents_by_counterparty":
    case "bank_operations_by_counterparty":
    case "list_contracts_by_counterparty":
      return !toNonEmptyString(contract.entities.counterparty);
    case "list_documents_by_contract":
    case "bank_operations_by_contract":
      return !toNonEmptyString(contract.entities.contract);
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Maps the guard flags of a semantic extraction onto a quality grade.
 *
 * @param input.valid - Overall validity verdict; `false` always yields `"low"`.
 * @param input.unsupportedLowConfidence - Degrades a valid extraction to `"medium"`.
 * @param input.requiredAnchorMissing - Degrades a valid extraction to `"medium"`.
 * @param input.unknownIntentAndShape - Degrades a valid extraction to `"medium"`.
 * @returns `"low"` when invalid, `"medium"` when valid but any degrading flag is set,
 *          `"high"` otherwise.
 */
function deriveSemanticQuality(input: {
  valid: boolean;
  unsupportedLowConfidence: boolean;
  requiredAnchorMissing: boolean;
  unknownIntentAndShape: boolean;
}): AddressSemanticExtractionQuality {
  if (!input.valid) {
    return "low";
  }
  const degraded =
    input.unsupportedLowConfidence || input.requiredAnchorMissing || input.unknownIntentAndShape;
  return degraded ? "medium" : "high";
}
|
||||
|
||||
function toNonEmptyString(value: unknown): string | null {
|
||||
if (value === null || value === undefined) {
|
||||
return null;
|
||||
|
|
@ -167,3 +265,115 @@ export function buildAddressLlmPredecomposeContractV1(input: {
|
|||
aggregation_profile: inferAggregationProfile(intent.intent, shape.shape)
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the `address_semantic_extraction_contract_v1` record for a
 * source message / canonical rewrite pair.
 *
 * Steps:
 *  1. Trim both messages; an empty canonical message falls back to the source message.
 *  2. Use the supplied predecompose contract, or build one from the two messages
 *     when none (or `null`) is given.
 *  3. Compute guard hints from the regex signal helpers and the predecompose contract.
 *  4. Collect reason codes in a fixed order, then derive `valid`,
 *     `apply_canonical_recommended` and `quality`.
 *
 * @param input.sourceMessage - The original user text.
 * @param input.canonicalMessage - The rewritten (canonical) text.
 * @param input.predecomposeContract - Optional pre-built predecompose contract.
 * @returns The contract: extraction snapshot, entities, period, guard hints,
 *          quality, validity, apply-canonical recommendation and reason codes.
 */
export function buildAddressSemanticExtractionContractV1(input: {
  sourceMessage: string;
  canonicalMessage: string;
  predecomposeContract?: AddressLlmPredecomposeContractV1 | null;
}): AddressSemanticExtractionContractV1 {
  const sourceMessage = String(input.sourceMessage ?? "").trim();
  const trimmedCanonical = String(input.canonicalMessage ?? "").trim();
  const canonicalMessage = trimmedCanonical || sourceMessage;

  const contract =
    input.predecomposeContract ??
    buildAddressLlmPredecomposeContractV1({ sourceMessage, canonicalMessage });

  // Signal probes; evaluated in a fixed order.
  const sourceDataSignal = hasSemanticDataSignal(sourceMessage);
  const canonicalDataSignal = hasSemanticDataSignal(canonicalMessage);
  const canonicalEntitySignal = hasSemanticEntitySignal(canonicalMessage);
  const dataScopeMetaSignal = hasSemanticDataScopeMetaSignal(sourceMessage);
  // `some` stops at the first hit, so the canonical text is only probed when the source misses.
  const deepInvestigationSignal = [sourceMessage, canonicalMessage].some((text) =>
    hasSemanticDeepInvestigationSignal(text)
  );

  // Note: "medium" mode confidence is treated the same as "low" for unsupported mode.
  const modeConfidence = contract.mode_confidence;
  const unsupportedLowConfidence =
    contract.mode === "unsupported" && (modeConfidence === "low" || modeConfidence === "medium");
  const missingRequiredAnchor = requiredAnchorMissing(contract);
  const unknownIntentAndShape = contract.intent === "unknown" && contract.query_shape === "UNKNOWN";
  const rewriteApplied = normalizeCompact(sourceMessage) !== normalizeCompact(canonicalMessage);
  // Drift: the rewrite differs from the source and dropped the data signal the source had.
  const semanticDriftSuspected = rewriteApplied && sourceDataSignal && !canonicalDataSignal;

  // Ordered table of (condition, code); the emitted order follows the table order.
  const reasonCodeChecks: Array<[boolean, string]> = [
    [dataScopeMetaSignal, "data_scope_meta_query_detected"],
    [unsupportedLowConfidence, "unsupported_low_confidence_contract"],
    [missingRequiredAnchor, "required_anchor_missing_for_intent"],
    [semanticDriftSuspected, "semantic_drift_source_vs_canonical"],
    [unknownIntentAndShape && rewriteApplied, "rewrite_without_structured_gain"],
    [deepInvestigationSignal, "deep_investigation_signal_detected"]
  ];
  const reasonCodes: string[] = reasonCodeChecks.filter(([hit]) => hit).map(([, code]) => code);

  const canonicalCarriesSignal = canonicalDataSignal || canonicalEntitySignal;
  const unsupportedWithoutAnySignal =
    unsupportedLowConfidence && !canonicalCarriesSignal && !sourceDataSignal;
  const rewriteWithoutGain = unknownIntentAndShape && rewriteApplied && !canonicalCarriesSignal;
  const valid = !(
    dataScopeMetaSignal ||
    semanticDriftSuspected ||
    unsupportedWithoutAnySignal ||
    rewriteWithoutGain
  );

  // A valid contract still advises against the rewrite when it changed the text while
  // either the mode is unsupported/low-confidence on a data-bearing source, or the
  // intent's required anchor is missing.
  const rewriteDiscouraged =
    rewriteApplied && ((unsupportedLowConfidence && sourceDataSignal) || missingRequiredAnchor);
  const applyCanonicalRecommended = valid && !rewriteDiscouraged;

  const quality = deriveSemanticQuality({
    valid,
    unsupportedLowConfidence,
    requiredAnchorMissing: missingRequiredAnchor,
    unknownIntentAndShape
  });

  const { entities, period } = contract;

  return {
    schema_version: "address_semantic_extraction_contract_v1",
    source_message: sourceMessage,
    canonical_message: canonicalMessage,
    canonical_rewrite_applied: rewriteApplied,
    extraction: {
      mode: contract.mode,
      mode_confidence: contract.mode_confidence,
      query_shape: contract.query_shape,
      query_shape_confidence: contract.query_shape_confidence,
      intent: contract.intent,
      intent_confidence: contract.intent_confidence,
      aggregation_profile: contract.aggregation_profile
    },
    entities: {
      account: entities.account,
      counterparty: entities.counterparty,
      contract: entities.contract,
      document_type: entities.document_type,
      document_ref: entities.document_ref,
      organization: entities.organization
    },
    period: {
      scope: period.scope,
      period_from: period.period_from,
      period_to: period.period_to,
      as_of_date: period.as_of_date,
      has_explicit_period: period.has_explicit_period
    },
    guard_hints: {
      source_data_signal_detected: sourceDataSignal,
      canonical_data_signal_detected: canonicalDataSignal,
      data_scope_meta_query_detected: dataScopeMetaSignal,
      deep_investigation_signal_detected: deepInvestigationSignal,
      required_anchor_missing: missingRequiredAnchor,
      unsupported_low_confidence: unsupportedLowConfidence,
      semantic_drift_suspected: semanticDriftSuspected
    },
    quality,
    valid,
    apply_canonical_recommended: applyCanonicalRecommended,
    reason_codes: reasonCodes
  };
}
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ function normalizeLlmPreDecomposeMeta(value: unknown): AddressLlmPreDecomposeMet
|
|||
const addressRetryAuditRaw = toRecordObject(source.addressRetryAudit);
|
||||
const predecomposeContractRaw = toRecordObject(source.predecomposeContract);
|
||||
const predecomposePeriodRaw = toRecordObject(predecomposeContractRaw?.period);
|
||||
const semanticExtractionContractRaw = toRecordObject(source.semanticExtractionContract);
|
||||
|
||||
const normalized: AddressLlmPreDecomposeMetaLogInput = {};
|
||||
|
||||
|
|
@ -170,6 +171,27 @@ function normalizeLlmPreDecomposeMeta(value: unknown): AddressLlmPreDecomposeMet
|
|||
}
|
||||
}
|
||||
|
||||
if (semanticExtractionContractRaw) {
|
||||
const valid = toNullableBoolean(semanticExtractionContractRaw.valid);
|
||||
const quality = toNullableString(semanticExtractionContractRaw.quality);
|
||||
const applyCanonicalRecommended = toNullableBoolean(
|
||||
semanticExtractionContractRaw.apply_canonical_recommended
|
||||
);
|
||||
const reasonCodes = Array.isArray(semanticExtractionContractRaw.reason_codes)
|
||||
? semanticExtractionContractRaw.reason_codes
|
||||
.map((item) => toNullableString(item))
|
||||
.filter((item): item is string => Boolean(item))
|
||||
: [];
|
||||
if (valid !== undefined || quality || applyCanonicalRecommended !== undefined || reasonCodes.length > 0) {
|
||||
normalized.semanticExtractionContract = {
|
||||
valid: valid ?? null,
|
||||
quality,
|
||||
apply_canonical_recommended: applyCanonicalRecommended ?? null,
|
||||
reason_codes: reasonCodes
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const hasUsefulField = Object.values(normalized).some((item) => item !== undefined && item !== null);
|
||||
return hasUsefulField ? normalized : null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ export interface AssistantAddressToolGateRuntimeInput<ResponseType = unknown> {
|
|||
scope?: unknown;
|
||||
} | null;
|
||||
} | null;
|
||||
semanticExtractionContract?: {
|
||||
valid?: unknown;
|
||||
quality?: unknown;
|
||||
apply_canonical_recommended?: unknown;
|
||||
reason_codes?: unknown;
|
||||
} | null;
|
||||
[key: string]: unknown;
|
||||
} | null;
|
||||
logEvent: (payload: Record<string, unknown>) => void;
|
||||
|
|
@ -67,6 +73,18 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
|
|||
predecomposeContract?.period && typeof predecomposeContract.period === "object"
|
||||
? predecomposeContract.period
|
||||
: null;
|
||||
const semanticExtractionContract =
|
||||
runtimeMeta.semanticExtractionContract && typeof runtimeMeta.semanticExtractionContract === "object"
|
||||
? (runtimeMeta.semanticExtractionContract as {
|
||||
valid?: unknown;
|
||||
quality?: unknown;
|
||||
apply_canonical_recommended?: unknown;
|
||||
reason_codes?: unknown;
|
||||
})
|
||||
: null;
|
||||
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
|
||||
? semanticExtractionContract?.reason_codes
|
||||
: [];
|
||||
|
||||
input.logEvent({
|
||||
timestamp: input.nowIso(),
|
||||
|
|
@ -88,7 +106,12 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
|
|||
address_tool_gate_reason: runtimeMeta.toolGateReason ?? null,
|
||||
address_llm_predecompose_contract_intent: predecomposeContract?.intent ?? null,
|
||||
address_llm_predecompose_contract_aggregation_profile: predecomposeContract?.aggregation_profile ?? null,
|
||||
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null
|
||||
address_llm_predecompose_contract_period_scope: predecomposePeriod?.scope ?? null,
|
||||
address_semantic_contract_valid: semanticExtractionContract?.valid ?? null,
|
||||
address_semantic_contract_quality: semanticExtractionContract?.quality ?? null,
|
||||
address_semantic_apply_canonical_recommended:
|
||||
semanticExtractionContract?.apply_canonical_recommended ?? null,
|
||||
address_semantic_reason_codes: semanticReasonCodes
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -107,4 +130,3 @@ export async function runAssistantAddressToolGateRuntime<ResponseType = unknown>
|
|||
response: null
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,12 @@ export interface AddressLlmPreDecomposeMetaLogInput {
|
|||
scope?: string | null;
|
||||
} | null;
|
||||
} | null;
|
||||
semanticExtractionContract?: {
|
||||
valid?: boolean | null;
|
||||
quality?: string | null;
|
||||
apply_canonical_recommended?: boolean | null;
|
||||
reason_codes?: string[] | null;
|
||||
} | null;
|
||||
}
|
||||
|
||||
export interface FinalizeAssistantAddressTurnInput {
|
||||
|
|
@ -109,6 +115,13 @@ function buildAddressProcessedLogDetails(input: FinalizeAssistantAddressTurnInpu
|
|||
address_llm_predecompose_contract_intent: llmMeta?.predecomposeContract?.intent ?? null,
|
||||
address_llm_predecompose_contract_aggregation_profile: llmMeta?.predecomposeContract?.aggregation_profile ?? null,
|
||||
address_llm_predecompose_contract_period_scope: llmMeta?.predecomposeContract?.period?.scope ?? null,
|
||||
address_semantic_contract_valid: llmMeta?.semanticExtractionContract?.valid ?? null,
|
||||
address_semantic_contract_quality: llmMeta?.semanticExtractionContract?.quality ?? null,
|
||||
address_semantic_apply_canonical_recommended:
|
||||
llmMeta?.semanticExtractionContract?.apply_canonical_recommended ?? null,
|
||||
address_semantic_reason_codes: Array.isArray(llmMeta?.semanticExtractionContract?.reason_codes)
|
||||
? llmMeta.semanticExtractionContract?.reason_codes
|
||||
: [],
|
||||
detected_mode: laneDebug.detected_mode,
|
||||
query_shape: laneDebug.query_shape,
|
||||
detected_intent: laneDebug.detected_intent,
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInp
|
|||
address_tool_gate_decision: input.addressRuntimeMetaForDeep?.toolGateDecision ?? null,
|
||||
address_tool_gate_reason: input.addressRuntimeMetaForDeep?.toolGateReason ?? null,
|
||||
address_llm_predecompose_contract: input.addressRuntimeMetaForDeep?.predecomposeContract ?? null,
|
||||
address_semantic_extraction_contract: input.addressRuntimeMetaForDeep?.semanticExtractionContract ?? null,
|
||||
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
|
|
|
|||
|
|
@ -159,6 +159,10 @@ export async function runAssistantLivingChatRuntime(
|
|||
addressRuntimeMeta.predecomposeContract && typeof addressRuntimeMeta.predecomposeContract === "object"
|
||||
? (addressRuntimeMeta.predecomposeContract as Record<string, unknown>)
|
||||
: null;
|
||||
const semanticExtractionContract =
|
||||
addressRuntimeMeta.semanticExtractionContract && typeof addressRuntimeMeta.semanticExtractionContract === "object"
|
||||
? (addressRuntimeMeta.semanticExtractionContract as Record<string, unknown>)
|
||||
: null;
|
||||
|
||||
const debug: Record<string, unknown> = {
|
||||
trace_id: input.traceIdFactory(),
|
||||
|
|
@ -191,6 +195,7 @@ export async function runAssistantLivingChatRuntime(
|
|||
address_llm_predecompose_applied: Boolean(addressRuntimeMeta.applied),
|
||||
address_llm_predecompose_reason: addressRuntimeMeta.reason ?? null,
|
||||
address_llm_predecompose_contract: predecomposeContract,
|
||||
address_semantic_extraction_contract: semanticExtractionContract,
|
||||
orchestration_contract_v1: addressRuntimeMeta.orchestrationContract ?? null,
|
||||
tool_gate_decision: addressRuntimeMeta.toolGateDecision ?? null,
|
||||
tool_gate_reason: addressRuntimeMeta.toolGateReason ?? null,
|
||||
|
|
|
|||
|
|
@ -2714,12 +2714,19 @@ function hasSameDateAccountFollowupSignalForPredecompose(text) {
|
|||
}
|
||||
/**
 * Returns a copy of `meta` extended with the predecompose contract and the
 * semantic extraction contract for the given source message.
 *
 * The canonical message is `meta.effectiveMessage` when `toNonEmptyString` accepts it,
 * otherwise the stringified source message. The predecompose contract is built once
 * and reused both as the input of the semantic contract and as the returned field,
 * so the two attached contracts always describe the same message pair.
 *
 * @param {object | null | undefined} meta - Predecompose runtime meta; spread into the result.
 * @param {unknown} sourceMessage - Original user message (`null`/`undefined` become "").
 * @returns {object} `meta` plus `predecomposeContract` and `semanticExtractionContract`.
 */
function attachAddressPredecomposeContract(meta, sourceMessage) {
    const sourceText = String(sourceMessage ?? "");
    const canonicalMessage = toNonEmptyString(meta?.effectiveMessage) ?? sourceText;
    const predecomposeContract = (0, predecomposeContract_1.buildAddressLlmPredecomposeContractV1)({
        sourceMessage: sourceText,
        canonicalMessage
    });
    const semanticExtractionContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
        sourceMessage: sourceText,
        canonicalMessage,
        predecomposeContract
    });
    // Return the contracts built above; the leftover inline rebuild of the predecompose
    // contract inside this literal was redundant and left the literal malformed.
    return {
        ...meta,
        predecomposeContract,
        semanticExtractionContract
    };
}
|
||||
async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) {
|
||||
|
|
@ -2910,6 +2917,63 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
|
|||
}, userMessage);
|
||||
}
|
||||
}
|
||||
const semanticContractForCandidate = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
|
||||
sourceMessage: String(userMessage ?? ""),
|
||||
canonicalMessage: candidate
|
||||
});
|
||||
if (!semanticContractForCandidate.apply_canonical_recommended) {
|
||||
const sourceDataSignalDetected = Boolean(semanticContractForCandidate?.guard_hints?.source_data_signal_detected);
|
||||
const rawFragmentCandidatePreferred = Boolean(sourceDataSignalDetected &&
|
||||
candidateFromNormalized &&
|
||||
candidateFromNormalized === candidate &&
|
||||
toNonEmptyString(candidate));
|
||||
if (rawFragmentCandidatePreferred) {
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: true,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: candidate,
|
||||
reason: "normalized_fragment_semantic_guard_raw_fragment_preferred",
|
||||
fallbackRuleHit: null,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
if (fallbackCandidate) {
|
||||
const fallbackSemanticContract = (0, predecomposeContract_1.buildAddressSemanticExtractionContractV1)({
|
||||
sourceMessage: String(userMessage ?? ""),
|
||||
canonicalMessage: String(fallbackCandidate.candidate ?? "")
|
||||
});
|
||||
const fallbackCompact = compactWhitespace(String(fallbackCandidate.candidate ?? "").toLowerCase());
|
||||
const sourceCompactForFallback = compactWhitespace(String(userMessage ?? "").toLowerCase());
|
||||
const fallbackApplied = fallbackCompact.length > 0 && fallbackCompact !== sourceCompactForFallback;
|
||||
if (fallbackApplied && fallbackSemanticContract.apply_canonical_recommended && !sourceDataSignalDetected) {
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: true,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: String(fallbackCandidate.candidate ?? ""),
|
||||
reason: "fallback_rule_preferred_over_llm_candidate_semantic_guard",
|
||||
fallbackRuleHit: fallbackCandidate.rule,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
}
|
||||
return attachAddressPredecomposeContract({
|
||||
...baseMeta,
|
||||
attempted: true,
|
||||
applied: false,
|
||||
traceId: normalized?.trace_id ?? null,
|
||||
llmCanonicalCandidateDetected: true,
|
||||
effectiveMessage: userMessage,
|
||||
reason: "normalized_fragment_rejected_semantic_guard",
|
||||
fallbackRuleHit: null,
|
||||
sanitizedUserMessage
|
||||
}, userMessage);
|
||||
}
|
||||
const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase());
|
||||
const candidateCompact = compactWhitespace(candidate.toLowerCase());
|
||||
const applied = sourceCompact !== candidateCompact;
|
||||
|
|
@ -2986,13 +3050,20 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
|
|||
const llmContractMode = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode);
|
||||
const llmContractModeConfidence = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.mode_confidence);
|
||||
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
|
||||
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
|
||||
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
|
||||
? llmPreDecomposeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const semanticCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
|
||||
const llmCanonicalEntitySignal = /(?:\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043e\u043c\u043f\u0430\u043d|customer|supplier|counterparty|company|vendor|client)/iu.test(compactWhitespace(repairedInputMessage.toLowerCase()));
|
||||
const llmCanonicalAppliedSignal = Boolean(llmPreDecomposeMeta?.applied) && llmContractMode !== "deep_analysis";
|
||||
const hasLlmCanonicalSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
const hasLlmCanonicalSignal = semanticCanonicalRecommended &&
|
||||
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
((llmContractMode === "address_query" && llmContractModeConfidence !== "low") ||
|
||||
(llmCanonicalAppliedSignal &&
|
||||
(hasStrongDataIntentSignal(repairedInputMessage) || llmCanonicalEntitySignal)));
|
||||
const hasLlmCanonicalDataSignal = Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
const hasLlmCanonicalDataSignal = semanticCanonicalRecommended &&
|
||||
Boolean(llmPreDecomposeMeta?.llmCanonicalCandidateDetected) &&
|
||||
Boolean(llmPreDecomposeMeta?.applied) &&
|
||||
(llmContractMode === "address_query" || llmContractMode === "unsupported" || llmContractMode === null) &&
|
||||
hasStrongDataIntentSignal(repairedInputMessage);
|
||||
|
|
@ -3013,6 +3084,17 @@ function resolveAddressToolGateDecision(addressInputMessage, followupContext, ll
|
|||
const strongDataSignalFromEffectiveMessage = hasStrongDataIntentSignal(repairedInputMessage) ||
|
||||
hasAccountingSignal(repairedInputMessage) ||
|
||||
hasDataRetrievalRequestSignal(repairedInputMessage);
|
||||
if (!semanticCanonicalRecommended &&
|
||||
llmContractIntent === "unknown" &&
|
||||
!followupContext &&
|
||||
!hasClassifierSignal &&
|
||||
!strongDataSignalFromRawMessage) {
|
||||
return {
|
||||
runAddressLane: false,
|
||||
decision: "skip_address_lane",
|
||||
reason: "llm_predecompose_semantic_guard_rejected"
|
||||
};
|
||||
}
|
||||
if (hasUnsupportedLowConfidencePredecomposeSignal && !followupContext &&
|
||||
!hasAnyAddressSignal &&
|
||||
!strongDataSignalFromRawMessage &&
|
||||
|
|
@ -3198,11 +3280,21 @@ export function resolveAssistantOrchestrationDecision(input) {
|
|||
const modeDetection = (0, addressQueryClassifier_1.detectAddressQuestionMode)(modeSample);
|
||||
const intentResolution = (0, addressIntentResolver_1.resolveAddressIntent)(modeSample);
|
||||
const llmContractIntent = toNonEmptyString(llmPreDecomposeMeta?.predecomposeContract?.intent);
|
||||
const semanticExtractionContract = llmPreDecomposeMeta?.semanticExtractionContract &&
|
||||
typeof llmPreDecomposeMeta.semanticExtractionContract === "object"
|
||||
? llmPreDecomposeMeta.semanticExtractionContract
|
||||
: null;
|
||||
const semanticContractValid = semanticExtractionContract?.valid !== false;
|
||||
const semanticApplyCanonicalRecommended = semanticExtractionContract?.apply_canonical_recommended !== false;
|
||||
const semanticReasonCodes = Array.isArray(semanticExtractionContract?.reason_codes)
|
||||
? semanticExtractionContract.reason_codes
|
||||
: [];
|
||||
const strictDeepInvestigationCueDetected = hasStrictDeepInvestigationCue(rawUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(repairedRawUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(effectiveAddressUserMessage) ||
|
||||
hasStrictDeepInvestigationCue(repairedEffectiveAddressUserMessage);
|
||||
const keepAddressLaneByIntent = Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
|
||||
const keepAddressLaneByIntent = semanticApplyCanonicalRecommended &&
|
||||
Boolean((intentResolution.intent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(intentResolution.intent)) ||
|
||||
(llmContractIntent && ADDRESS_INTENTS_KEEP_ADDRESS_LANE.has(llmContractIntent))) &&
|
||||
!strictDeepInvestigationCueDetected;
|
||||
const strongDataSignal = hasStrongDataIntentSignal(rawUserMessage) ||
|
||||
|
|
@ -3402,6 +3494,9 @@ export function resolveAssistantOrchestrationDecision(input) {
|
|||
address_intent_confidence: intentResolution.confidence,
|
||||
strong_data_signal_detected: strongDataSignal,
|
||||
data_retrieval_signal_detected: dataRetrievalSignal,
|
||||
semantic_contract_valid: semanticContractValid,
|
||||
semantic_apply_canonical_recommended: semanticApplyCanonicalRecommended,
|
||||
semantic_reason_codes: semanticReasonCodes,
|
||||
followup_context_detected: Boolean(followupContext),
|
||||
unsupported_address_intent_fallback_to_deep: unsupportedAddressIntentFallbackToDeep,
|
||||
deep_analysis_signal_fallback_to_deep: deepAnalysisSignalFallbackToDeep,
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ export interface AssistantAddressRuntimeMetaForDeep {
|
|||
toolGateDecision?: string | null;
|
||||
toolGateReason?: string | null;
|
||||
predecomposeContract?: Record<string, unknown> | null;
|
||||
semanticExtractionContract?: Record<string, unknown> | null;
|
||||
orchestrationContract?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1047,14 +1047,16 @@ describe("assistant address llm pre-decompose candidate preference", () => {
|
|||
} as any);
|
||||
|
||||
expect(response.ok).toBe(true);
|
||||
expect(response.reply_type).toBe("factual");
|
||||
expect(calls).toHaveLength(1);
|
||||
expect(calls[0].message).toBe("заказчики компании svk");
|
||||
expect(response.debug?.llm_decomposition_attempted).toBe(true);
|
||||
expect(response.debug?.llm_decomposition_applied).toBe(true);
|
||||
expect(response.debug?.llm_canonical_candidate_detected).toBe(true);
|
||||
expect(response.debug?.tool_gate_decision).toBe("run_address_lane");
|
||||
expect(["llm_canonical_candidate_detected", "llm_canonical_data_signal_detected", "address_mode_classifier_detected"]).toContain(response.debug?.tool_gate_reason);
|
||||
expect(response.reply_type).toBe("clarification_required");
|
||||
expect(calls).toHaveLength(0);
|
||||
expect(response.debug?.address_tool_gate_decision).toBe("skip_address_lane");
|
||||
expect(
|
||||
[
|
||||
"llm_predecompose_semantic_guard_rejected",
|
||||
"llm_predecompose_unsupported_mode",
|
||||
"address_signal_unsupported_intent_fallback_to_deep"
|
||||
]
|
||||
).toContain(response.debug?.address_tool_gate_reason);
|
||||
});
|
||||
|
||||
it("normalizes short ordinal year like '20й' in noisy docs phrasing", async () => {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildAddressLlmPredecomposeContractV1,
|
||||
buildAddressSemanticExtractionContractV1
|
||||
} from "../src/services/address_runtime/predecomposeContract";
|
||||
|
||||
// Regression suite for buildAddressSemanticExtractionContractV1: one rejected
// low-signal rewrite, one drift case, and one coherent rewrite that is kept.
describe("address semantic extraction contract", () => {
  it("rejects low-confidence unsupported rewrite without data signal", () => {
    // Neither message carries a data or entity signal; the predecompose contract is passed in explicitly.
    const messages = { sourceMessage: "yo", canonicalMessage: "yoft" };
    const semantic = buildAddressSemanticExtractionContractV1({
      ...messages,
      predecomposeContract: buildAddressLlmPredecomposeContractV1(messages)
    });
    const hints = semantic.guard_hints;

    expect(semantic.schema_version).toBe("address_semantic_extraction_contract_v1");
    expect(hints.source_data_signal_detected).toBe(false);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.unsupported_low_confidence).toBe(true);
    expect(semantic.valid).toBe(false);
    expect(semantic.apply_canonical_recommended).toBe(false);
    expect(semantic.reason_codes).toContain("unsupported_low_confidence_contract");
  });

  it("flags semantic drift when canonical loses data intent", () => {
    // The source asks for documents; the rewrite is a generic request for help.
    const semantic = buildAddressSemanticExtractionContractV1({
      sourceMessage: "покажи документы по договору 12",
      canonicalMessage: "помоги разобраться"
    });
    const hints = semantic.guard_hints;

    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.semantic_drift_suspected).toBe(true);
    expect(semantic.valid).toBe(false);
    expect(semantic.apply_canonical_recommended).toBe(false);
    expect(semantic.reason_codes).toContain("semantic_drift_source_vs_canonical");
  });

  it("keeps canonical rewrite when semantic contract remains coherent", () => {
    // Both the source and the rewrite keep the data request.
    const semantic = buildAddressSemanticExtractionContractV1({
      sourceMessage: "Покажи незакрытые договоры на 2020-12-31",
      canonicalMessage: "Показать незакрытые договоры по состоянию на конец декабря 2020 года."
    });
    const hints = semantic.guard_hints;

    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(true);
    expect(semantic.valid).toBe(true);
    expect(semantic.apply_canonical_recommended).toBe(true);
    expect(["high", "medium"]).toContain(semantic.quality);
  });
});
|
||||
|
||||
Loading…
Reference in New Issue