NODEDC_1C/llm_normalizer/backend/tests/assistantSemanticExtraction...

129 lines
6.0 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, it } from "vitest";
import {
buildAddressLlmPredecomposeContractV1,
buildAddressSemanticExtractionContractV1
} from "../src/services/address_runtime/predecomposeContract";
// Contract-level checks for the two builders imported from predecomposeContract:
// the LLM predecompose contract and the semantic extraction contract.
describe("address semantic extraction contract", () => {
  // Both the source and the rewrite are expected to carry no data signal, so the
  // contract must be reported invalid and the rewrite must not be recommended.
  it("rejects low-confidence unsupported rewrite without data signal", () => {
    const userText = "yo";
    const rewrittenText = "yoft";

    const predecompose = buildAddressLlmPredecomposeContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
    });
    const extraction = buildAddressSemanticExtractionContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
      predecomposeContract: predecompose,
    });

    expect(extraction.schema_version).toBe("address_semantic_extraction_contract_v1");

    const hints = extraction.guard_hints;
    expect(hints.source_data_signal_detected).toBe(false);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.unsupported_low_confidence).toBe(true);

    expect(extraction.valid).toBe(false);
    expect(extraction.apply_canonical_recommended).toBe(false);
    expect(extraction.reason_codes).toContain("unsupported_low_confidence_contract");
  });

  // The source is expected to show a data signal while the rewrite does not;
  // the test requires that mismatch to be flagged as semantic drift.
  it("flags semantic drift when canonical loses data intent", () => {
    const userText = "покажи документы по договору 12";
    const rewrittenText = "помоги разобраться";

    // No predecompose contract is passed in this scenario.
    const extraction = buildAddressSemanticExtractionContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
    });

    const hints = extraction.guard_hints;
    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(false);
    expect(hints.semantic_drift_suspected).toBe(true);

    expect(extraction.valid).toBe(false);
    expect(extraction.apply_canonical_recommended).toBe(false);
    expect(extraction.reason_codes).toContain("semantic_drift_source_vs_canonical");
  });

  // Source and rewrite are both expected to show a data signal; the contract
  // must be valid and the rewrite recommended.
  it("keeps canonical rewrite when semantic contract remains coherent", () => {
    const userText = "Покажи незакрытые договоры на 2020-12-31";
    const rewrittenText = "Показать незакрытые договоры по состоянию на конец декабря 2020 года.";

    const extraction = buildAddressSemanticExtractionContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
    });

    const hints = extraction.guard_hints;
    expect(hints.source_data_signal_detected).toBe(true);
    expect(hints.canonical_data_signal_detected).toBe(true);

    expect(extraction.valid).toBe(true);
    expect(extraction.apply_canonical_recommended).toBe(true);
    // Either of the two quality grades is acceptable here.
    expect(["high", "medium"]).toContain(extraction.quality);
  });

  // Only the predecompose builder is exercised; the same text is supplied as
  // both the source and the canonical message.
  it("marks self-scope stock snapshot wording as implicit current scope, not explicit date", () => {
    const userText = "что на складе у нас";

    const predecompose = buildAddressLlmPredecomposeContractV1({
      sourceMessage: userText,
      canonicalMessage: userText,
    });

    expect(predecompose.intent).toBe("inventory_on_hand_as_of_date");
    expect(predecompose.period.has_explicit_period).toBe(false);

    const semantics = predecompose.semantics;
    expect(semantics.scope_kind).toBe("implicit_self_scope");
    expect(semantics.anchor_kind).toBe("self_scope");
    expect(semantics.date_scope_kind).toBe("implicit_current");
    expect(semantics.date_basis_hint).toBe("implicit_current_snapshot");
  });

  // Semantic hints naming an organization are supplied; the test requires them
  // to surface in the contract's entities and semantics.
  it("accepts llm semantic hints for organization-scoped informal warehouse wording", () => {
    const userText = "что на складе конторы альтернатива";
    const semanticHints = {
      scope_target_kind: "organization",
      scope_target_text: "Альтернатива",
      date_scope_kind: "implicit_current",
      self_scope_detected: false,
      selected_object_scope_detected: false,
    };

    const predecompose = buildAddressLlmPredecomposeContractV1({
      sourceMessage: userText,
      canonicalMessage: userText,
      semanticHints,
    });

    expect(predecompose.intent).toBe("inventory_on_hand_as_of_date");

    const entities = predecompose.entities;
    expect(entities.organization).toBe("Альтернатива");
    expect(entities.counterparty).toBeNull();

    const semantics = predecompose.semantics;
    expect(semantics.scope_kind).toBe("explicit_anchor");
    expect(semantics.anchor_kind).toBe("organization");
    expect(semantics.anchor_value).toBe("Альтернатива");

    expect(predecompose.period.has_explicit_period).toBe(false);
    expect(semantics.date_scope_kind).toBe("implicit_current");
  });

  // Runs both builders end to end: the predecompose contract built with hints is
  // fed into the semantic extraction contract.
  it("keeps slang stock-state rewrite as address snapshot instead of deep investigation", () => {
    const userText = "чекни плиз чо там на складе альтернативы происходит";
    const rewrittenText = "проверь, что происходит на складе у компании 'альтернатива'";
    const semanticHints = {
      scope_target_kind: "organization",
      scope_target_text: "альтернатива",
      date_scope_kind: "implicit_current",
      self_scope_detected: false,
      selected_object_scope_detected: false,
    };

    const predecompose = buildAddressLlmPredecomposeContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
      semanticHints,
    });
    const extraction = buildAddressSemanticExtractionContractV1({
      sourceMessage: userText,
      canonicalMessage: rewrittenText,
      predecomposeContract: predecompose,
    });

    expect(predecompose.mode).toBe("address_query");
    expect(predecompose.intent).toBe("inventory_on_hand_as_of_date");
    expect(predecompose.entities.organization).toBe("альтернатива");

    const hints = extraction.guard_hints;
    expect(hints.deep_investigation_signal_detected).toBe(false);
    expect(hints.canonical_data_signal_detected).toBe(true);

    expect(extraction.valid).toBe(true);
    expect(extraction.apply_canonical_recommended).toBe(true);
  });
});