NODEDC_1C/llm_normalizer/backend/src/services/addressCounterpartyIntentSi...

387 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { AddressIntentResolution } from "../types/addressQuery";
/** List of hint strings; every `HintList` in the deps object is handed to `hasAny` by the resolver. */
type HintList = string[];
/**
 * Dependencies injected into `resolveCounterpartyAddressIntent`.
 *
 * The resolver itself only combines these members with boolean logic; what
 * each predicate actually recognises is decided by the implementation the
 * caller supplies, which is not visible in this file section.
 */
type CounterpartyIntentDeps = {
// Called as hasAny(text, <hint list>); presumably true when the text contains
// at least one of the hints — confirm against the supplied implementation.
hasAny: (text: string, hints: string[]) => boolean;
// Hint lists the resolver passes to `hasAny`.
openItemsHints: HintList;
openContractsHints: HintList;
documentsByCounterpartyHints: HintList;
bankOperationsByCounterpartyHints: HintList;
documentsByContractHints: HintList;
// Text predicates. In the resolver the next five are used only negated, as
// exclusions on the hint-based open-items branch.
hasCounterpartyDebtLongevitySignal: (text: string) => boolean;
hasInventoryAgingSignal: (text: string) => boolean;
hasInventoryProvenanceSignalV2: (text: string) => boolean;
hasInventoryPurchaseDocumentsSignalV2: (text: string) => boolean;
hasInventorySaleTraceSignalV2: (text: string) => boolean;
// Alternative anchors accepted by the hint-based open-items branch.
hasAccountNumberAnchor: (text: string) => boolean;
hasCompactAccountCodeToken: (text: string) => boolean;
hasPeriodCoverageProfileSignal: (text: string) => boolean;
// Anchor/balance predicates: used by the resolver both as positive triggers
// and as exclusions on the "profile" style intents.
hasPartyAnchorMention: (text: string) => boolean;
hasContractAnchorSignal: (text: string) => boolean;
hasAccountBalanceSignal: (text: string) => boolean;
hasDocumentTypeAndAccountSectionProfileSignal: (text: string) => boolean;
hasCounterpartyPopulationAndRolesSignal: (text: string) => boolean;
hasCounterpartyActivityLifecycleSignal: (text: string) => boolean;
hasContractUsageOverviewSignal: (text: string) => boolean;
hasOpenContractsListSignal: (text: string) => boolean;
hasCustomerRevenueAndPaymentsSignal: (text: string) => boolean;
hasSupplierPayoutsProfileSignal: (text: string) => boolean;
hasContractUsageAndValueSignal: (text: string) => boolean;
hasContractListByCounterpartySignal: (text: string) => boolean;
hasBankOperationSignal: (text: string) => boolean;
hasDocumentSignal: (text: string) => boolean;
hasLooseByAnchorMention: (text: string) => boolean;
hasHeuristicCounterpartyAnchor: (text: string) => boolean;
hasCounterpartyShipmentItemFlowSignal: (text: string) => boolean;
hasImplicitCounterpartyAnchorAroundDocs: (text: string) => boolean;
hasGenericAddressLookupSignal: (text: string) => boolean;
};
/**
 * Reports whether the text combines a "debt / tail / unclosed / hanging"
 * word stem with a reference to account 60, 62 or 76 (optionally with a
 * two-digit sub-account such as `62.01`).
 *
 * The account number may come after the Russian word for "account"
 * (optionally separated by a numero sign or `#`) or directly before it.
 *
 * @param text Raw user text; nullish values are treated as an empty string.
 * @returns `true` only when both parts are present.
 */
function hasUnicodeOpenItemsAccountSignal(text: string): boolean {
  const lowered = String(text ?? "").toLowerCase();
  if (lowered.length === 0) {
    return false;
  }

  // Stems: tail / debt / unclosed / hanging.
  const debtStem = /(?:\u0445\u0432\u043e\u0441\u0442|\u0434\u043e\u043b\u0433|\u043d\u0435\u0437\u0430\u043a\u0440\u044b\u0442|\u0432\u0438\u0441)/iu;
  if (!debtStem.test(lowered)) {
    return false;
  }

  // "<account word> [numero|#] 60|62|76[.NN]" or "60|62|76[.NN] <account word>".
  const accountReference = /(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?\s*(?:\u2116|#)?\s*(?:60|62|76)(?:\.\d{2})?|(?:60|62|76)(?:\.\d{2})?\s*(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?))/iu;
  return accountReference.test(lowered);
}
/**
 * Reports whether the text contains the active past-tense "shipped" verb stem
 * together with an item noun stem (goods / services / positions / nomenclature).
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns `true` when both stems occur anywhere in the text.
 */
function hasUnicodeCounterpartyShipmentItemFlowSignal(text: string): boolean {
  const shippedVerb = /(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0430\u043b)/iu;
  if (!shippedVerb.test(text)) {
    return false;
  }
  const itemNoun = /(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu;
  return itemNoun.test(text);
}
/**
 * Reports whether the text uses the passive "shipped" participle stem, an item
 * noun stem (goods / services / positions / nomenclature) and the instrumental
 * form of "counterparty" ("by the counterparty").
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns `true` only when all three patterns are found.
 */
function hasUnicodePassiveShipmentByCounterpartySignal(text: string): boolean {
  // Checked in order; `every` stops at the first pattern that does not match.
  const requiredPatterns = [
    /(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0435\u043d(?:\u044b|\u043e|\u0430)?)/iu,
    /(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu,
    /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c)/iu
  ];
  return requiredPatterns.every((pattern) => pattern.test(text));
}
/**
 * Reports whether the text mentions "document(s)" in Russian: the short stem,
 * the full word with a handful of case endings, or the colloquial plural.
 * The word must be followed by end of text, whitespace or one of `,.;:!?()`;
 * nothing is required before it.
 *
 * @param text Text to inspect; matching is case-insensitive.
 */
function hasUnicodeDocumentSignal(text: string): boolean {
  const documentWord = /(?:\u0434\u043e\u043a(?:\u0443\u043c\u0435\u043d\u0442(?:\u044b|\u043e\u0432|\u0430\u043c|\u0430\u043c\u0438|\u0430\u0445)?|\u0438)?|\u0434\u043e\u043a\u0438)(?=$|[\s,.;:!?()])/iu;
  return documentWord.exec(text) !== null;
}
/**
 * Lower-case words that may follow the Russian preposition "po" ("by / for")
 * without naming a counterparty: inflected forms of counterparty, company,
 * organization, contract (two synonyms), account, date, period, warehouse,
 * bank and document.
 */
const BY_PHRASE_STOP_WORDS: ReadonlySet<string> = new Set([
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0443",
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430",
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c",
  "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438",
  "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044e",
  "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u0438",
  "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u044e",
  "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0443",
  "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u043e\u043c",
  "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u0443",
  "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u043e\u043c",
  "\u0441\u0447\u0435\u0442\u0443",
  "\u0441\u0447\u0451\u0442\u0443",
  "\u0441\u0447\u0435\u0442\u043e\u043c",
  "\u0441\u0447\u0451\u0442\u043e\u043c",
  "\u0434\u0430\u0442\u0435",
  "\u0434\u0430\u0442\u0443",
  "\u043f\u0435\u0440\u0438\u043e\u0434\u0443",
  "\u043f\u0435\u0440\u0438\u043e\u0434",
  "\u0441\u043a\u043b\u0430\u0434\u0443",
  "\u0441\u043a\u043b\u0430\u0434\u0435",
  "\u0431\u0430\u043d\u043a\u0443",
  "\u0431\u0430\u043d\u043a\u0435",
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c",
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b",
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443"
]);

/**
 * Reports whether the first "po <word>" phrase in the text is followed by
 * something that is not a generic stop word, i.e. looks like a counterparty
 * name.
 *
 * The word must start with a letter or digit, be at least two characters long
 * and may contain letters, digits, `.`, `_` and `-`. Because those punctuation
 * characters are part of the token, a sentence-final stop word would be
 * captured together with its trailing dot and slip past the stop-word list;
 * trailing `.`, `_` and `-` are therefore stripped before the lookup.
 *
 * Only the first "po <word>" occurrence is examined.
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns `true` when such a phrase exists and its word is not a stop word.
 */
function hasUnicodeLikelyCounterpartyAfterBy(text: string): boolean {
  const match = text.match(/(?:^|[\s(])\u043f\u043e\s+([\p{L}\d][\p{L}\d._-]{1,})(?=$|[\s,.;:!?()])/iu);
  if (!match) {
    return false;
  }
  // The token always starts with a letter or digit, so stripping trailing
  // punctuation can never leave it empty; the guard below is purely defensive.
  const token = String(match[1] ?? "")
    .toLowerCase()
    .replace(/[._-]+$/u, "");
  if (!token) {
    return false;
  }
  return !BY_PHRASE_STOP_WORDS.has(token);
}
/**
 * Anchor stems that tie an open-items question to a counterparty or contract:
 * Russian stems for counterparty, contract (two synonyms), buyer, client,
 * customer and supplier, plus their English counterparts.
 *
 * Written with `\u` escapes on purpose: the previous literal appeared to be
 * mojibake (Cyrillic decoded with the wrong code page), so none of its Russian
 * alternatives could match real input. Escapes keep the pattern intact however
 * the source file is re-encoded.
 */
const OPEN_ITEMS_PARTY_OR_CONTRACT_ANCHOR =
  /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|counterparty|contract|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u043a\u043b\u0438\u0435\u043d\u0442|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|customer|client|buyer|supplier|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a)/iu;

/**
 * Maps free-form user text to a counterparty/contract related address intent.
 *
 * The checks run top to bottom and the first one that matches wins, so the
 * order of the branches is part of the contract:
 *
 * 1. built-in Unicode checks (open items on accounts 60/62/76, shipment flow);
 * 2. open contracts list, then hint-based open items;
 * 3. "profile"/"overview" intents, each suppressed by competing anchors;
 * 4. contract-anchored lists, then counterparty-anchored lists;
 * 5. account balance snapshot;
 * 6. low-confidence and hint-based fallbacks.
 *
 * @param text User text to classify.
 * @param deps Injected predicates and hint lists; see `CounterpartyIntentDeps`.
 * @returns The resolved intent with confidence and reason codes, or `null`
 *          when no branch matches.
 */
export function resolveCounterpartyAddressIntent(
  text: string,
  deps: CounterpartyIntentDeps
): AddressIntentResolution | null {
  // --- 1. Built-in Unicode checks -----------------------------------------
  // Each helper already covers the inline regex variant that used to be
  // repeated next to it (same patterns, same result), so every check is
  // performed exactly once here.
  if (hasUnicodeOpenItemsAccountSignal(text)) {
    return {
      intent: "open_items_by_counterparty_or_contract",
      confidence: "medium",
      reasons: ["open_items_signal_detected"]
    };
  }
  if (
    hasUnicodeCounterpartyShipmentItemFlowSignal(text) ||
    hasUnicodePassiveShipmentByCounterpartySignal(text)
  ) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: ["counterparty_item_flow_signal_detected"]
    };
  }

  // --- 2. Open contracts / hint-based open items ---------------------------
  if (deps.hasOpenContractsListSignal(text)) {
    return {
      intent: "open_contracts_confirmed_as_of_date",
      confidence: "medium",
      reasons: ["open_contract_signal_detected"]
    };
  }
  if (
    deps.hasAny(text, deps.openItemsHints) &&
    // Debt-longevity and inventory questions share vocabulary with open items
    // but are not resolved by this branch.
    !deps.hasCounterpartyDebtLongevitySignal(text) &&
    !deps.hasInventoryAgingSignal(text) &&
    !deps.hasInventoryProvenanceSignalV2(text) &&
    !deps.hasInventoryPurchaseDocumentsSignalV2(text) &&
    !deps.hasInventorySaleTraceSignalV2(text) &&
    // An open-items hint alone is not enough: require a party/contract word
    // or some form of account anchor.
    (OPEN_ITEMS_PARTY_OR_CONTRACT_ANCHOR.test(text) ||
      deps.hasAccountNumberAnchor(text) ||
      deps.hasCompactAccountCodeToken(text))
  ) {
    return {
      intent: "open_items_by_counterparty_or_contract",
      confidence: "medium",
      reasons: ["open_items_signal_detected"]
    };
  }

  // --- 3. Profile / overview intents ---------------------------------------
  if (
    deps.hasPeriodCoverageProfileSignal(text) &&
    !deps.hasPartyAnchorMention(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "period_coverage_profile",
      confidence: "high",
      reasons: ["period_coverage_profile_signal_detected"]
    };
  }
  if (
    deps.hasDocumentTypeAndAccountSectionProfileSignal(text) &&
    !deps.hasPartyAnchorMention(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "document_type_and_account_section_profile",
      confidence: "high",
      reasons: ["document_type_and_account_section_profile_signal_detected"]
    };
  }
  if (
    deps.hasCounterpartyPopulationAndRolesSignal(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "counterparty_population_and_roles",
      confidence: "high",
      reasons: ["counterparty_population_and_roles_signal_detected"]
    };
  }
  if (
    deps.hasCounterpartyActivityLifecycleSignal(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "counterparty_activity_lifecycle",
      confidence: "high",
      reasons: ["counterparty_activity_lifecycle_signal_detected"]
    };
  }
  if (
    deps.hasContractUsageOverviewSignal(text) &&
    !deps.hasAccountBalanceSignal(text) &&
    !deps.hasOpenContractsListSignal(text)
  ) {
    return {
      intent: "contract_usage_overview",
      confidence: "high",
      reasons: ["contract_usage_overview_signal_detected"]
    };
  }
  if (deps.hasCustomerRevenueAndPaymentsSignal(text) && !deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "customer_revenue_and_payments",
      confidence: "high",
      reasons: ["customer_revenue_and_payments_signal_detected"]
    };
  }
  if (deps.hasSupplierPayoutsProfileSignal(text) && !deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "supplier_payouts_profile",
      confidence: "high",
      reasons: ["supplier_payouts_profile_signal_detected"]
    };
  }
  if (
    deps.hasContractUsageAndValueSignal(text) &&
    !deps.hasAccountBalanceSignal(text) &&
    !deps.hasOpenContractsListSignal(text)
  ) {
    return {
      intent: "contract_usage_and_value",
      confidence: "high",
      reasons: ["contract_usage_and_value_signal_detected"]
    };
  }

  // --- 4. Contract-anchored, then counterparty-anchored lists ---------------
  if (deps.hasContractListByCounterpartySignal(text)) {
    return {
      intent: "list_contracts_by_counterparty",
      confidence: "medium",
      reasons: ["contracts_by_counterparty_signal_detected"]
    };
  }
  if (deps.hasContractAnchorSignal(text) && deps.hasBankOperationSignal(text)) {
    return {
      intent: "bank_operations_by_contract",
      confidence: "medium",
      reasons: ["bank_ops_by_contract_signal_detected"]
    };
  }
  if (
    deps.hasContractAnchorSignal(text) &&
    (deps.hasAny(text, deps.documentsByContractHints) || deps.hasDocumentSignal(text))
  ) {
    return {
      intent: "list_documents_by_contract",
      confidence: "medium",
      reasons: ["documents_by_contract_signal_detected"]
    };
  }
  if (
    deps.hasAny(text, deps.bankOperationsByCounterpartyHints) &&
    (deps.hasPartyAnchorMention(text) ||
      deps.hasLooseByAnchorMention(text) ||
      deps.hasHeuristicCounterpartyAnchor(text))
  ) {
    return {
      intent: "bank_operations_by_counterparty",
      confidence: "medium",
      reasons: ["bank_ops_by_counterparty_signal_detected"]
    };
  }
  // Built-in "documents by <name>" check, tried before the injected variant.
  if (hasUnicodeDocumentSignal(text) && hasUnicodeLikelyCounterpartyAfterBy(text)) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: ["documents_by_counterparty_signal_detected"]
    };
  }
  if (
    (deps.hasAny(text, deps.documentsByCounterpartyHints) || deps.hasCounterpartyShipmentItemFlowSignal(text)) &&
    (deps.hasPartyAnchorMention(text) ||
      deps.hasLooseByAnchorMention(text) ||
      deps.hasImplicitCounterpartyAnchorAroundDocs(text) ||
      deps.hasHeuristicCounterpartyAnchor(text) ||
      // A shipment item-flow signal counts as its own anchor.
      deps.hasCounterpartyShipmentItemFlowSignal(text))
  ) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: [
        deps.hasCounterpartyShipmentItemFlowSignal(text)
          ? "counterparty_item_flow_signal_detected"
          : "documents_by_counterparty_signal_detected"
      ]
    };
  }

  // --- 5. Account balance ---------------------------------------------------
  if (deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "account_balance_snapshot",
      confidence: "high",
      reasons: ["account_balance_signal_detected"]
    };
  }

  // --- 6. Fallbacks ---------------------------------------------------------
  if (deps.hasLooseByAnchorMention(text) && deps.hasGenericAddressLookupSignal(text)) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "low",
      reasons: ["generic_lookup_with_loose_anchor_fallback"]
    };
  }
  if (
    deps.hasAny(text, deps.openContractsHints) &&
    // NOTE(review): `includes` is case-sensitive, unlike the regex checks
    // above — presumably `text` is already lower-cased by the caller; confirm.
    (text.includes("договор") || text.includes("контракт") || text.includes("contract"))
  ) {
    return {
      intent: "open_contracts_confirmed_as_of_date",
      confidence: "medium",
      reasons: ["open_contract_signal_detected"]
    };
  }
  return null;
}