NODEDC_1C/llm_normalizer/backend/dist/services/addressCounterpartyIntentSi...

299 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Module preamble: strict mode, the `__esModule` interop marker, and the single public export.
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
// `resolveCounterpartyAddressIntent` is a function declaration further down the file;
// declarations are hoisted, so it can be assigned to `exports` before its definition appears.
exports.resolveCounterpartyAddressIntent = resolveCounterpartyAddressIntent;
/**
 * Reports whether the query mentions both an "open item" cue and a settlement
 * account reference.
 *
 * The input is coerced to a string (null/undefined become "") and lower-cased.
 * Two independent patterns must both match somewhere in it:
 *  - one of the Russian stems for "tail", "debt", "unclosed" or "hanging";
 *  - the Russian word for "account" (several endings, "е" or "ё") next to the
 *    number 60, 62 or 76, in either order, with an optional "№"/"#" prefix and
 *    an optional two-digit sub-account such as ".01".
 *
 * @param {*} text - Query text; any value is accepted.
 * @returns {boolean} True only when both patterns match.
 */
function hasUnicodeOpenItemsAccountSignal(text) {
    const haystack = String(text ?? "").toLowerCase();
    if (haystack.length === 0) {
        return false;
    }
    const openItemCue = /(?:\u0445\u0432\u043e\u0441\u0442|\u0434\u043e\u043b\u0433|\u043d\u0435\u0437\u0430\u043a\u0440\u044b\u0442|\u0432\u0438\u0441)/iu;
    if (!openItemCue.test(haystack)) {
        return false;
    }
    const accountReference = /(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?\s*(?:\u2116|#)?\s*(?:60|62|76)(?:\.\d{2})?|(?:60|62|76)(?:\.\d{2})?\s*(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?))/iu;
    return accountReference.test(haystack);
}
/**
 * Reports whether the query contains both an active-voice "shipped" verb stem
 * and an item noun stem (goods, services, positions or nomenclature), all in
 * Russian and matched case-insensitively anywhere in the text.
 *
 * @param {*} text - Query text; `RegExp.prototype.test` coerces it to a string.
 * @returns {boolean} True only when every required pattern matches.
 */
function hasUnicodeCounterpartyShipmentItemFlowSignal(text) {
    const requiredPatterns = [
        /(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0430\u043b)/iu,
        /(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu
    ];
    // `every` stops at the first miss, mirroring a short-circuiting `&&` chain.
    return requiredPatterns.every((pattern) => pattern.test(text));
}
/**
 * Reports whether the query contains a passive "shipped" participle, an item
 * noun stem (goods, services, positions or nomenclature) and the
 * instrumental-case Russian word for "by the counterparty". All three must be
 * present; matching is case-insensitive and position-independent.
 *
 * @param {*} text - Query text; `RegExp.prototype.test` coerces it to a string.
 * @returns {boolean} True only when all three patterns match.
 */
function hasUnicodePassiveShipmentByCounterpartySignal(text) {
    const shippedParticiple = /(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0435\u043d(?:\u044b|\u043e|\u0430)?)/iu;
    if (!shippedParticiple.test(text)) {
        return false;
    }
    const itemNoun = /(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu;
    if (!itemNoun.test(text)) {
        return false;
    }
    const byCounterparty = /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c)/iu;
    return byCounterparty.test(text);
}
/**
 * Reports whether the query mentions "documents" in Russian: the short form
 * "dok"/"doki", or the full word bare or with one of the plural endings listed
 * in the pattern. The match must be followed by end of input, whitespace or
 * one of `, . ; : ! ? ( )`; nothing is required before it.
 *
 * @param {*} text - Query text; `RegExp.prototype.test` coerces it to a string.
 * @returns {boolean} True when the pattern matches anywhere in the text.
 */
function hasUnicodeDocumentSignal(text) {
    const documentWord = /(?:\u0434\u043e\u043a(?:\u0443\u043c\u0435\u043d\u0442(?:\u044b|\u043e\u0432|\u0430\u043c|\u0430\u043c\u0438|\u0430\u0445)?|\u0438)?|\u0434\u043e\u043a\u0438)(?=$|[\s,.;:!?()])/iu;
    const mentionsDocuments = documentWord.test(text);
    return mentionsDocuments;
}
// Generic nouns (counterparty, company, organization, contract, account, date,
// period, warehouse, bank, documents — in several case forms) that may follow
// the Russian preposition "po" without naming a specific counterparty.
// Built once at module load instead of on every call.
const COUNTERPARTY_AFTER_BY_STOP_WORDS = new Set([
    "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0443",
    "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430",
    "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c",
    "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438",
    "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044e",
    "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u0438",
    "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u044e",
    "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0443",
    "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u043e\u043c",
    "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u0443",
    "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u043e\u043c",
    "\u0441\u0447\u0435\u0442\u0443",
    "\u0441\u0447\u0451\u0442\u0443",
    "\u0441\u0447\u0435\u0442\u043e\u043c",
    "\u0441\u0447\u0451\u0442\u043e\u043c",
    "\u0434\u0430\u0442\u0435",
    "\u0434\u0430\u0442\u0443",
    "\u043f\u0435\u0440\u0438\u043e\u0434\u0443",
    "\u043f\u0435\u0440\u0438\u043e\u0434",
    "\u0441\u043a\u043b\u0430\u0434\u0443",
    "\u0441\u043a\u043b\u0430\u0434\u0435",
    "\u0431\u0430\u043d\u043a\u0443",
    "\u0431\u0430\u043d\u043a\u0435",
    "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c",
    "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b",
    "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443"
]);
/**
 * Reports whether the first "po <token>" phrase in the text looks like it names
 * a specific counterparty, i.e. the token after the Russian preposition "po"
 * is not one of the generic stop words.
 *
 * The preposition must start the text or follow whitespace or "(". The token
 * must start with a letter or digit, be at least two characters long, and be
 * followed by end of input, whitespace or one of `, . ; : ! ? ( )`. Only the
 * first such phrase is examined.
 *
 * @param {*} text - Query text; null/undefined and non-strings are coerced to
 *   a string instead of throwing.
 * @returns {boolean} True when a phrase is found and its token is not a stop
 *   word; false when there is no phrase or the token is a stop word.
 */
function hasUnicodeLikelyCounterpartyAfterBy(text) {
    const source = String(text ?? "");
    const match = source.match(/(?:^|[\s(])\u043f\u043e\s+([\p{L}\d][\p{L}\d._-]{1,})(?=$|[\s,.;:!?()])/iu);
    if (!match) {
        return false;
    }
    // The token class allows ".", "_" and "-" inside names, so the greedy match
    // also swallows sentence-final punctuation ("...po kontragentu."). Strip it
    // before the lookup, otherwise a stop word at the end of a sentence slips
    // past the stop list.
    const token = match[1].toLowerCase().replace(/[._-]+$/u, "");
    return !COUNTERPARTY_AFTER_BY_STOP_WORDS.has(token);
}
/**
 * Reports whether the query asks about the activity lifetime of a business
 * entity.
 *
 * The input is coerced to a string (null/undefined become "") and lower-cased.
 * It must contain an "activity age" phrase in Russian (how many years of
 * activity / in the base, age of activity, first activity / payment / receipt /
 * document, last activity, "since which year active"). It must also satisfy at
 * least one of:
 *  - a 1C phrase ("in the 1C base", "in 1C base", "from 1C"), where the letter
 *    after "1" may be Cyrillic or Latin;
 *  - a business-entity stem (company, organization, counterparty, client,
 *    supplier, or a legal-form abbreviation);
 *  - a "po <token>" phrase accepted by `hasUnicodeLikelyCounterpartyAfterBy`.
 *
 * @param {*} text - Query text; any value is accepted.
 * @returns {boolean} True when the age phrase and at least one anchor are found.
 */
function hasUnicodeCounterpartyActivityLifecycleSignal(text) {
    const haystack = String(text ?? "").toLowerCase();
    if (haystack.length === 0) {
        return false;
    }
    const activityAgeCue = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+\u043b\u0435\u0442\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u0438|\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+\u043b\u0435\u0442\s+\u0432\s+\u0431\u0430\u0437\u0435|\u0432\u043e\u0437\u0440\u0430\u0441\u0442\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u0438|\u043f\u0435\u0440\u0432(?:\u0430\u044f|\u044b\u0439|\u043e\u0435)\s+(?:\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u044c|\u043f\u043b\u0430\u0442\u0435\u0436|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0435|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442)|\u043f\u043e\u0441\u043b\u0435\u0434\u043d(?:\u044f\u044f|\u0438\u0439|\u0435\u0435)\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u044c|\u0441\s+\u043a\u0430\u043a\u043e\u0433\u043e\s+\u0433\u043e\u0434\u0430\s+\u0430\u043a\u0442\u0438\u0432)/iu;
    if (!activityAgeCue.test(haystack)) {
        return false;
    }
    const oneCLexeme = /(?:\u0432\s+\u0431\u0430\u0437\u0435\s+1[\u0441c]|\u0432\s+1[\u0441c]\s+\u0431\u0430\u0437\u0435|\u0438\u0437\s+1[\u0441c])/iu;
    const businessAnchor = /(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u043d\u0430\u0448\u0435\u0439\s+\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438|\u043d\u0430\u0448\u0435\u0439\s+\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u0438|\u043e\u043e\u043e|\u0430\u043e|\u0437\u0430\u043e|\u0438\u043f)/iu;
    if (oneCLexeme.test(haystack) || businessAnchor.test(haystack)) {
        return true;
    }
    // Last resort: treat a non-generic token after "po" as the anchor.
    return hasUnicodeLikelyCounterpartyAfterBy(haystack);
}
/**
 * Maps a free-text query to a counterparty/contract address intent.
 *
 * Rules are evaluated top to bottom and the first match wins, so the order of
 * the checks is part of the behavior. The leading rules use the local
 * `hasUnicode*` helpers; the remaining ones delegate to predicates supplied
 * through `deps`.
 *
 * Earlier revisions repeated the open-items and shipment checks both as inline
 * regexes and as helper calls. The inline open-items regex was a strict subset
 * of `hasUnicodeOpenItemsAccountSignal`, and the inline shipment regexes were
 * identical to the helpers' patterns, so each signal is now checked once with
 * the same outcome.
 *
 * @param {string} text - Query text to classify. The final rule calls
 *   `text.includes`, so a non-string reaching that rule would throw.
 * @param {object} deps - Injected predicate functions (`hasAny`, the various
 *   `has*Signal` / anchor checks) and hint collections passed to `hasAny`
 *   (`openItemsHints`, `documentsByContractHints`,
 *   `bankOperationsByCounterpartyHints`, `documentsByCounterpartyHints`,
 *   `openContractsHints`). Their implementations are not visible in this file.
 * @returns {{intent: string, confidence: string, reasons: string[]} | null}
 *   The first matching intent with its confidence ("high", "medium" or "low")
 *   and a single reason code, or null when no rule matches.
 */
function resolveCounterpartyAddressIntent(text, deps) {
    // Every rule reports exactly one reason, so build the result in one place.
    const verdict = (intent, confidence, reason) => ({
        intent,
        confidence,
        reasons: [reason]
    });
    if (hasUnicodeOpenItemsAccountSignal(text)) {
        return verdict("open_items_by_counterparty_or_contract", "medium", "open_items_signal_detected");
    }
    if (hasUnicodeCounterpartyShipmentItemFlowSignal(text) ||
        hasUnicodePassiveShipmentByCounterpartySignal(text)) {
        return verdict("list_documents_by_counterparty", "medium", "counterparty_item_flow_signal_detected");
    }
    // Unlike the deps-based lifecycle rule further down, this one is not gated
    // on contract-anchor or account-balance signals.
    if (hasUnicodeCounterpartyActivityLifecycleSignal(text)) {
        return verdict("counterparty_activity_lifecycle", "high", "counterparty_activity_lifecycle_signal_detected");
    }
    if (deps.hasOpenContractsListSignal(text)) {
        return verdict("open_contracts_confirmed_as_of_date", "medium", "open_contract_signal_detected");
    }
    // Open-items hints count only when no debt-longevity or inventory signal
    // fires and the text carries a party/contract word or an account anchor.
    if (deps.hasAny(text, deps.openItemsHints) &&
        !deps.hasCounterpartyDebtLongevitySignal(text) &&
        !deps.hasInventoryAgingSignal(text) &&
        !deps.hasInventoryProvenanceSignalV2(text) &&
        !deps.hasInventoryPurchaseDocumentsSignalV2(text) &&
        !deps.hasInventorySaleTraceSignalV2(text) &&
        (/(?:контраг|договор|контракт|counterparty|contract|покупател|клиент|заказчик|customer|client|buyer|supplier|поставщик)/iu.test(text) ||
            deps.hasAccountNumberAnchor(text) ||
            deps.hasCompactAccountCodeToken(text))) {
        return verdict("open_items_by_counterparty_or_contract", "medium", "open_items_signal_detected");
    }
    if (deps.hasPeriodCoverageProfileSignal(text) &&
        !deps.hasPartyAnchorMention(text) &&
        !deps.hasContractAnchorSignal(text) &&
        !deps.hasAccountBalanceSignal(text)) {
        return verdict("period_coverage_profile", "high", "period_coverage_profile_signal_detected");
    }
    if (deps.hasDocumentTypeAndAccountSectionProfileSignal(text) &&
        !deps.hasPartyAnchorMention(text) &&
        !deps.hasContractAnchorSignal(text) &&
        !deps.hasAccountBalanceSignal(text)) {
        return verdict("document_type_and_account_section_profile", "high", "document_type_and_account_section_profile_signal_detected");
    }
    if (deps.hasCounterpartyPopulationAndRolesSignal(text) &&
        !deps.hasContractAnchorSignal(text) &&
        !deps.hasAccountBalanceSignal(text)) {
        return verdict("counterparty_population_and_roles", "high", "counterparty_population_and_roles_signal_detected");
    }
    if (deps.hasCounterpartyActivityLifecycleSignal(text) &&
        !deps.hasContractAnchorSignal(text) &&
        !deps.hasAccountBalanceSignal(text)) {
        return verdict("counterparty_activity_lifecycle", "high", "counterparty_activity_lifecycle_signal_detected");
    }
    if (deps.hasContractUsageOverviewSignal(text) &&
        !deps.hasAccountBalanceSignal(text) &&
        !deps.hasOpenContractsListSignal(text)) {
        return verdict("contract_usage_overview", "high", "contract_usage_overview_signal_detected");
    }
    if (deps.hasCustomerRevenueAndPaymentsSignal(text) && !deps.hasAccountBalanceSignal(text)) {
        return verdict("customer_revenue_and_payments", "high", "customer_revenue_and_payments_signal_detected");
    }
    if (deps.hasSupplierPayoutsProfileSignal(text) && !deps.hasAccountBalanceSignal(text)) {
        return verdict("supplier_payouts_profile", "high", "supplier_payouts_profile_signal_detected");
    }
    if (deps.hasContractUsageAndValueSignal(text) &&
        !deps.hasAccountBalanceSignal(text) &&
        !deps.hasOpenContractsListSignal(text)) {
        return verdict("contract_usage_and_value", "high", "contract_usage_and_value_signal_detected");
    }
    if (deps.hasContractListByCounterpartySignal(text)) {
        return verdict("list_contracts_by_counterparty", "medium", "contracts_by_counterparty_signal_detected");
    }
    if (deps.hasContractAnchorSignal(text) && deps.hasBankOperationSignal(text)) {
        return verdict("bank_operations_by_contract", "medium", "bank_ops_by_contract_signal_detected");
    }
    if (deps.hasContractAnchorSignal(text) &&
        (deps.hasAny(text, deps.documentsByContractHints) || deps.hasDocumentSignal(text))) {
        return verdict("list_documents_by_contract", "medium", "documents_by_contract_signal_detected");
    }
    if (deps.hasAny(text, deps.bankOperationsByCounterpartyHints) &&
        (deps.hasPartyAnchorMention(text) || deps.hasLooseByAnchorMention(text) || deps.hasHeuristicCounterpartyAnchor(text))) {
        return verdict("bank_operations_by_counterparty", "medium", "bank_ops_by_counterparty_signal_detected");
    }
    if (hasUnicodeDocumentSignal(text) && hasUnicodeLikelyCounterpartyAfterBy(text)) {
        return verdict("list_documents_by_counterparty", "medium", "documents_by_counterparty_signal_detected");
    }
    if ((deps.hasAny(text, deps.documentsByCounterpartyHints) || deps.hasCounterpartyShipmentItemFlowSignal(text)) &&
        (deps.hasPartyAnchorMention(text) ||
            deps.hasLooseByAnchorMention(text) ||
            deps.hasImplicitCounterpartyAnchorAroundDocs(text) ||
            deps.hasHeuristicCounterpartyAnchor(text) ||
            deps.hasCounterpartyShipmentItemFlowSignal(text))) {
        // The item-flow signal, when present, takes precedence as the reason.
        return verdict("list_documents_by_counterparty", "medium", deps.hasCounterpartyShipmentItemFlowSignal(text)
            ? "counterparty_item_flow_signal_detected"
            : "documents_by_counterparty_signal_detected");
    }
    if (deps.hasAccountBalanceSignal(text)) {
        return verdict("account_balance_snapshot", "high", "account_balance_signal_detected");
    }
    if (deps.hasLooseByAnchorMention(text) && deps.hasGenericAddressLookupSignal(text)) {
        return verdict("list_documents_by_counterparty", "low", "generic_lookup_with_loose_anchor_fallback");
    }
    // NOTE(review): unlike the regex rules above, these substring checks are
    // case-sensitive — presumably callers pass lower-cased text; confirm.
    if (deps.hasAny(text, deps.openContractsHints) &&
        (text.includes("договор") || text.includes("контракт") || text.includes("contract"))) {
        return verdict("open_contracts_confirmed_as_of_date", "medium", "open_contract_signal_detected");
    }
    return null;
}