// Text heuristics that resolve counterparty/contract intents (see resolveCounterpartyAddressIntent).
import type { AddressIntentResolution } from "../types/addressQuery";
/** A list of hint strings that is handed to `hasAny` together with the query text. */
type HintList = string[];

/**
 * Caller-supplied collaborators used by `resolveCounterpartyAddressIntent`.
 *
 * Every member is either a hint list or a predicate over the raw query text.
 * The predicates are opaque here: their exact matching rules live with the
 * caller, and the resolver only combines their boolean results.
 */
type CounterpartyIntentDeps = {
  /**
   * Called as `hasAny(text, <one of the hint lists below>)`.
   * NOTE(review): presumably true when at least one hint occurs in `text`;
   * confirm against the implementation (including its case handling).
   */
  hasAny: (text: string, hints: HintList) => boolean;

  // Hint lists, each passed to `hasAny` by the resolver.
  openItemsHints: HintList;
  openContractsHints: HintList;
  documentsByCounterpartyHints: HintList;
  bankOperationsByCounterpartyHints: HintList;
  documentsByContractHints: HintList;

  // Predicates that veto the hint-based "open items" rule when they return true.
  hasCounterpartyDebtLongevitySignal: (text: string) => boolean;
  hasInventoryAgingSignal: (text: string) => boolean;
  hasInventoryProvenanceSignalV2: (text: string) => boolean;
  hasInventoryPurchaseDocumentsSignalV2: (text: string) => boolean;
  hasInventorySaleTraceSignalV2: (text: string) => boolean;

  // Account anchors; either one can satisfy the anchor requirement of the "open items" rule.
  hasAccountNumberAnchor: (text: string) => boolean;
  hasCompactAccountCodeToken: (text: string) => boolean;

  // Profile / overview signals and the anchors that gate them.
  hasPeriodCoverageProfileSignal: (text: string) => boolean;
  hasPartyAnchorMention: (text: string) => boolean;
  hasContractAnchorSignal: (text: string) => boolean;
  hasAccountBalanceSignal: (text: string) => boolean;
  hasDocumentTypeAndAccountSectionProfileSignal: (text: string) => boolean;
  hasCounterpartyPopulationAndRolesSignal: (text: string) => boolean;
  hasCounterpartyActivityLifecycleSignal: (text: string) => boolean;
  hasContractUsageOverviewSignal: (text: string) => boolean;
  hasOpenContractsListSignal: (text: string) => boolean;
  hasCustomerRevenueAndPaymentsSignal: (text: string) => boolean;
  hasSupplierPayoutsProfileSignal: (text: string) => boolean;
  hasContractUsageAndValueSignal: (text: string) => boolean;
  hasContractListByCounterpartySignal: (text: string) => boolean;

  // Bank-operation / document signals and counterparty anchor heuristics.
  hasBankOperationSignal: (text: string) => boolean;
  hasDocumentSignal: (text: string) => boolean;
  hasLooseByAnchorMention: (text: string) => boolean;
  hasHeuristicCounterpartyAnchor: (text: string) => boolean;
  hasCounterpartyShipmentItemFlowSignal: (text: string) => boolean;
  hasImplicitCounterpartyAnchorAroundDocs: (text: string) => boolean;
  hasGenericAddressLookupSignal: (text: string) => boolean;
};
/**
 * Detects a question about open/unsettled items that also names account 60, 62 or 76.
 *
 * Both conditions must hold on the lower-cased text:
 *  1. it contains one of the stems "хвост" (tail), "долг" (debt),
 *     "незакрыт" (unclosed) or "вис" (hanging);
 *  2. it contains the word "счет"/"счёт" (optionally with the ending а/у/ом/ов)
 *     next to 60, 62 or 76 — in either order, with an optional "№"/"#" before
 *     the number and an optional ".NN" two-digit suffix after it.
 *
 * @param text Raw query text; `null`/`undefined` are tolerated and yield `false`.
 * @returns `true` when both the open-items stem and the account reference are present.
 */
function hasUnicodeOpenItemsAccountSignal(text: string): boolean {
  const normalized = String(text ?? "").toLowerCase();
  if (!normalized) {
    return false;
  }

  // Stems: хвост | долг | незакрыт | вис
  const mentionsOpenItems =
    /(?:\u0445\u0432\u043e\u0441\u0442|\u0434\u043e\u043b\u0433|\u043d\u0435\u0437\u0430\u043a\u0440\u044b\u0442|\u0432\u0438\u0441)/iu.test(
      normalized
    );
  if (!mentionsOpenItems) {
    return false;
  }

  // "счет <№|#> 60|62|76[.NN]"  or  "60|62|76[.NN] счет"
  return /(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?\s*(?:\u2116|#)?\s*(?:60|62|76)(?:\.\d{2})?|(?:60|62|76)(?:\.\d{2})?\s*(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:\u0430|\u0443|\u043e\u043c|\u043e\u0432)?))/iu.test(
    normalized
  );
}
/**
 * Detects an active-voice shipment question about items.
 *
 * True when the text contains the stem "отгружал" (shipped) together with one
 * of the item stems "товар" (goods), "услуг" (services), "позици" (positions)
 * or "номенклатур" (nomenclature). Matching is case-insensitive.
 *
 * @param text Raw query text.
 * @returns `true` when both the shipment stem and an item stem are present.
 */
function hasUnicodeCounterpartyShipmentItemFlowSignal(text: string): boolean {
  // отгружал
  const mentionsShipment = /(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0430\u043b)/iu.test(text);
  if (!mentionsShipment) {
    return false;
  }
  // товар | услуг | позици | номенклатур
  return /(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu.test(
    text
  );
}
/**
 * Detects a passive-voice shipment question attributed to a counterparty.
 *
 * True when the text contains all three of:
 *  - the stem "отгружен" (shipped, passive; endings ы/о/а are also accepted),
 *  - one of the item stems "товар", "услуг", "позици", "номенклатур",
 *  - the word form "контрагентом" (by the counterparty).
 * Matching is case-insensitive.
 *
 * @param text Raw query text.
 * @returns `true` when all three parts are present.
 */
function hasUnicodePassiveShipmentByCounterpartySignal(text: string): boolean {
  // отгружен(ы|о|а)
  if (!/(?:\u043e\u0442\u0433\u0440\u0443\u0436\u0435\u043d(?:\u044b|\u043e|\u0430)?)/iu.test(text)) {
    return false;
  }
  // товар | услуг | позици | номенклатур
  if (
    !/(?:\u0442\u043e\u0432\u0430\u0440|\u0443\u0441\u043b\u0443\u0433|\u043f\u043e\u0437\u0438\u0446\u0438|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu.test(
      text
    )
  ) {
    return false;
  }
  // контрагентом
  return /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c)/iu.test(text);
}
/**
 * Detects a mention of documents.
 *
 * Matches "док", "доки", "документ" and the endings ы/ов/ам/ами/ах of
 * "документ", provided the word is followed by end of text, whitespace or one
 * of `, . ; : ! ? ( )`. Other endings (for example "документа") do not match.
 * There is no check on what precedes the word. Matching is case-insensitive.
 *
 * @param text Raw query text.
 * @returns `true` when such a word form is found.
 */
function hasUnicodeDocumentSignal(text: string): boolean {
  const documentWord =
    /(?:\u0434\u043e\u043a(?:\u0443\u043c\u0435\u043d\u0442(?:\u044b|\u043e\u0432|\u0430\u043c|\u0430\u043c\u0438|\u0430\u0445)?|\u0438)?|\u0434\u043e\u043a\u0438)(?=$|[\s,.;:!?()])/iu;
  return documentWord.test(text);
}
/**
 * Words that may follow the preposition "по" without naming a counterparty.
 * Built once at module load instead of on every call.
 */
const COUNTERPARTY_AFTER_BY_STOP_WORDS: ReadonlySet<string> = new Set([
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0443", // контрагенту
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430", // контрагента
  "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u043c", // контрагентом
  "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438", // компании
  "\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044e", // компанию
  "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u0438", // организации
  "\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446\u0438\u044e", // организацию
  "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0443", // договору
  "\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u043e\u043c", // договором
  "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u0443", // контракту
  "\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442\u043e\u043c", // контрактом
  "\u0441\u0447\u0435\u0442\u0443", // счету
  "\u0441\u0447\u0451\u0442\u0443", // счёту
  "\u0441\u0447\u0435\u0442\u043e\u043c", // счетом
  "\u0441\u0447\u0451\u0442\u043e\u043c", // счётом
  "\u0434\u0430\u0442\u0435", // дате
  "\u0434\u0430\u0442\u0443", // дату
  "\u043f\u0435\u0440\u0438\u043e\u0434\u0443", // периоду
  "\u043f\u0435\u0440\u0438\u043e\u0434", // период
  "\u0441\u043a\u043b\u0430\u0434\u0443", // складу
  "\u0441\u043a\u043b\u0430\u0434\u0435", // складе
  "\u0431\u0430\u043d\u043a\u0443", // банку
  "\u0431\u0430\u043d\u043a\u0435", // банке
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c", // документам
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b", // документы
  "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443", // документу
  "\u0434\u0430\u043d\u043d\u044b\u043c", // данным
  "\u044d\u0442\u0438\u043c", // этим
  "\u044d\u0442\u0438\u043c\u0438", // этими
  "\u0438\u0442\u043e\u0433\u0443", // итогу
  "\u0438\u0442\u043e\u0433\u0430\u043c", // итогам
  "\u0432\u0441\u0435\u043c\u0443", // всему
  "\u0432\u0441\u0435\u0439", // всей
  "\u0432\u0441\u0435\u043c", // всем
  "\u0432\u044b\u0432\u043e\u0434\u0443", // выводу
  "\u0432\u044b\u0432\u043e\u0434\u0430\u043c" // выводам
]);

/**
 * Heuristic: does the first "по <token>" phrase in the text look like it names a counterparty?
 *
 * Finds the first occurrence of the preposition "по" (at the start of the text
 * or after whitespace / an opening parenthesis) followed by a token that starts
 * with a letter or digit. The token is lower-cased and checked against a list
 * of generic words ("по договору", "по счету", "по периоду", ...); anything
 * not on that list is treated as a likely counterparty name.
 *
 * Only the first "по <token>" phrase is examined; later ones are ignored.
 *
 * @param text Raw query text.
 * @returns `true` when the first "по <token>" phrase exists and its token is not a stop word.
 */
function hasUnicodeLikelyCounterpartyAfterBy(text: string): boolean {
  const match = text.match(
    /(?:^|[\s(])\u043f\u043e\s+([\p{L}\d][\p{L}\d._-]{1,})(?=$|[\s,.;:!?()])/iu
  );
  if (!match) {
    return false;
  }

  // The token class admits '.', '_' and '-', so a sentence-final "по договору."
  // is captured as "договору." and would slip past the stop-word list.
  // Strip such trailing punctuation before the lookup.
  const token = (match[1] ?? "").toLowerCase().replace(/[._-]+$/u, "");

  // The pattern itself demands at least two characters; keep that minimum
  // after trailing punctuation has been removed.
  if (token.length < 2) {
    return false;
  }

  return !COUNTERPARTY_AFTER_BY_STOP_WORDS.has(token);
}
/**
 * Detects a question about a counterparty's activity history ("lifecycle").
 *
 * The lower-cased text must contain a cue AND an anchor.
 *
 * Cue (either one):
 *  - assessment cue: an "assess / characterise / what can be said about"
 *    phrase ("как <word> оценишь|оценить|оцениваешь", "оценить", "оценка",
 *    "охарактеризуешь|охарактеризовать", "что можно сказать о") together
 *    with one of the stems "деятельност", "активност", "работ";
 *  - age cue: "сколько лет активности", "сколько лет в базе",
 *    "возраст активности", "первая/первый/первое
 *    активность|платеж|поступление|документ", "последняя/последний/последнее
 *    активность", "с какого года актив".
 *
 * Anchor (any one):
 *  - a 1C mention: "в базе 1с", "в 1с базе", "из 1с" (Cyrillic "с" or Latin "c");
 *  - a business anchor: one of the stems "компан", "организац", "контрагент",
 *    "клиент", "поставщик", or a standalone legal-form abbreviation
 *    (ооо, оао, пао, нао, зао, ао, ип);
 *  - a likely counterparty token after "по"
 *    (delegated to `hasUnicodeLikelyCounterpartyAfterBy`).
 *
 * @param text Raw query text; `null`/`undefined` are tolerated and yield `false`.
 * @returns `true` when a cue and an anchor are both present.
 */
function hasUnicodeCounterpartyActivityLifecycleSignal(text: string): boolean {
  const normalized = String(text ?? "").toLowerCase();
  if (!normalized) {
    return false;
  }

  const hasActivityAssessmentCue =
    /(?:\u043a\u0430\u043a\s+[\p{L}\d_-]+\s+\u043e\u0446\u0435\u043d(?:\u0438\u0448\u044c|\u0438\u0442\u044c|\u0438\u0432\u0430\u0435\u0448\u044c)|\u043e\u0446\u0435\u043d(?:\u0438\u0442\u044c|\u043a\u0430)|\u043e\u0445\u0430\u0440\u0430\u043a\u0442\u0435\u0440\u0438\u0437(?:\u0443\u0435\u0448\u044c|\u043e\u0432\u0430\u0442\u044c)|\u0447\u0442\u043e\s+\u043c\u043e\u0436\u043d\u043e\s+\u0441\u043a\u0430\u0437\u0430\u0442\u044c\s+\u043e)/iu.test(
      normalized
    ) &&
    /(?:\u0434\u0435\u044f\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442|\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442|\u0440\u0430\u0431\u043e\u0442)/iu.test(
      normalized
    );

  const hasActivityAgeCue =
    /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+\u043b\u0435\u0442\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u0438|\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+\u043b\u0435\u0442\s+\u0432\s+\u0431\u0430\u0437\u0435|\u0432\u043e\u0437\u0440\u0430\u0441\u0442\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u0438|\u043f\u0435\u0440\u0432(?:\u0430\u044f|\u044b\u0439|\u043e\u0435)\s+(?:\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u044c|\u043f\u043b\u0430\u0442\u0435\u0436|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0435|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442)|\u043f\u043e\u0441\u043b\u0435\u0434\u043d(?:\u044f\u044f|\u0438\u0439|\u0435\u0435)\s+\u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0441\u0442\u044c|\u0441\s+\u043a\u0430\u043a\u043e\u0433\u043e\s+\u0433\u043e\u0434\u0430\s+\u0430\u043a\u0442\u0438\u0432)/iu.test(
      normalized
    );

  if (!hasActivityAgeCue && !hasActivityAssessmentCue) {
    return false;
  }

  // "в базе 1с" | "в 1с базе" | "из 1с"
  const hasOneCLexeme =
    /(?:\u0432\s+\u0431\u0430\u0437\u0435\s+1[\u0441c]|\u0432\s+1[\u0441c]\s+\u0431\u0430\u0437\u0435|\u0438\u0437\s+1[\u0441c])/iu.test(
      normalized
    );

  // Stems: компан | организац | контрагент | клиент | поставщик.
  // ("нашей компании" / "нашей организации" are already covered by the stems.)
  const hasBusinessStem =
    /(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u043e\u0440\u0433\u0430\u043d\u0438\u0437\u0430\u0446|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a)/iu.test(
      normalized
    );

  // Legal-form abbreviations: ооо | оао | пао | нао | зао | ао | ип.
  // They must stand alone: as bare substrings "ип" and "ао" also occur inside
  // ordinary words (e.g. "прототипа") and would produce false anchors.
  // `\b` is ASCII-only in JavaScript, hence the explicit Unicode lookarounds.
  const hasLegalFormToken =
    /(?<![\p{L}\d])(?:\u043e\u043e\u043e|\u043e\u0430\u043e|\u043f\u0430\u043e|\u043d\u0430\u043e|\u0437\u0430\u043e|\u0430\u043e|\u0438\u043f)(?![\p{L}\d])/iu.test(
      normalized
    );

  const hasBusinessAnchor = hasBusinessStem || hasLegalFormToken;

  return hasOneCLexeme || hasBusinessAnchor || hasUnicodeLikelyCounterpartyAfterBy(normalized);
}
/**
 * Maps a free-form query to a counterparty/contract-related intent.
 *
 * Rules are evaluated top to bottom and the first one that fires wins, so the
 * order of the `if` blocks below is part of the contract. The built-in
 * Unicode heuristics from this file run first; the remaining rules combine the
 * caller-supplied predicates and hint lists in `deps`.
 *
 * @param text Raw query text.
 * @param deps Caller-supplied hint lists and text predicates.
 * @returns The resolution for the first matching rule, or `null` when no rule matches.
 */
export function resolveCounterpartyAddressIntent(
  text: string,
  deps: CounterpartyIntentDeps
): AddressIntentResolution | null {
  // Open items that name account 60/62/76. A former inline check for account 60
  // matched a strict subset of this helper and returned the same result, so it
  // was unreachable and has been removed.
  if (hasUnicodeOpenItemsAccountSignal(text)) {
    return {
      intent: "open_items_by_counterparty_or_contract",
      confidence: "medium",
      reasons: ["open_items_signal_detected"]
    };
  }

  // Shipment questions (active or passive voice). The helpers hold the only
  // copy of these patterns; identical inline duplicates have been removed.
  if (
    hasUnicodeCounterpartyShipmentItemFlowSignal(text) ||
    hasUnicodePassiveShipmentByCounterpartySignal(text)
  ) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: ["counterparty_item_flow_signal_detected"]
    };
  }

  if (hasUnicodeCounterpartyActivityLifecycleSignal(text)) {
    return {
      intent: "counterparty_activity_lifecycle",
      confidence: "high",
      reasons: ["counterparty_activity_lifecycle_signal_detected"]
    };
  }

  if (deps.hasOpenContractsListSignal(text)) {
    return {
      intent: "open_contracts_confirmed_as_of_date",
      confidence: "medium",
      reasons: ["open_contract_signal_detected"]
    };
  }

  // Hint-based open items: needs an open-items hint, none of the vetoing
  // debt-longevity/inventory signals, and some party/contract/account anchor.
  if (
    deps.hasAny(text, deps.openItemsHints) &&
    !deps.hasCounterpartyDebtLongevitySignal(text) &&
    !deps.hasInventoryAgingSignal(text) &&
    !deps.hasInventoryProvenanceSignalV2(text) &&
    !deps.hasInventoryPurchaseDocumentsSignalV2(text) &&
    !deps.hasInventorySaleTraceSignalV2(text) &&
    (/(?:контраг|договор|контракт|counterparty|contract|покупател|клиент|заказчик|customer|client|buyer|supplier|поставщик)/iu.test(
      text
    ) ||
      deps.hasAccountNumberAnchor(text) ||
      deps.hasCompactAccountCodeToken(text))
  ) {
    return {
      intent: "open_items_by_counterparty_or_contract",
      confidence: "medium",
      reasons: ["open_items_signal_detected"]
    };
  }

  if (
    deps.hasPeriodCoverageProfileSignal(text) &&
    !deps.hasPartyAnchorMention(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "period_coverage_profile",
      confidence: "high",
      reasons: ["period_coverage_profile_signal_detected"]
    };
  }

  if (
    deps.hasDocumentTypeAndAccountSectionProfileSignal(text) &&
    !deps.hasPartyAnchorMention(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "document_type_and_account_section_profile",
      confidence: "high",
      reasons: ["document_type_and_account_section_profile_signal_detected"]
    };
  }

  if (
    deps.hasCounterpartyPopulationAndRolesSignal(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "counterparty_population_and_roles",
      confidence: "high",
      reasons: ["counterparty_population_and_roles_signal_detected"]
    };
  }

  if (
    deps.hasCounterpartyActivityLifecycleSignal(text) &&
    !deps.hasContractAnchorSignal(text) &&
    !deps.hasAccountBalanceSignal(text)
  ) {
    return {
      intent: "counterparty_activity_lifecycle",
      confidence: "high",
      reasons: ["counterparty_activity_lifecycle_signal_detected"]
    };
  }

  if (
    deps.hasContractUsageOverviewSignal(text) &&
    !deps.hasAccountBalanceSignal(text) &&
    !deps.hasOpenContractsListSignal(text)
  ) {
    return {
      intent: "contract_usage_overview",
      confidence: "high",
      reasons: ["contract_usage_overview_signal_detected"]
    };
  }

  if (deps.hasCustomerRevenueAndPaymentsSignal(text) && !deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "customer_revenue_and_payments",
      confidence: "high",
      reasons: ["customer_revenue_and_payments_signal_detected"]
    };
  }

  if (deps.hasSupplierPayoutsProfileSignal(text) && !deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "supplier_payouts_profile",
      confidence: "high",
      reasons: ["supplier_payouts_profile_signal_detected"]
    };
  }

  if (
    deps.hasContractUsageAndValueSignal(text) &&
    !deps.hasAccountBalanceSignal(text) &&
    !deps.hasOpenContractsListSignal(text)
  ) {
    return {
      intent: "contract_usage_and_value",
      confidence: "high",
      reasons: ["contract_usage_and_value_signal_detected"]
    };
  }

  if (deps.hasContractListByCounterpartySignal(text)) {
    return {
      intent: "list_contracts_by_counterparty",
      confidence: "medium",
      reasons: ["contracts_by_counterparty_signal_detected"]
    };
  }

  if (deps.hasContractAnchorSignal(text) && deps.hasBankOperationSignal(text)) {
    return {
      intent: "bank_operations_by_contract",
      confidence: "medium",
      reasons: ["bank_ops_by_contract_signal_detected"]
    };
  }

  if (
    deps.hasContractAnchorSignal(text) &&
    (deps.hasAny(text, deps.documentsByContractHints) || deps.hasDocumentSignal(text))
  ) {
    return {
      intent: "list_documents_by_contract",
      confidence: "medium",
      reasons: ["documents_by_contract_signal_detected"]
    };
  }

  if (
    deps.hasAny(text, deps.bankOperationsByCounterpartyHints) &&
    (deps.hasPartyAnchorMention(text) ||
      deps.hasLooseByAnchorMention(text) ||
      deps.hasHeuristicCounterpartyAnchor(text))
  ) {
    return {
      intent: "bank_operations_by_counterparty",
      confidence: "medium",
      reasons: ["bank_ops_by_counterparty_signal_detected"]
    };
  }

  if (hasUnicodeDocumentSignal(text) && hasUnicodeLikelyCounterpartyAfterBy(text)) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: ["documents_by_counterparty_signal_detected"]
    };
  }

  // The shipment/item-flow signal counts as both the trigger and the anchor,
  // so it is sufficient on its own. Evaluate it once and reuse the result.
  const hasShipmentItemFlow = deps.hasCounterpartyShipmentItemFlowSignal(text);
  if (
    hasShipmentItemFlow ||
    (deps.hasAny(text, deps.documentsByCounterpartyHints) &&
      (deps.hasPartyAnchorMention(text) ||
        deps.hasLooseByAnchorMention(text) ||
        deps.hasImplicitCounterpartyAnchorAroundDocs(text) ||
        deps.hasHeuristicCounterpartyAnchor(text)))
  ) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "medium",
      reasons: [
        hasShipmentItemFlow
          ? "counterparty_item_flow_signal_detected"
          : "documents_by_counterparty_signal_detected"
      ]
    };
  }

  if (deps.hasAccountBalanceSignal(text)) {
    return {
      intent: "account_balance_snapshot",
      confidence: "high",
      reasons: ["account_balance_signal_detected"]
    };
  }

  if (deps.hasLooseByAnchorMention(text) && deps.hasGenericAddressLookupSignal(text)) {
    return {
      intent: "list_documents_by_counterparty",
      confidence: "low",
      reasons: ["generic_lookup_with_loose_anchor_fallback"]
    };
  }

  // Last-resort open-contracts rule. The contract word is matched
  // case-insensitively, like every other pattern in this function, so that
  // "Договор" / "Contract" are not missed when the text is not lower-cased.
  if (
    deps.hasAny(text, deps.openContractsHints) &&
    /(?:договор|контракт|contract)/iu.test(text)
  ) {
    return {
      intent: "open_contracts_confirmed_as_of_date",
      confidence: "medium",
      reasons: ["open_contract_signal_detected"]
    };
  }

  return null;
}