// Source listing metadata: 307 lines, 6.9 KiB, TypeScript.
import type { AddressModeDetection } from "../types/addressQuery";
/**
 * Substrings that signal a lookup-style request ("show", "list", "сколько", ...).
 *
 * Entries are compared with a plain substring search against the lower-cased
 * message, so truncated Russian stems such as "покаж" or "задолж" cover several
 * inflected forms, and multi-word entries such as "что по" must appear verbatim.
 */
const ADDRESS_ACTION_TOKENS = [
  "show",
  "list",
  "find",
  "get",
  "lookup",
  "open",
  "balance",
  "debt",
  "owe",
  "покажи",
  "покаж",
  "показ",
  "список",
  "найди",
  "найд",
  "выведи",
  "вывед",
  "кто",
  "кому",
  "какие",
  "что по",
  "че по",
  "чё по",
  "остаток",
  "скока",
  "сколько",
  "долг",
  "задолж",
  "хвост",
  "незакрыт"
];
/**
 * Substrings that name the kind of object a lookup is about: counterparties,
 * contracts, accounts, documents, bank operations, balances and debts.
 *
 * Entries are compared with a plain substring search against the lower-cased
 * message; Russian entries are mostly stems ("компан", "организац", "выписк")
 * so that inflected forms match as well.
 */
const ADDRESS_ENTITY_TOKENS = [
  "counterparty",
  "counterparties",
  "company",
  "organization",
  "supplier",
  "vendor",
  "customer",
  "client",
  "partner",
  "contract",
  "contracts",
  "account",
  "accounts",
  "document",
  "documents",
  "balance",
  "payable",
  "payables",
  "receivable",
  "receivables",
  "owe",
  "owes",
  "owed",
  "контрагент",
  "контра",
  "компан",
  "организац",
  "поставщик",
  "клиент",
  "покупател",
  "партнер",
  "банк",
  "выписк",
  "операц",
  "транзак",
  "договор",
  "счет",
  "счёт",
  "документ",
  "доки",
  "док",
  "остаток",
  "дебитор",
  "кредитор",
  "аванс",
  "оплат",
  "поступлен",
  "поступлени",
  "списан",
  "списани",
  "долг",
  "должен",
  "должны",
  "должна"
];
/**
 * Substrings that indicate the user is asking for an explanation ("why",
 * "root cause", "почему") rather than a plain lookup.
 *
 * Entries are compared with a plain substring search against the lower-cased
 * message; Russian entries such as "причин" and "ошибк" are stems.
 */
const DEEP_REASONING_TOKENS = [
  "why",
  "because",
  "root cause",
  "mechanism",
  "prove",
  "chain",
  "почему",
  "причин",
  "механизм",
  "докажи",
  "цепоч",
  "разрыв",
  "ошибк"
];
/**
 * Generic nouns that may follow the preposition "по" without naming a concrete
 * object ("по контрагенту", "по периоду", ...). Built once at module load.
 */
const LOOSE_BY_ANCHOR_STOP_WORDS: ReadonlySet<string> = new Set([
  "контрагенту",
  "контрагента",
  "контре",
  "компании",
  "компанию",
  "организации",
  "организацию",
  "поставщику",
  "поставщика",
  "клиенту",
  "клиента",
  "покупателю",
  "покупателя",
  "партнеру",
  "партнера",
  "договору",
  "договора",
  "счету",
  "счёту",
  "дате",
  "периоду",
  "период",
  "документам",
  "докам",
  "взаиморасчетам",
  "взаиморасчётам"
]);

/**
 * Reports whether the text contains a "по <name>" phrase whose <name> is not a
 * generic stop word, e.g. "что по свк".
 *
 * Every "по <token>" occurrence is inspected, so a generic phrase earlier in the
 * message ("по периоду по свк") does not hide a concrete anchor that follows it.
 *
 * @param text Message text; matching is case-insensitive.
 * @returns `true` when at least one "по <token>" phrase has a non-stop-word token.
 */
function hasLooseByAnchorMention(text: string): boolean {
  // A fresh regex object per call keeps the stateful `lastIndex` of the global
  // flag local to this invocation.
  const anchorPattern = /(?:^|\s)по\s+([a-zа-яё][a-zа-яё0-9._-]{1,})(?=[\s,.;:!?)]|$)/giu;

  let match: RegExpExecArray | null;
  while ((match = anchorPattern.exec(text)) !== null) {
    // The token class admits ".", "_" and "-", so sentence punctuation can be
    // captured at the end ("договору."); strip it before the stop-word lookup.
    const token = (match[1] ?? "").toLowerCase().replace(/[._-]+$/u, "");
    if (token && !LOOSE_BY_ANCHOR_STOP_WORDS.has(token)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether the text contains a follow-up style cue: either an
 * "all time / any period" phrase or an existence question ("есть ли",
 * "есть что-то", "что есть").
 *
 * The existence phrases are delimited with Unicode-aware lookarounds instead of
 * `\b`: `\b` is defined in terms of ASCII word characters, so it never matches
 * at the edge of a Cyrillic word surrounded by spaces or string boundaries.
 *
 * @param text Message text; matching is case-insensitive.
 * @returns `true` when any follow-up cue is present.
 */
function hasAddressFollowupSignal(text: string): boolean {
  const allTimePattern = /(?:за\s+любой\s+период|за\s+вс[её]\s+время|for\s+all\s+time|all\s+time)/iu;
  // The phrase must not be glued to a letter, digit or underscore on either side.
  const existencePattern =
    /(?<![\p{L}\p{N}_])(?:есть\s+что(?:-|\s)?то|есть\s+ли|что\s+есть)(?![\p{L}\p{N}_])/iu;

  return allTimePattern.test(text) || existencePattern.test(text);
}
/**
 * Reports whether the text mentions documents or bank activity (documents,
 * bank, statements, payments, receipts, write-offs, transactions) in Russian or
 * English.
 *
 * The pattern is unanchored, so each alternative matches anywhere in the text,
 * including inside a longer word.
 *
 * @param text Message text; matching is case-insensitive.
 * @returns `true` when any document/bank keyword occurs.
 */
function hasDocsOrBankSignal(text: string): boolean {
  const docsOrBankPattern =
    /(?:док(?:и|умент|ументы|ументов)|docs?|documents?|банк|выписк|платеж|платёж|оплат|поступлен|списан|транзак|transactions?|bank\s+ops|bank\s+operations?)/iu;
  return docsOrBankPattern.test(text);
}
/**
 * Reports whether the text contains a token shaped like "NN.N" or "NN.NN"
 * (dot or comma separator), e.g. "60.01" or "62,1".
 *
 * The match must not be adjacent to another digit or a hyphen on either side,
 * so "123.45" and "60.011" are rejected. A following dot is allowed, which means
 * the leading "12.05" of "12.05.2024" also qualifies.
 *
 * @param text Message text.
 * @returns `true` when such a two-digit code with a one- or two-digit suffix occurs.
 */
function hasAccountCodeAnchor(text: string): boolean {
  const accountCodePattern = /(?<![\d-])\d{2}(?:[.,]\d{1,2})(?![\d-])/u;
  return accountCodePattern.test(text);
}
/**
 * Words that never count as a counterparty name: prepositions, generic
 * document/bank nouns, year markers, politeness fillers, expletives and command
 * verbs. Built once at module load.
 */
const COUNTERPARTY_NOISE_WORDS: ReadonlySet<string> = new Set([
  "за",
  "с",
  "по",
  "на",
  "и",
  "или",
  "док",
  "доки",
  "документ",
  "документы",
  "документов",
  "банк",
  "банковские",
  "операции",
  "платежи",
  "платеж",
  "платёж",
  "контрагент",
  "контрагенту",
  "контрагента",
  "компания",
  "компании",
  "организация",
  "организации",
  "год",
  "года",
  "г",
  "плс",
  "pls",
  "пж",
  "пжлст",
  "пожалуйста",
  "бля",
  "блять",
  "епт",
  "ёпт",
  "епта",
  "нах",
  "нахуй",
  "покеж",
  "покажи",
  "показать",
  "покаж",
  "выведи",
  "show",
  "list",
  "please",
  "all",
  "vse"
]);

/**
 * Reports whether the text contains at least one token that could plausibly be
 * a counterparty name.
 *
 * The text is split on anything other than Latin/Cyrillic letters, digits, ".",
 * "_" and "-". A token qualifies when, after stripping "."/"_"/"-" from its
 * edges, it is at least two characters long, contains a letter, and is not a
 * noise word. Stripping the edges keeps sentence punctuation ("2020.",
 * "покажи.") from slipping past the noise-word and numeric filters; requiring a
 * letter rejects years, plain numbers, dates and account codes.
 *
 * @param text Message text; `null`/`undefined` are treated as empty.
 * @returns `true` when a qualifying token exists.
 */
function hasLikelyCounterpartyToken(text: string): boolean {
  return String(text ?? "")
    .split(/[^a-zа-яё0-9._-]+/iu)
    .map((rawToken) => rawToken.replace(/^[._-]+|[._-]+$/gu, "").toLowerCase())
    .some(
      (token) =>
        token.length >= 2 && /\p{L}/u.test(token) && !COUNTERPARTY_NOISE_WORDS.has(token)
    );
}
/**
 * Reports whether `text` contains at least one of `tokens` as a substring.
 *
 * The comparison is case-sensitive and not word-aware: a token also matches
 * inside a longer word.
 *
 * @param text Text to search.
 * @param tokens Candidate substrings; accepted as read-only because the list is
 *   only iterated.
 * @returns `true` when any token occurs in `text`; `false` for an empty list.
 */
function hasAnyToken(text: string, tokens: readonly string[]): boolean {
  return tokens.some((token) => text.includes(token));
}
/**
 * Classifies a user message as an address-style lookup, a deep-analysis
 * question, or unsupported, using keyword and pattern heuristics on the trimmed,
 * lower-cased text.
 *
 * Decision order:
 * 1. Empty message → `unsupported` / `low`.
 * 2. Any deep-reasoning token → `deep_analysis` / `high`; this takes precedence
 *    over every address signal.
 * 3. Action token plus (entity token or account code) → `address_query` / `high`.
 * 4. Loose "по <name>" anchor plus any of action, entity, account code or
 *    follow-up cue → `address_query` / `medium`.
 * 5. Entity token or account code alone → `address_query` / `medium`.
 * 6. Document/bank keyword plus (loose anchor or likely counterparty token)
 *    → `address_query` / `medium`.
 * 7. Otherwise → `unsupported` / `low`.
 *
 * @param userMessage Raw user message; `null`/`undefined` are treated as empty.
 * @returns The detected mode, a confidence level and the reason codes that fired.
 */
export function detectAddressQuestionMode(userMessage: string): AddressModeDetection {
  const text = String(userMessage ?? "").trim().toLowerCase();
  if (!text) {
    return { mode: "unsupported", confidence: "low", reasons: ["empty_message"] };
  }

  // Deep-reasoning vocabulary vetoes every address branch, so it is settled
  // first instead of being re-checked in each condition below.
  if (hasAnyToken(text, DEEP_REASONING_TOKENS)) {
    return {
      mode: "deep_analysis",
      confidence: "high",
      reasons: ["deep_reasoning_signal_detected"]
    };
  }

  const hasAddressAction = hasAnyToken(text, ADDRESS_ACTION_TOKENS);
  const hasAddressEntity = hasAnyToken(text, ADDRESS_ENTITY_TOKENS);
  const hasLooseByAnchor = hasLooseByAnchorMention(text);
  const hasFollowupSignal = hasAddressFollowupSignal(text);
  // An account code is treated the same as a named entity in every branch.
  const hasEntityOrAccountCode = hasAddressEntity || hasAccountCodeAnchor(text);

  if (hasAddressAction && hasEntityOrAccountCode) {
    return {
      mode: "address_query",
      confidence: "high",
      reasons: ["address_action_detected", "address_entity_detected"]
    };
  }

  if (hasLooseByAnchor && (hasAddressAction || hasEntityOrAccountCode || hasFollowupSignal)) {
    return {
      mode: "address_query",
      confidence: "medium",
      reasons: ["loose_by_anchor_detected", ...(hasFollowupSignal ? ["address_followup_signal_detected"] : [])]
    };
  }

  if (hasEntityOrAccountCode) {
    return {
      mode: "address_query",
      confidence: "medium",
      reasons: ["address_entity_detected"]
    };
  }

  if (hasDocsOrBankSignal(text) && (hasLooseByAnchor || hasLikelyCounterpartyToken(text))) {
    return {
      mode: "address_query",
      confidence: "medium",
      reasons: ["docs_or_bank_signal_detected", "anchor_like_token_detected"]
    };
  }

  return {
    mode: "unsupported",
    confidence: "low",
    reasons: ["no_address_or_deep_signal"]
  };
}