From 58b293a3e4e32e273c4f766ec5144e53539128ab Mon Sep 17 00:00:00 2001 From: dctouch Date: Fri, 3 Apr 2026 00:05:58 +0300 Subject: [PATCH] =?UTF-8?q?=D0=90=D0=94=D0=A0=D0=95=D0=A1=D0=9D=D0=AB?= =?UTF-8?q?=D0=99=20=D0=A0=D0=95=D0=96=D0=98=D0=9C=20-ADDRESS:=D0=A8=D0=B0?= =?UTF-8?q?=D0=B3=202=20-=20=20=D0=A3=D0=BD=D0=B8=D0=B2=D0=B5=D1=80=D1=81?= =?UTF-8?q?=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20value-=D0=B2?= =?UTF-8?q?=D0=BE=D0=BF=D1=80=D0=BE=D1=81=D0=BE=D0=B2=20=D0=BE=D0=B1=D1=89?= =?UTF-8?q?=D0=B5=D0=B3=D0=BE=20=D0=B4=D0=BE=D0=BC=D0=B5=D0=BD=D0=B0=20(TO?= =?UTF-8?q?P-20,=20=D0=B1=D0=B5=D0=B7=20=D1=81=D0=BB=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D1=80=D0=B5=D0=B9=20=D0=BA=D0=BB=D0=B8=D0=B5=D0=BD=D1=82=D0=BE?= =?UTF-8?q?=D0=B2/=D0=BF=D0=BE=D1=81=D1=82=D0=B0=D0=B2=D1=89=D0=B8=D0=BA?= =?UTF-8?q?=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...n_questions_analysis_plan_v1_2026-04-02.md | 2 + .../temp_batch3_value_top20_2026-04-02.json | 274 +++++++++++ .../dist/services/addressFilterExtractor.js | 7 +- .../dist/services/addressIntentResolver.js | 216 +++++++++ .../dist/services/addressQueryClassifier.js | 10 + .../dist/services/addressQueryService.js | 5 +- .../dist/services/addressRecipeCatalog.js | 137 +++++- .../services/address_runtime/composeStage.js | 320 +++++++++++++ .../address_runtime/predecomposeContract.js | 5 +- .../src/services/addressFilterExtractor.ts | 7 +- .../src/services/addressIntentResolver.ts | 254 +++++++++++ .../src/services/addressQueryClassifier.ts | 10 + .../src/services/addressQueryService.ts | 3 + .../src/services/addressRecipeCatalog.ts | 121 +++++ .../services/address_runtime/composeStage.ts | 431 ++++++++++++++++++ .../address_runtime/predecomposeContract.ts | 5 +- .../backend/src/types/addressQuery.ts | 6 + .../tests/addressQueryRuntimeM23.test.ts | 333 ++++++++++++++ 18 files changed, 2126 insertions(+), 20 deletions(-) create mode 100644 docs/ADDRESS/question_sets/temp_batch3_value_top20_2026-04-02.json diff --git a/docs/ADDRESS/address_query/general_domain_questions_analysis_plan_v1_2026-04-02.md b/docs/ADDRESS/address_query/general_domain_questions_analysis_plan_v1_2026-04-02.md index 9764125..4e7b516 100644 --- a/docs/ADDRESS/address_query/general_domain_questions_analysis_plan_v1_2026-04-02.md +++ b/docs/ADDRESS/address_query/general_domain_questions_analysis_plan_v1_2026-04-02.md @@ -317,6 +317,7 @@ Routes: Результат: - клиентская/поставщическая ценность и контрактные рейтинги. +- стандарт ранжирования для управленческой выдачи: `top-20` (если пользователь явно не просит другой лимит). ### Batch 4 (задолженности и aging) Вопросы: @@ -350,6 +351,7 @@ Routes: - `strict_pass(route)=100%` на domain pack - `false_factual_rate=0` - `execution_error_count=0` +- для ranking-вопросов в acceptance-паке использовать `top-20` как дефолтный формат ответа. 3. После каждой пачки: - обязательный global regression `102 + 25` diff --git a/docs/ADDRESS/question_sets/temp_batch3_value_top20_2026-04-02.json b/docs/ADDRESS/question_sets/temp_batch3_value_top20_2026-04-02.json new file mode 100644 index 0000000..857d7ea --- /dev/null +++ b/docs/ADDRESS/question_sets/temp_batch3_value_top20_2026-04-02.json @@ -0,0 +1,274 @@ +[ + { + "id": "B3_C001", + "group": "canonical", + "text": "Покажи топ-20 заказчиков по сумме поступлений за все время.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C002", + "group": "canonical", + "text": "Покажи топ-20 заказчиков по сумме поступлений за 2020 год.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C003", + "group": "canonical", + "text": "Покажи топ-20 заказчиков по количеству входящих платежных операций за все время.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C004", + "group": "canonical", + "text": "Покажи топ-20 заказчиков по максимальной сумме одной входящей операции за все время.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C005", + "group": "canonical", + "text": "Покажи топ-20 заказчиков по среднему чеку среди активных клиентов (минимум 3 входящие операции).", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C006", + "group": "canonical", + "text": "Покажи топ-20 самых крупных разовых сделок по поступлениям (дата, контрагент, документ, сумма).", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C007", + "group": "canonical", + "text": "Покажи топ-20 самых маленьких разовых сделок по поступлениям среди активных заказчиков.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C008", + "group": "canonical", + "text": "Покажи топ-20 поставщиков по сумме выплат за все время.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C009", + "group": "canonical", + "text": "Покажи топ-20 поставщиков по сумме выплат за 2020 год.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C010", + "group": "canonical", + "text": "Покажи топ-20 поставщиков по количеству исходящих платежных операций за все время.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C011", + "group": "canonical", + "text": "Покажи топ-20 самых крупных разовых выплат поставщикам (дата, контрагент, документ, сумма).", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C012", + "group": "canonical", + "text": "Покажи топ-20 договоров по сумме оборота за все время.", + "expected_intent": "contract_usage_and_value", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_C013", + "group": "canonical", + "text": "Покажи топ-20 договоров с минимальным бюджетом среди активных договоров.", + "expected_intent": "contract_usage_and_value", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N001", + "group": "noisy_slang", + "text": "какие клиенты самые доходные, выдай топ-20", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N002", + "group": "noisy_slang", + "text": "топ-20 заказчиков по деньгам за все время", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N003", + "group": "noisy_slang", + "text": "за 20й год кто нам больше всего занес, топ-20", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N004", + "group": "noisy_slang", + "text": "кто платит чаще всего, дай топ-20", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N005", + "group": "noisy_slang", + "text": "покажи топ-20 самых жирных сделок по поступлениям", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N006", + "group": "noisy_slang", + "text": "покажи топ-20 самых маленьких сделок по бюджету", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N007", + "group": "noisy_slang", + "text": "кому мы больше всего сгрузили денег, топ-20 поставщиков", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N008", + "group": "noisy_slang", + "text": "топ-20 поставщиков по выплатам за все время", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N009", + "group": "noisy_slang", + "text": "за 2020 год кому ушло больше всего денег, топ-20", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N010", + "group": "noisy_slang", + "text": "поставщики с максимальным числом выплат, топ-20", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N011", + "group": "noisy_slang", + "text": "договоры по обороту ранкни и дай топ-20", + "expected_intent": "contract_usage_and_value", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_N012", + "group": "noisy_slang", + "text": "покажи топ-20 договоров с самым мелким бюджетом, но только активные", + "expected_intent": "contract_usage_and_value", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F001", + "group": "followup_chain", + "session": "b3_customer_value_chain", + "text": "Покажи топ-20 заказчиков по сумме поступлений за все время.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F002", + "group": "followup_chain", + "session": "b3_customer_value_chain", + "text": "Теперь только за 2020 год, тоже топ-20.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F003", + "group": "followup_chain", + "session": "b3_customer_value_chain", + "text": "И отдельно покажи топ-20 по частоте входящих платежей.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F004", + "group": "followup_chain", + "session": "b3_supplier_value_chain", + "text": "Покажи топ-20 поставщиков по сумме выплат за все время.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F005", + "group": "followup_chain", + "session": "b3_supplier_value_chain", + "text": "Теперь за 2020 год, тоже топ-20.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F006", + "group": "followup_chain", + "session": "b3_supplier_value_chain", + "text": "И дай топ-20 поставщиков по количеству выплат.", + "expected_intent": "supplier_payouts_profile", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F007", + "group": "followup_chain", + "session": "b3_deals_chain", + "text": "Покажи топ-20 самых крупных разовых сделок по поступлениям.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + }, + { + "id": "B3_F008", + "group": "followup_chain", + "session": "b3_deals_chain", + "text": "А теперь топ-20 самых маленьких сделок по бюджету среди активных заказчиков.", + "expected_intent": "customer_revenue_and_payments", + "expected_mode": "address_query", + "expected_reply_type": "factual" + } +] diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index bc557dc..ca2fa0f 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -6,7 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.extractAddressFilters = extractAddressFilters; const iconv_lite_1 = __importDefault(require("iconv-lite")); const ACCOUNT_PATTERN = /(?:сч[её]т|счет|account)[^0-9]{0,12}(\d{2}(?:[.,]\d{1,2})?)/i; -const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|\bпервые\b|\bтоп\b)\s*(\d{1,3})/i; +const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu; const COUNTERPARTY_PATTERN = /(?:по\s+контрагенту|контрагент(?:у|а)?|по\s+контре|контра|по\s+компан(?:ии|ию|ия)|компан(?:ия|ии|ию)|по\s+организац(?:ии|ию|ия)|организац(?:ия|ии|ию)|по\s+поставщик(?:у|а)?|поставщик(?:у|а)?|по\s+клиент(?:у|а)?|клиент(?:у|а)?|по\s+покупател(?:ю|я)|покупател(?:ю|я)|по\s+партнер(?:у|а)?|партнер(?:у|а)?|by\s+counterparty|counterparty|by\s+company|company|by\s+supplier|supplier|by\s+vendor|vendor|by\s+customer|customer|by\s+client|client|by\s+partner|partner)\s+([^\r\n,.;:]+)/iu; const CONTRACT_PATTERN = /(?:по\s+договору|договор(?:у|а)?\s*(?:№|#|n)?|by\s+contract|contract(?:\s*(?:no|number|#|n))?)\s+([^\r\n,.;:]+)/i; const DATE_DMY_PATTERN = /\b(\d{1,2})[.\/-](\d{1,2})[.\/-](\d{2,4})\b/; @@ -724,7 +724,10 @@ function extractAddressFilters(userMessage, intent) { intent === "document_type_and_account_section_profile" || intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || - intent === "contract_usage_overview"; + intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value"; const filters = { sort: "period_desc" }; diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index 4971ad5..332ec24 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -255,6 +255,47 @@ const CONTRACT_USAGE_OVERVIEW_HINTS = [ "contracts total used", "contract usage overview" ]; +const CUSTOMER_REVENUE_AND_PAYMENTS_HINTS = [ + "самые доходные клиенты", + "самые доходные заказчики", + "топ клиентов по сумме поступлений", + "топ заказчиков по сумме поступлений", + "кто нам больше всего занес", + "кто нам больше всего занёс", + "кто нам принес больше всего", + "кто нам принёс больше всего", + "кто платит чаще всего", + "средний чек клиентов", + "средний чек заказчиков", + "крупные сделки по поступлениям", + "маленькие сделки по поступлениям", + "smallest deals by inflow", + "largest deals by inflow", + "top customers by inflow", + "top customers by revenue" +]; +const SUPPLIER_PAYOUTS_PROFILE_HINTS = [ + "топ поставщиков по сумме выплат", + "кому мы больше всего заплатили", + "кому ушло больше всего денег", + "кому мы больше всего сгрузили денег", + "поставщики по выплатам", + "поставщики по исходящим платежам", + "поставщики с максимальным числом выплат", + "крупные разовые выплаты поставщикам", + "top suppliers by payouts", + "top suppliers by outgoing payments" +]; +const CONTRACT_USAGE_AND_VALUE_HINTS = [ + "договоры по обороту", + "договоры по сумме оборота", + "топ договоров по обороту", + "договоры с минимальным бюджетом", + "договоры с самым маленьким бюджетом", + "активные договоры по бюджету", + "contracts by turnover", + "contracts by budget" +]; const CONTRACT_LIST_BY_COUNTERPARTY_HINTS = [ "договоры по", "договора по", @@ -268,6 +309,82 @@ const CONTRACT_LIST_BY_COUNTERPARTY_HINTS = [ function hasAny(text, patterns) { return patterns.some((item) => text.includes(item)); } +function tokenizeText(text) { + return String(text ?? "") + .toLowerCase() + .split(/[^a-zа-яё0-9]+/iu) + .map((token) => token.trim()) + .filter((token) => token.length > 0); +} +function trimRussianEnding(token) { + return token.replace(/(?:иями|ями|ами|ого|ему|ому|ыми|ими|ией|ей|ий|ый|ой|ях|ах|ов|ев|ам|ям|ом|ем|ы|и|а|я|у|ю|е|о)$/u, ""); +} +function normalizeLexemeToken(rawToken) { + const token = String(rawToken ?? "").toLowerCase().replace(/[^a-zа-яё0-9]+/gu, ""); + if (!token) { + return ""; + } + if (/^[a-z0-9]+$/u.test(token)) { + return token; + } + return trimRussianEnding(token); +} +function levenshteinDistance(a, b) { + if (a === b) { + return 0; + } + if (!a.length) { + return b.length; + } + if (!b.length) { + return a.length; + } + const prev = new Array(b.length + 1); + const curr = new Array(b.length + 1); + for (let j = 0; j <= b.length; j += 1) { + prev[j] = j; + } + for (let i = 1; i <= a.length; i += 1) { + curr[0] = i; + for (let j = 1; j <= b.length; j += 1) { + const cost = a[i - 1] === b[j - 1] ? 0 : 1; + curr[j] = Math.min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost); + } + for (let j = 0; j <= b.length; j += 1) { + prev[j] = curr[j]; + } + } + return prev[b.length]; +} +function hasFuzzyLexeme(text, lexemeRoots) { + const normalizedRoots = lexemeRoots + .map((root) => normalizeLexemeToken(root)) + .filter((root) => root.length > 0); + if (normalizedRoots.length === 0) { + return false; + } + const tokens = tokenizeText(text) + .map((token) => normalizeLexemeToken(token)) + .filter((token) => token.length >= 4); + for (const token of tokens) { + for (const root of normalizedRoots) { + if (token.includes(root)) { + return true; + } + if (root.includes(token) && token.length >= 5) { + return true; + } + const maxDistance = root.length >= 7 ? 2 : 1; + if (Math.abs(token.length - root.length) > maxDistance) { + continue; + } + if (levenshteinDistance(token, root) <= maxDistance) { + return true; + } + } + } + return false; +} function hasCompactAccountCodeToken(text) { // Match compact account tokens like 60.01 / 62, while avoiding date fragments. return /(? ЗНАЧЕНИЕ(Справочник.ДоговорыКонтрагентов.ПустаяСсылка)` ]); } +function buildContractValueWhereClause(filters, fieldPath, contractFieldPath) { + return buildWhereClause(filters, fieldPath, [ + `${contractFieldPath} <> ЗНАЧЕНИЕ(Справочник.ДоговорыКонтрагентов.ПустаяСсылка)` + ]); +} function normalizeAccountTokenForQuery(value) { const source = String(value ?? "").trim().replace(",", "."); const match = source.match(/^(\d{2})(?:\.(\d{1,2}))?/); @@ -554,6 +646,9 @@ function maxLimitForIntent(intent) { intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value" || intent === "list_contracts_by_counterparty" || intent === "list_documents_by_counterparty" || intent === "bank_operations_by_counterparty" || @@ -636,19 +731,35 @@ function buildAddressRecipePlan(recipe, filters) { ? CONTRACT_USAGE_OVERVIEW_QUERY_TEMPLATE .replaceAll("__WHERE_OUT_USED__", buildUsedContractWhereClause(filters, "БанкСписание.Дата", "БанкСписание.ДоговорКонтрагента")) .replaceAll("__WHERE_IN_USED__", buildUsedContractWhereClause(filters, "БанкПоступление.Дата", "БанкПоступление.ДоговорКонтрагента")) - : recipe.query_template === "contracts_by_counterparty_profile" - ? CONTRACTS_BY_COUNTERPARTY_QUERY_TEMPLATE.replaceAll("__LIMIT__", String(resolvedLimit)) - : MOVEMENTS_QUERY_TEMPLATE - .replace("__LIMIT__", String(resolvedLimit)) - .replace("__WHERE_CLAUSE__", (() => { - const extraConditions = []; - const accountCondition = buildMovementAccountCondition(filters); - if (accountCondition) { - extraConditions.push(accountCondition); - } - return buildWhereClause(filters, "Движения.Период", extraConditions); - })()) - .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)); + : recipe.query_template === "customer_revenue_profile" + ? CUSTOMER_REVENUE_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll("__WHERE_IN__", buildWhereClause(filters, "БанкПоступление.Дата")) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) + : recipe.query_template === "supplier_payout_profile" + ? SUPPLIER_PAYOUT_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll("__WHERE_OUT__", buildWhereClause(filters, "БанкСписание.Дата")) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) + : recipe.query_template === "contract_value_profile" + ? CONTRACT_VALUE_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll("__WHERE_IN_VALUE__", buildContractValueWhereClause(filters, "БанкПоступление.Дата", "БанкПоступление.ДоговорКонтрагента")) + .replaceAll("__WHERE_OUT_VALUE__", buildContractValueWhereClause(filters, "БанкСписание.Дата", "БанкСписание.ДоговорКонтрагента")) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) + : recipe.query_template === "contracts_by_counterparty_profile" + ? CONTRACTS_BY_COUNTERPARTY_QUERY_TEMPLATE.replaceAll("__LIMIT__", String(resolvedLimit)) + : MOVEMENTS_QUERY_TEMPLATE + .replace("__LIMIT__", String(resolvedLimit)) + .replace("__WHERE_CLAUSE__", (() => { + const extraConditions = []; + const accountCondition = buildMovementAccountCondition(filters); + if (accountCondition) { + extraConditions.push(accountCondition); + } + return buildWhereClause(filters, "Движения.Период", extraConditions); + })()) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)); return { recipe, query, diff --git a/llm_normalizer/backend/dist/services/address_runtime/composeStage.js b/llm_normalizer/backend/dist/services/address_runtime/composeStage.js index e71ae2a..47e8ebb 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/composeStage.js +++ b/llm_normalizer/backend/dist/services/address_runtime/composeStage.js @@ -93,6 +93,21 @@ function normalizeQuestionText(value) { .replace(/\s+/g, " ") .trim(); } +function detectRankingLimit(userMessage, fallback = 20) { + const text = normalizeQuestionText(userMessage); + if (!text) { + return fallback; + } + const match = text.match(/(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu); + if (!match) { + return fallback; + } + const parsed = Number(match[1]); + if (!Number.isFinite(parsed) || parsed <= 0) { + return fallback; + } + return Math.min(200, Math.trunc(parsed)); +} function detectPeriodProfileFocus(userMessage) { const text = normalizeQuestionText(userMessage); if (!text) { @@ -181,6 +196,60 @@ function detectCounterpartyLifecycleFocus(userMessage) { } return "active_customers_period"; } +function detectMinOpsForAvgCheck(userMessage) { + const text = normalizeQuestionText(userMessage); + if (!text) { + return 3; + } + const explicit = text.match(/(?:мин(?:имум)?\s*|minimum\s*)(\d{1,2})/iu); + if (!explicit) { + return 3; + } + const parsed = Number(explicit[1]); + if (!Number.isFinite(parsed) || parsed <= 0) { + return 3; + } + return Math.min(20, Math.trunc(parsed)); +} +function detectValueRankingFocus(userMessage) { + const text = normalizeQuestionText(userMessage); + if (!text) { + return "top_by_total"; + } + if (/(?:сам(?:ый|ая|ое|ые)\s+высок[а-яё]*|highest|largest)\s+чек|(?:max\s+check|чек\s+макс)/iu.test(text)) { + return "top_by_max_single"; + } + if (/(?:сам(?:ые|ый|ая)\s+мал|наименьш|минимал|smallest|tiny|мелк)/iu.test(text) && /(?:сделк|deal|бюджет)/iu.test(text)) { + return "bottom_deals"; + } + if (/(?:сам(?:ые|ый|ая)\s+(?:круп|высок)|largest|highest|жирн|max)/iu.test(text) && + /(?:сделк|deal|платеж|платёж|выплат|поступлен|приход|входящ)/iu.test(text)) { + return "top_deals"; + } + if (/(?:средн(?:ий|его)\s+чек|avg(?:erage)?\s+check|average\s+payment)/iu.test(text)) { + return "top_by_avg_check_min_ops"; + } + if (/(?:макс(?:имальн)?(?:ой|ая|ое)?\s+сумм|max\s+single|largest\s+single)/iu.test(text)) { + return "top_by_max_single"; + } + if (/(?:по\s+количеств|частот|чаще\s+всего|most\s+frequent|ops?\s+count)/iu.test(text)) { + return "top_by_ops"; + } + return "top_by_total"; +} +function detectContractValueFocus(userMessage) { + const text = normalizeQuestionText(userMessage); + if (!text) { + return "top_by_turnover"; + } + if (/(?:документ|docs?|documents?|по\s+количеств)/iu.test(text)) { + return "top_by_docs"; + } + if (/(?:минимал|мал(?:еньк)?|smallest|least|мелк)/iu.test(text) && /(?:бюджет|оборот|turnover|budget|sum)/iu.test(text)) { + return "bottom_by_turnover_active"; + } + return "top_by_turnover"; +} function extractRequestedYearFromQuestion(userMessage) { const text = normalizeQuestionText(userMessage); if (!text) { @@ -214,6 +283,33 @@ function extractCounterpartyName(row) { } return null; } +function extractContractName(row) { + for (const token of row.analytics) { + const normalized = String(token ?? "").trim(); + if (!normalized) { + continue; + } + if (/^(?:0|<пусто>|пустая ссылка)$/iu.test(normalized)) { + continue; + } + if (/(?:договор|contract|дог\.)/iu.test(normalized)) { + return normalized; + } + } + for (const token of row.analytics) { + const normalized = String(token ?? "").trim(); + if (!normalized) { + continue; + } + if (/^\d{4}-\d{2}-\d{2}/.test(normalized)) { + continue; + } + if (normalized.length >= 3 && /[\\/]/.test(normalized)) { + return normalized; + } + } + return null; +} function deriveOperationalYearWindow(yearDocs, yearOps) { const docsSeries = [...yearDocs].sort((a, b) => a.year - b.year); const fallbackSeries = [...yearOps].sort((a, b) => a.year - b.year); @@ -660,6 +756,230 @@ function composeFactualReply(intent, rows, options = {}) { text: lines.join("\n") }; } + if (intent === "customer_revenue_and_payments" || intent === "supplier_payouts_profile") { + const isSupplier = intent === "supplier_payouts_profile"; + const focus = detectValueRankingFocus(options.userMessage); + const limit = detectRankingLimit(options.userMessage, 20); + const minOpsForAvgCheck = detectMinOpsForAvgCheck(options.userMessage); + const normalizedQuestion = normalizeQuestionText(options.userMessage); + const byCounterparty = new Map(); + const deals = []; + for (const row of rows) { + const counterparty = extractCounterpartyName(row); + const amount = row.amount ?? 0; + if (!counterparty || !Number.isFinite(amount) || amount <= 0) { + continue; + } + const current = byCounterparty.get(counterparty); + if (!current) { + byCounterparty.set(counterparty, { + name: counterparty, + total: amount, + ops: 1, + maxSingle: amount, + minSingle: amount, + lastPeriod: row.period + }); + } + else { + current.total += amount; + current.ops += 1; + current.maxSingle = Math.max(current.maxSingle, amount); + current.minSingle = Math.min(current.minSingle, amount); + if ((row.period ?? "") > (current.lastPeriod ?? "")) { + current.lastPeriod = row.period; + } + } + deals.push({ + period: row.period, + registrator: row.registrator, + counterparty, + amount + }); + } + const profileRows = Array.from(byCounterparty.values()); + const rankedByTotal = [...profileRows].sort((a, b) => b.total - a.total || b.ops - a.ops || a.name.localeCompare(b.name)); + const rankedByOps = [...profileRows].sort((a, b) => b.ops - a.ops || b.total - a.total || a.name.localeCompare(b.name)); + const rankedByMaxSingle = [...profileRows].sort((a, b) => b.maxSingle - a.maxSingle || b.total - a.total || a.name.localeCompare(b.name)); + const rankedByAvgCheck = [...profileRows] + .filter((item) => item.ops >= minOpsForAvgCheck) + .map((item) => ({ + ...item, + avgCheck: item.total / item.ops + })) + .sort((a, b) => b.avgCheck - a.avgCheck || b.total - a.total || a.name.localeCompare(b.name)); + const rankedDealsTop = [...deals].sort((a, b) => b.amount - a.amount || (b.period ?? "").localeCompare(a.period ?? "")); + const activeOnlyForBottomDeals = /(?:активн|active)/iu.test(normalizedQuestion); + const activeCounterpartiesForBottom = new Set(profileRows.filter((item) => item.ops >= Math.max(3, minOpsForAvgCheck)).map((item) => item.name)); + const rankedDealsBottom = [...deals] + .filter((item) => !activeOnlyForBottomDeals || activeCounterpartiesForBottom.has(item.counterparty)) + .sort((a, b) => a.amount - b.amount || (a.period ?? "").localeCompare(b.period ?? "")); + const lines = [ + isSupplier + ? "Собран профиль выплат поставщикам (bank-doc value aggregate)." + : "Собран профиль поступлений от заказчиков (bank-doc value aggregate).", + `Строк источника: ${rows.length}.`, + `Уникальных контрагентов: ${profileRows.length}.` + ]; + if (profileRows.length === 0) { + lines.push("По выбранному окну данных платежные строки не найдены."); + return { + responseType: "FACTUAL_SUMMARY", + text: lines.join("\n") + }; + } + if (focus === "top_by_ops") { + const visible = rankedByOps.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} поставщиков по количеству исходящих платежных операций:` + : `Топ-${visible.length} заказчиков по количеству входящих платежных операций:`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.name} | операций: ${item.ops} | сумма: ${item.total} | макс: ${item.maxSingle}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (focus === "top_by_max_single") { + const visible = rankedByMaxSingle.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} поставщиков по максимальной разовой выплате:` + : `Топ-${visible.length} заказчиков по максимальной сумме одной входящей операции:`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.name} | max single: ${item.maxSingle} | сумма: ${item.total} | операций: ${item.ops}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (focus === "top_by_avg_check_min_ops") { + const visible = rankedByAvgCheck.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} поставщиков по среднему чеку (минимум ${minOpsForAvgCheck} операций):` + : `Топ-${visible.length} заказчиков по среднему чеку (минимум ${minOpsForAvgCheck} входящих операций):`); + if (visible.length === 0) { + lines.push(`Контрагентов с минимум ${minOpsForAvgCheck} операций не найдено.`); + } + else { + lines.push(...visible.map((item, index) => `${index + 1}. ${item.name} | средний чек: ${item.avgCheck.toFixed(2)} | операций: ${item.ops} | сумма: ${item.total}`)); + } + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (focus === "top_deals") { + const visible = rankedDealsTop.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} самых крупных разовых выплат поставщикам:` + : `Топ-${visible.length} самых крупных разовых сделок по поступлениям:`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.period ?? "n/a"} | ${item.counterparty} | ${item.registrator} | ${item.amount}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (focus === "bottom_deals") { + const visible = rankedDealsBottom.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} самых маленьких разовых выплат:` + : `Топ-${visible.length} самых маленьких разовых сделок по поступлениям:`); + if (activeOnlyForBottomDeals) { + lines.push("Фильтр: только активные контрагенты (минимум 3 операции)."); + } + lines.push(...visible.map((item, index) => `${index + 1}. ${item.period ?? "n/a"} | ${item.counterparty} | ${item.registrator} | ${item.amount}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + const visible = rankedByTotal.slice(0, limit); + lines.push(isSupplier + ? `Топ-${visible.length} поставщиков по сумме выплат:` + : `Топ-${visible.length} заказчиков по сумме поступлений:`); + lines.push(...visible.map((item, index) => { + const avgCheck = item.ops > 0 ? (item.total / item.ops).toFixed(2) : "0"; + return `${index + 1}. ${item.name} | сумма: ${item.total} | операций: ${item.ops} | средний чек: ${avgCheck} | макс: ${item.maxSingle}`; + })); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (intent === "contract_usage_and_value") { + const focus = detectContractValueFocus(options.userMessage); + const limit = detectRankingLimit(options.userMessage, 20); + const byContract = new Map(); + for (const row of rows) { + const contract = extractContractName(row); + const amount = row.amount ?? 0; + if (!contract || !Number.isFinite(amount) || amount <= 0) { + continue; + } + const counterparty = extractCounterpartyName(row); + const current = byContract.get(contract); + if (!current) { + byContract.set(contract, { + contract, + turnover: amount, + docs: 1, + lastPeriod: row.period, + counterparties: new Set(counterparty ? [counterparty] : []) + }); + } + else { + current.turnover += amount; + current.docs += 1; + if ((row.period ?? "") > (current.lastPeriod ?? "")) { + current.lastPeriod = row.period; + } + if (counterparty) { + current.counterparties.add(counterparty); + } + } + } + const contractRows = Array.from(byContract.values()); + const rankedByTurnover = [...contractRows].sort((a, b) => b.turnover - a.turnover || b.docs - a.docs || a.contract.localeCompare(b.contract)); + const rankedByDocs = [...contractRows].sort((a, b) => b.docs - a.docs || b.turnover - a.turnover || a.contract.localeCompare(b.contract)); + const rankedBottomActive = [...contractRows] + .filter((item) => item.docs > 0 && item.turnover > 0) + .sort((a, b) => a.turnover - b.turnover || b.docs - a.docs || a.contract.localeCompare(b.contract)); + const lines = [ + "Собран профиль договоров по обороту/бюджету (bank-doc contract aggregate).", + `Строк источника: ${rows.length}.`, + `Активных договоров: ${contractRows.length}.` + ]; + if (contractRows.length === 0) { + lines.push("В выбранном окне не найдено операций, связанных с договорами."); + return { + responseType: "FACTUAL_SUMMARY", + text: lines.join("\n") + }; + } + if (focus === "top_by_docs") { + const visible = rankedByDocs.slice(0, limit); + lines.push(`Топ-${visible.length} договоров по количеству операций:`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.contract} | операций: ${item.docs} | оборот: ${item.turnover} | контрагентов: ${item.counterparties.size}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (focus === "bottom_by_turnover_active") { + const visible = rankedBottomActive.slice(0, limit); + lines.push(`Топ-${visible.length} активных договоров с минимальным бюджетом (оборотом):`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.contract} | оборот: ${item.turnover} | операций: ${item.docs} | последняя активность: ${item.lastPeriod ?? "n/a"}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + const visible = rankedByTurnover.slice(0, limit); + lines.push(`Топ-${visible.length} договоров по сумме оборота:`); + lines.push(...visible.map((item, index) => `${index + 1}. ${item.contract} | оборот: ${item.turnover} | операций: ${item.docs} | контрагентов: ${item.counterparties.size} | последняя активность: ${item.lastPeriod ?? "n/a"}`)); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } if (intent === "account_balance_snapshot") { const movementSum = rows.reduce((sum, row) => sum + (row.amount ?? 0), 0); const lines = [ diff --git a/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js b/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js index e16f1ac..ec72d4d 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js +++ b/llm_normalizer/backend/dist/services/address_runtime/predecomposeContract.js @@ -40,7 +40,10 @@ function inferAggregationProfile(intent, shape) { intent === "document_type_and_account_section_profile" || intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || - intent === "contract_usage_overview") { + intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value") { return "management_profile"; } if (intent === "account_balance_snapshot" || intent === "documents_forming_balance") { diff --git a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index 155de9f..dfadbfc 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -2,7 +2,7 @@ import iconv from "iconv-lite"; const ACCOUNT_PATTERN = /(?:сч[её]т|счет|account)[^0-9]{0,12}(\d{2}(?:[.,]\d{1,2})?)/i; -const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|\bпервые\b|\bтоп\b)\s*(\d{1,3})/i; +const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu; const COUNTERPARTY_PATTERN = /(?:по\s+контрагенту|контрагент(?:у|а)?|по\s+контре|контра|по\s+компан(?:ии|ию|ия)|компан(?:ия|ии|ию)|по\s+организац(?:ии|ию|ия)|организац(?:ия|ии|ию)|по\s+поставщик(?:у|а)?|поставщик(?:у|а)?|по\s+клиент(?:у|а)?|клиент(?:у|а)?|по\s+покупател(?:ю|я)|покупател(?:ю|я)|по\s+партнер(?:у|а)?|партнер(?:у|а)?|by\s+counterparty|counterparty|by\s+company|company|by\s+supplier|supplier|by\s+vendor|vendor|by\s+customer|customer|by\s+client|client|by\s+partner|partner)\s+([^\r\n,.;:]+)/iu; const CONTRACT_PATTERN = /(?:по\s+договору|договор(?:у|а)?\s*(?:№|#|n)?|by\s+contract|contract(?:\s*(?:no|number|#|n))?)\s+([^\r\n,.;:]+)/i; @@ -798,7 +798,10 @@ export function extractAddressFilters(userMessage: string, intent: AddressIntent intent === "document_type_and_account_section_profile" || intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || - intent === "contract_usage_overview"; + intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value"; const filters: AddressFilterSet = { sort: "period_desc" }; diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index 97b68ca..9b7cee4 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -268,6 +268,50 @@ const CONTRACT_USAGE_OVERVIEW_HINTS = [ "contract usage overview" ]; +const CUSTOMER_REVENUE_AND_PAYMENTS_HINTS = [ + "самые доходные клиенты", + "самые доходные заказчики", + "топ клиентов по сумме поступлений", + "топ заказчиков по сумме поступлений", + "кто нам больше всего занес", + "кто нам больше всего занёс", + "кто нам принес больше всего", + "кто нам принёс больше всего", + "кто платит чаще всего", + "средний чек клиентов", + "средний чек заказчиков", + "крупные сделки по поступлениям", + "маленькие сделки по поступлениям", + "smallest deals by inflow", + "largest deals by inflow", + "top customers by inflow", + "top customers by revenue" +]; + +const SUPPLIER_PAYOUTS_PROFILE_HINTS = [ + "топ поставщиков по сумме выплат", + "кому мы больше всего заплатили", + "кому ушло больше всего денег", + "кому мы больше всего сгрузили денег", + "поставщики по выплатам", + "поставщики по исходящим платежам", + "поставщики с максимальным числом выплат", + "крупные разовые выплаты поставщикам", + "top suppliers by payouts", + "top suppliers by outgoing payments" +]; + +const CONTRACT_USAGE_AND_VALUE_HINTS = [ + "договоры по обороту", + "договоры по сумме оборота", + "топ договоров по обороту", + "договоры с минимальным бюджетом", + "договоры с самым маленьким бюджетом", + "активные договоры по бюджету", + "contracts by turnover", + "contracts by budget" +]; + const CONTRACT_LIST_BY_COUNTERPARTY_HINTS = [ "договоры по", "договора по", @@ -283,6 +327,94 @@ function hasAny(text: string, patterns: string[]): boolean { return patterns.some((item) => text.includes(item)); } +function tokenizeText(text: string): string[] { + return String(text ?? "") + .toLowerCase() + .split(/[^a-zа-яё0-9]+/iu) + .map((token) => token.trim()) + .filter((token) => token.length > 0); +} + +function trimRussianEnding(token: string): string { + return token.replace( + /(?:иями|ями|ами|ого|ему|ому|ыми|ими|ией|ей|ий|ый|ой|ях|ах|ов|ев|ам|ям|ом|ем|ы|и|а|я|у|ю|е|о)$/u, + "" + ); +} + +function normalizeLexemeToken(rawToken: string): string { + const token = String(rawToken ?? "").toLowerCase().replace(/[^a-zа-яё0-9]+/gu, ""); + if (!token) { + return ""; + } + if (/^[a-z0-9]+$/u.test(token)) { + return token; + } + return trimRussianEnding(token); +} + +function levenshteinDistance(a: string, b: string): number { + if (a === b) { + return 0; + } + if (!a.length) { + return b.length; + } + if (!b.length) { + return a.length; + } + const prev = new Array(b.length + 1); + const curr = new Array(b.length + 1); + for (let j = 0; j <= b.length; j += 1) { + prev[j] = j; + } + for (let i = 1; i <= a.length; i += 1) { + curr[0] = i; + for (let j = 1; j <= b.length; j += 1) { + const cost = a[i - 1] === b[j - 1] ? 0 : 1; + curr[j] = Math.min( + prev[j] + 1, + curr[j - 1] + 1, + prev[j - 1] + cost + ); + } + for (let j = 0; j <= b.length; j += 1) { + prev[j] = curr[j]; + } + } + return prev[b.length]; +} + +function hasFuzzyLexeme(text: string, lexemeRoots: string[]): boolean { + const normalizedRoots = lexemeRoots + .map((root) => normalizeLexemeToken(root)) + .filter((root) => root.length > 0); + if (normalizedRoots.length === 0) { + return false; + } + const tokens = tokenizeText(text) + .map((token) => normalizeLexemeToken(token)) + .filter((token) => token.length >= 4); + for (const token of tokens) { + for (const root of normalizedRoots) { + if (token.includes(root)) { + return true; + } + if (root.includes(token) && token.length >= 5) { + return true; + } + const maxDistance = root.length >= 7 ? 2 : 1; + if (Math.abs(token.length - root.length) > maxDistance) { + continue; + } + if (levenshteinDistance(token, root) <= maxDistance) { + return true; + } + } + } + return false; +} + function hasCompactAccountCodeToken(text: string): boolean { // Match compact account tokens like 60.01 / 62, while avoiding date fragments. return /(? ЗНАЧЕНИЕ(Справочник.ДоговорыКонтрагентов.ПустаяСсылка)` + ]); +} + function normalizeAccountTokenForQuery(value: string): string { const source = String(value ?? "").trim().replace(",", "."); const match = source.match(/^(\d{2})(?:\.(\d{1,2}))?/); @@ -592,6 +688,9 @@ function maxLimitForIntent(intent: AddressIntent): number { intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value" || intent === "list_contracts_by_counterparty" || intent === "list_documents_by_counterparty" || intent === "bank_operations_by_counterparty" || @@ -706,6 +805,28 @@ export function buildAddressRecipePlan( "__WHERE_IN_USED__", buildUsedContractWhereClause(filters, "БанкПоступление.Дата", "БанкПоступление.ДоговорКонтрагента") ) + : recipe.query_template === "customer_revenue_profile" + ? CUSTOMER_REVENUE_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll("__WHERE_IN__", buildWhereClause(filters, "БанкПоступление.Дата")) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) + : recipe.query_template === "supplier_payout_profile" + ? SUPPLIER_PAYOUT_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll("__WHERE_OUT__", buildWhereClause(filters, "БанкСписание.Дата")) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) + : recipe.query_template === "contract_value_profile" + ? CONTRACT_VALUE_PROFILE_QUERY_TEMPLATE + .replaceAll("__LIMIT__", String(resolvedLimit)) + .replaceAll( + "__WHERE_IN_VALUE__", + buildContractValueWhereClause(filters, "БанкПоступление.Дата", "БанкПоступление.ДоговорКонтрагента") + ) + .replaceAll( + "__WHERE_OUT_VALUE__", + buildContractValueWhereClause(filters, "БанкСписание.Дата", "БанкСписание.ДоговорКонтрагента") + ) + .replaceAll("__ORDER_DIRECTION__", resolveOrderDirection(filters.sort)) : recipe.query_template === "contracts_by_counterparty_profile" ? CONTRACTS_BY_COUNTERPARTY_QUERY_TEMPLATE.replaceAll("__LIMIT__", String(resolvedLimit)) : MOVEMENTS_QUERY_TEMPLATE diff --git a/llm_normalizer/backend/src/services/address_runtime/composeStage.ts b/llm_normalizer/backend/src/services/address_runtime/composeStage.ts index d061f49..bfa66ce 100644 --- a/llm_normalizer/backend/src/services/address_runtime/composeStage.ts +++ b/llm_normalizer/backend/src/services/address_runtime/composeStage.ts @@ -34,6 +34,14 @@ type CounterpartyProfileFocus = | "customers_only" | "mixed_only"; type CounterpartyLifecycleFocus = "active_customers_period" | "active_customers_all_time"; +type ValueRankingFocus = + | "top_by_total" + | "top_by_ops" + | "top_by_max_single" + | "top_by_avg_check_min_ops" + | "top_deals" + | "bottom_deals"; +type ContractValueFocus = "top_by_turnover" | "bottom_by_turnover_active" | "top_by_docs"; interface YearAggPoint { year: number; @@ -142,6 +150,22 @@ function normalizeQuestionText(value: string | null | undefined): string { .trim(); } +function detectRankingLimit(userMessage: string | null | undefined, fallback = 20): number { + const text = normalizeQuestionText(userMessage); + if (!text) { + return fallback; + } + const match = text.match(/(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu); + if (!match) { + return fallback; + } + const parsed = Number(match[1]); + if (!Number.isFinite(parsed) || parsed <= 0) { + return fallback; + } + return Math.min(200, Math.trunc(parsed)); +} + function detectPeriodProfileFocus(userMessage: string | null | undefined): PeriodProfileFocus { const text = normalizeQuestionText(userMessage); if (!text) { @@ -255,6 +279,65 @@ function detectCounterpartyLifecycleFocus(userMessage: string | null | undefined return "active_customers_period"; } +function detectMinOpsForAvgCheck(userMessage: string | null | undefined): number { + const text = normalizeQuestionText(userMessage); + if (!text) { + return 3; + } + const explicit = text.match(/(?:мин(?:имум)?\s*|minimum\s*)(\d{1,2})/iu); + if (!explicit) { + return 3; + } + const parsed = Number(explicit[1]); + if (!Number.isFinite(parsed) || parsed <= 0) { + return 3; + } + return Math.min(20, Math.trunc(parsed)); +} + +function detectValueRankingFocus(userMessage: string | null | undefined): ValueRankingFocus { + const text = normalizeQuestionText(userMessage); + if (!text) { + return "top_by_total"; + } + if (/(?:сам(?:ый|ая|ое|ые)\s+высок[а-яё]*|highest|largest)\s+чек|(?:max\s+check|чек\s+макс)/iu.test(text)) { + return "top_by_max_single"; + } + if (/(?:сам(?:ые|ый|ая)\s+мал|наименьш|минимал|smallest|tiny|мелк)/iu.test(text) && /(?:сделк|deal|бюджет)/iu.test(text)) { + return "bottom_deals"; + } + if ( + /(?:сам(?:ые|ый|ая)\s+(?:круп|высок)|largest|highest|жирн|max)/iu.test(text) && + /(?:сделк|deal|платеж|платёж|выплат|поступлен|приход|входящ)/iu.test(text) + ) { + return "top_deals"; + } + if (/(?:средн(?:ий|его)\s+чек|avg(?:erage)?\s+check|average\s+payment)/iu.test(text)) { + return "top_by_avg_check_min_ops"; + } + if (/(?:макс(?:имальн)?(?:ой|ая|ое)?\s+сумм|max\s+single|largest\s+single)/iu.test(text)) { + return "top_by_max_single"; + } + if (/(?:по\s+количеств|частот|чаще\s+всего|most\s+frequent|ops?\s+count)/iu.test(text)) { + return "top_by_ops"; + } + return "top_by_total"; +} + +function detectContractValueFocus(userMessage: string | null | undefined): ContractValueFocus { + const text = normalizeQuestionText(userMessage); + if (!text) { + return "top_by_turnover"; + } + if (/(?:документ|docs?|documents?|по\s+количеств)/iu.test(text)) { + return "top_by_docs"; + } + if (/(?:минимал|мал(?:еньк)?|smallest|least|мелк)/iu.test(text) && /(?:бюджет|оборот|turnover|budget|sum)/iu.test(text)) { + return "bottom_by_turnover_active"; + } + return "top_by_turnover"; +} + function extractRequestedYearFromQuestion(userMessage: string | null | undefined): number | null { const text = normalizeQuestionText(userMessage); if (!text) { @@ -290,6 +373,34 @@ function extractCounterpartyName(row: ComposeStageRow): string | null { return null; } +function extractContractName(row: ComposeStageRow): string | null { + for (const token of row.analytics) { + const normalized = String(token ?? "").trim(); + if (!normalized) { + continue; + } + if (/^(?:0|<пусто>|пустая ссылка)$/iu.test(normalized)) { + continue; + } + if (/(?:договор|contract|дог\.)/iu.test(normalized)) { + return normalized; + } + } + for (const token of row.analytics) { + const normalized = String(token ?? "").trim(); + if (!normalized) { + continue; + } + if (/^\d{4}-\d{2}-\d{2}/.test(normalized)) { + continue; + } + if (normalized.length >= 3 && /[\\/]/.test(normalized)) { + return normalized; + } + } + return null; +} + function deriveOperationalYearWindow( yearDocs: YearAggPoint[], yearOps: YearAggPoint[] @@ -838,6 +949,326 @@ export function composeFactualReply( }; } + if (intent === "customer_revenue_and_payments" || intent === "supplier_payouts_profile") { + const isSupplier = intent === "supplier_payouts_profile"; + const focus = detectValueRankingFocus(options.userMessage); + const limit = detectRankingLimit(options.userMessage, 20); + const minOpsForAvgCheck = detectMinOpsForAvgCheck(options.userMessage); + const normalizedQuestion = normalizeQuestionText(options.userMessage); + + const byCounterparty = new Map< + string, + { + name: string; + total: number; + ops: number; + maxSingle: number; + minSingle: number; + lastPeriod: string | null; + } + >(); + const deals: Array<{ period: string | null; registrator: string; counterparty: string; amount: number }> = []; + + for (const row of rows) { + const counterparty = extractCounterpartyName(row); + const amount = row.amount ?? 0; + if (!counterparty || !Number.isFinite(amount) || amount <= 0) { + continue; + } + + const current = byCounterparty.get(counterparty); + if (!current) { + byCounterparty.set(counterparty, { + name: counterparty, + total: amount, + ops: 1, + maxSingle: amount, + minSingle: amount, + lastPeriod: row.period + }); + } else { + current.total += amount; + current.ops += 1; + current.maxSingle = Math.max(current.maxSingle, amount); + current.minSingle = Math.min(current.minSingle, amount); + if ((row.period ?? "") > (current.lastPeriod ?? "")) { + current.lastPeriod = row.period; + } + } + deals.push({ + period: row.period, + registrator: row.registrator, + counterparty, + amount + }); + } + + const profileRows = Array.from(byCounterparty.values()); + const rankedByTotal = [...profileRows].sort((a, b) => b.total - a.total || b.ops - a.ops || a.name.localeCompare(b.name)); + const rankedByOps = [...profileRows].sort((a, b) => b.ops - a.ops || b.total - a.total || a.name.localeCompare(b.name)); + const rankedByMaxSingle = [...profileRows].sort( + (a, b) => b.maxSingle - a.maxSingle || b.total - a.total || a.name.localeCompare(b.name) + ); + const rankedByAvgCheck = [...profileRows] + .filter((item) => item.ops >= minOpsForAvgCheck) + .map((item) => ({ + ...item, + avgCheck: item.total / item.ops + })) + .sort((a, b) => b.avgCheck - a.avgCheck || b.total - a.total || a.name.localeCompare(b.name)); + + const rankedDealsTop = [...deals].sort( + (a, b) => b.amount - a.amount || (b.period ?? "").localeCompare(a.period ?? "") + ); + const activeOnlyForBottomDeals = /(?:активн|active)/iu.test(normalizedQuestion); + const activeCounterpartiesForBottom = new Set( + profileRows.filter((item) => item.ops >= Math.max(3, minOpsForAvgCheck)).map((item) => item.name) + ); + const rankedDealsBottom = [...deals] + .filter((item) => !activeOnlyForBottomDeals || activeCounterpartiesForBottom.has(item.counterparty)) + .sort((a, b) => a.amount - b.amount || (a.period ?? "").localeCompare(b.period ?? "")); + + const lines: string[] = [ + isSupplier + ? "Собран профиль выплат поставщикам (bank-doc value aggregate)." + : "Собран профиль поступлений от заказчиков (bank-doc value aggregate).", + `Строк источника: ${rows.length}.`, + `Уникальных контрагентов: ${profileRows.length}.` + ]; + + if (profileRows.length === 0) { + lines.push("По выбранному окну данных платежные строки не найдены."); + return { + responseType: "FACTUAL_SUMMARY", + text: lines.join("\n") + }; + } + + if (focus === "top_by_ops") { + const visible = rankedByOps.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} поставщиков по количеству исходящих платежных операций:` + : `Топ-${visible.length} заказчиков по количеству входящих платежных операций:` + ); + lines.push( + ...visible.map( + (item, index) => `${index + 1}. ${item.name} | операций: ${item.ops} | сумма: ${item.total} | макс: ${item.maxSingle}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (focus === "top_by_max_single") { + const visible = rankedByMaxSingle.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} поставщиков по максимальной разовой выплате:` + : `Топ-${visible.length} заказчиков по максимальной сумме одной входящей операции:` + ); + lines.push( + ...visible.map((item, index) => `${index + 1}. ${item.name} | max single: ${item.maxSingle} | сумма: ${item.total} | операций: ${item.ops}`) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (focus === "top_by_avg_check_min_ops") { + const visible = rankedByAvgCheck.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} поставщиков по среднему чеку (минимум ${minOpsForAvgCheck} операций):` + : `Топ-${visible.length} заказчиков по среднему чеку (минимум ${minOpsForAvgCheck} входящих операций):` + ); + if (visible.length === 0) { + lines.push(`Контрагентов с минимум ${minOpsForAvgCheck} операций не найдено.`); + } else { + lines.push( + ...visible.map( + (item, index) => + `${index + 1}. ${item.name} | средний чек: ${item.avgCheck.toFixed(2)} | операций: ${item.ops} | сумма: ${item.total}` + ) + ); + } + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (focus === "top_deals") { + const visible = rankedDealsTop.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} самых крупных разовых выплат поставщикам:` + : `Топ-${visible.length} самых крупных разовых сделок по поступлениям:` + ); + lines.push( + ...visible.map( + (item, index) => `${index + 1}. ${item.period ?? "n/a"} | ${item.counterparty} | ${item.registrator} | ${item.amount}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (focus === "bottom_deals") { + const visible = rankedDealsBottom.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} самых маленьких разовых выплат:` + : `Топ-${visible.length} самых маленьких разовых сделок по поступлениям:` + ); + if (activeOnlyForBottomDeals) { + lines.push("Фильтр: только активные контрагенты (минимум 3 операции)."); + } + lines.push( + ...visible.map( + (item, index) => `${index + 1}. ${item.period ?? "n/a"} | ${item.counterparty} | ${item.registrator} | ${item.amount}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + const visible = rankedByTotal.slice(0, limit); + lines.push( + isSupplier + ? `Топ-${visible.length} поставщиков по сумме выплат:` + : `Топ-${visible.length} заказчиков по сумме поступлений:` + ); + lines.push( + ...visible.map((item, index) => { + const avgCheck = item.ops > 0 ? (item.total / item.ops).toFixed(2) : "0"; + return `${index + 1}. ${item.name} | сумма: ${item.total} | операций: ${item.ops} | средний чек: ${avgCheck} | макс: ${item.maxSingle}`; + }) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (intent === "contract_usage_and_value") { + const focus = detectContractValueFocus(options.userMessage); + const limit = detectRankingLimit(options.userMessage, 20); + const byContract = new Map< + string, + { + contract: string; + turnover: number; + docs: number; + lastPeriod: string | null; + counterparties: Set; + } + >(); + + for (const row of rows) { + const contract = extractContractName(row); + const amount = row.amount ?? 0; + if (!contract || !Number.isFinite(amount) || amount <= 0) { + continue; + } + const counterparty = extractCounterpartyName(row); + const current = byContract.get(contract); + if (!current) { + byContract.set(contract, { + contract, + turnover: amount, + docs: 1, + lastPeriod: row.period, + counterparties: new Set(counterparty ? [counterparty] : []) + }); + } else { + current.turnover += amount; + current.docs += 1; + if ((row.period ?? "") > (current.lastPeriod ?? "")) { + current.lastPeriod = row.period; + } + if (counterparty) { + current.counterparties.add(counterparty); + } + } + } + + const contractRows = Array.from(byContract.values()); + const rankedByTurnover = [...contractRows].sort( + (a, b) => b.turnover - a.turnover || b.docs - a.docs || a.contract.localeCompare(b.contract) + ); + const rankedByDocs = [...contractRows].sort( + (a, b) => b.docs - a.docs || b.turnover - a.turnover || a.contract.localeCompare(b.contract) + ); + const rankedBottomActive = [...contractRows] + .filter((item) => item.docs > 0 && item.turnover > 0) + .sort((a, b) => a.turnover - b.turnover || b.docs - a.docs || a.contract.localeCompare(b.contract)); + + const lines: string[] = [ + "Собран профиль договоров по обороту/бюджету (bank-doc contract aggregate).", + `Строк источника: ${rows.length}.`, + `Активных договоров: ${contractRows.length}.` + ]; + + if (contractRows.length === 0) { + lines.push("В выбранном окне не найдено операций, связанных с договорами."); + return { + responseType: "FACTUAL_SUMMARY", + text: lines.join("\n") + }; + } + + if (focus === "top_by_docs") { + const visible = rankedByDocs.slice(0, limit); + lines.push(`Топ-${visible.length} договоров по количеству операций:`); + lines.push( + ...visible.map( + (item, index) => + `${index + 1}. ${item.contract} | операций: ${item.docs} | оборот: ${item.turnover} | контрагентов: ${item.counterparties.size}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + if (focus === "bottom_by_turnover_active") { + const visible = rankedBottomActive.slice(0, limit); + lines.push(`Топ-${visible.length} активных договоров с минимальным бюджетом (оборотом):`); + lines.push( + ...visible.map( + (item, index) => + `${index + 1}. ${item.contract} | оборот: ${item.turnover} | операций: ${item.docs} | последняя активность: ${item.lastPeriod ?? "n/a"}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + + const visible = rankedByTurnover.slice(0, limit); + lines.push(`Топ-${visible.length} договоров по сумме оборота:`); + lines.push( + ...visible.map( + (item, index) => + `${index + 1}. ${item.contract} | оборот: ${item.turnover} | операций: ${item.docs} | контрагентов: ${item.counterparties.size} | последняя активность: ${item.lastPeriod ?? "n/a"}` + ) + ); + return { + responseType: "FACTUAL_LIST", + text: lines.join("\n") + }; + } + if (intent === "account_balance_snapshot") { const movementSum = rows.reduce((sum, row) => sum + (row.amount ?? 0), 0); const lines = [ diff --git a/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts b/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts index 9ef5e6e..fe4e5e0 100644 --- a/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts +++ b/llm_normalizer/backend/src/services/address_runtime/predecomposeContract.ts @@ -82,7 +82,10 @@ function inferAggregationProfile(intent: AddressIntent, shape: AddressQueryShape intent === "document_type_and_account_section_profile" || intent === "counterparty_population_and_roles" || intent === "counterparty_activity_lifecycle" || - intent === "contract_usage_overview" + intent === "contract_usage_overview" || + intent === "customer_revenue_and_payments" || + intent === "supplier_payouts_profile" || + intent === "contract_usage_and_value" ) { return "management_profile"; } diff --git a/llm_normalizer/backend/src/types/addressQuery.ts b/llm_normalizer/backend/src/types/addressQuery.ts index 8a9f0d0..171bc8b 100644 --- a/llm_normalizer/backend/src/types/addressQuery.ts +++ b/llm_normalizer/backend/src/types/addressQuery.ts @@ -6,6 +6,9 @@ export type AddressIntent = | "counterparty_population_and_roles" | "counterparty_activity_lifecycle" | "contract_usage_overview" + | "customer_revenue_and_payments" + | "supplier_payouts_profile" + | "contract_usage_and_value" | "list_contracts_by_counterparty" | "list_open_contracts" | "list_payables_counterparties" @@ -113,6 +116,9 @@ export interface AddressRecipeDefinition { | "counterparty_roles_profile" | "counterparty_lifecycle_profile" | "contract_usage_profile" + | "customer_revenue_profile" + | "supplier_payout_profile" + | "contract_value_profile" | "contracts_by_counterparty_profile"; required_filters: Array; optional_filters: Array; diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index f89b8eb..eaf76d0 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -90,6 +90,31 @@ describe("address query shape classifier", () => { expect(result.mode).toBe("address_query"); }); + it("keeps customer value ranking question in address lane", () => { + const result = detectAddressQuestionMode("какие клиенты самые доходные, выдай топ-20"); + expect(result.mode).toBe("address_query"); + }); + + it("keeps highest inflow slang question in address lane", () => { + const result = detectAddressQuestionMode("какие приходы самые высокие за все время"); + expect(result.mode).toBe("address_query"); + }); + + it("keeps typo customer highest-check question in address lane", () => { + const result = detectAddressQuestionMode("с каких кликентов самый высокий чек"); + expect(result.mode).toBe("address_query"); + }); + + it("keeps supplier payout ranking question in address lane", () => { + const result = detectAddressQuestionMode("кому мы больше всего сгрузили денег, топ-20 поставщиков"); + expect(result.mode).toBe("address_query"); + }); + + it("keeps contract turnover ranking question in address lane", () => { + const result = detectAddressQuestionMode("договоры по обороту ранкни и дай топ-20"); + expect(result.mode).toBe("address_query"); + }); + }); describe("address compose stage utf8 headers", () => { @@ -910,6 +935,184 @@ describe("address compose stage utf8 headers", () => { expect(reply.text).toContain("Использованных договоров (есть factual связь с операциями): 148."); expect(reply.text).toContain("Неиспользуемых договоров: 372."); }); + + it("renders customer value top list with explicit top-2 limit", () => { + const reply = composeFactualReply( + "customer_revenue_and_payments", + [ + { + period: "2020-03-01T00:00:00Z", + registrator: "Поступление 1", + account_dt: "", + account_kt: "", + amount: 500, + analytics: ["Клиент А", "Договор А-1"] + }, + { + period: "2020-03-02T00:00:00Z", + registrator: "Поступление 2", + account_dt: "", + account_kt: "", + amount: 700, + analytics: ["Клиент Б", "Договор Б-1"] + }, + { + period: "2020-03-03T00:00:00Z", + registrator: "Поступление 3", + account_dt: "", + account_kt: "", + amount: 300, + analytics: ["Клиент А", "Договор А-1"] + } + ], + { userMessage: "покажи топ-2 заказчиков по сумме поступлений" } + ); + + expect(reply.responseType).toBe("FACTUAL_LIST"); + expect(reply.text).toContain("Топ-2 заказчиков по сумме поступлений:"); + expect(reply.text).toContain("1. Клиент А | сумма: 800"); + expect(reply.text).toContain("2. Клиент Б | сумма: 700"); + }); + + it("renders top incoming deals for highest inflow wording", () => { + const reply = composeFactualReply( + "customer_revenue_and_payments", + [ + { + period: "2020-03-01T00:00:00Z", + registrator: "Поступление 1", + account_dt: "", + account_kt: "", + amount: 500, + analytics: ["Клиент А", "Договор А-1"] + }, + { + period: "2020-03-02T00:00:00Z", + registrator: "Поступление 2", + account_dt: "", + account_kt: "", + amount: 700, + analytics: ["Клиент Б", "Договор Б-1"] + } + ], + { userMessage: "какие приходы самые высокие за все время" } + ); + + expect(reply.responseType).toBe("FACTUAL_LIST"); + expect(reply.text).toContain("самых крупных разовых сделок по поступлениям"); + expect(reply.text).toContain("Поступление 2"); + }); + + it("renders max-single ranking for highest-check typo wording", () => { + const reply = composeFactualReply( + "customer_revenue_and_payments", + [ + { + period: "2020-03-01T00:00:00Z", + registrator: "Поступление 1", + account_dt: "", + account_kt: "", + amount: 500, + analytics: ["Клиент А", "Договор А-1"] + }, + { + period: "2020-03-02T00:00:00Z", + registrator: "Поступление 2", + account_dt: "", + account_kt: "", + amount: 1200, + analytics: ["Клиент Б", "Договор Б-1"] + }, + { + period: "2020-03-03T00:00:00Z", + registrator: "Поступление 3", + account_dt: "", + account_kt: "", + amount: 300, + analytics: ["Клиент А", "Договор А-1"] + } + ], + { userMessage: "с каких кликентов самый высокий чек" } + ); + + expect(reply.responseType).toBe("FACTUAL_LIST"); + expect(reply.text).toContain("по максимальной сумме одной входящей операции"); + expect(reply.text).toContain("1. Клиент Б | max single: 1200"); + }); + + it("renders supplier payout list by operations count", () => { + const reply = composeFactualReply( + "supplier_payouts_profile", + [ + { + period: "2020-03-01T00:00:00Z", + registrator: "Списание 1", + account_dt: "", + account_kt: "", + amount: 100, + analytics: ["Поставщик А", "Договор А-1"] + }, + { + period: "2020-03-02T00:00:00Z", + registrator: "Списание 2", + account_dt: "", + account_kt: "", + amount: 120, + analytics: ["Поставщик А", "Договор А-2"] + }, + { + period: "2020-03-03T00:00:00Z", + registrator: "Списание 3", + account_dt: "", + account_kt: "", + amount: 500, + analytics: ["Поставщик Б", "Договор Б-1"] + } + ], + { userMessage: "топ-20 поставщиков по количеству исходящих платежных операций" } + ); + + expect(reply.responseType).toBe("FACTUAL_LIST"); + expect(reply.text).toContain("Топ-2 поставщиков по количеству исходящих платежных операций:"); + expect(reply.text).toContain("1. Поставщик А | операций: 2"); + }); + + it("renders contract value list for minimal active budgets", () => { + const reply = composeFactualReply( + "contract_usage_and_value", + [ + { + period: "2020-03-01T00:00:00Z", + registrator: "CT_VALUE_IN", + account_dt: "", + account_kt: "", + amount: 900, + analytics: ["Клиент А", "Договор 01/20"] + }, + { + period: "2020-03-02T00:00:00Z", + registrator: "CT_VALUE_OUT", + account_dt: "", + account_kt: "", + amount: 100, + analytics: ["Поставщик Б", "Договор 02/20"] + }, + { + period: "2020-03-03T00:00:00Z", + registrator: "CT_VALUE_IN", + account_dt: "", + account_kt: "", + amount: 150, + analytics: ["Клиент В", "Договор 03/20"] + } + ], + { userMessage: "покажи топ-20 договоров с минимальным бюджетом среди активных договоров" } + ); + + expect(reply.responseType).toBe("FACTUAL_LIST"); + expect(reply.text).toContain("активных договоров с минимальным бюджетом"); + expect(reply.text).toContain("1. Договор 02/20 | оборот: 100"); + }); }); describe("address intent resolver expansion (M2.3a)", () => { @@ -1150,6 +1353,31 @@ describe("address intent resolver expansion (M2.3a)", () => { expect(result.intent).toBe("contract_usage_overview"); }); + it("resolves customer revenue/payout ranking intent", () => { + const result = resolveAddressIntent("какие клиенты самые доходные, выдай топ-20"); + expect(result.intent).toBe("customer_revenue_and_payments"); + }); + + it("resolves customer revenue intent from highest inflow slang wording", () => { + const result = resolveAddressIntent("какие приходы самые высокие за все время"); + expect(result.intent).toBe("customer_revenue_and_payments"); + }); + + it("resolves customer revenue intent from typo highest-check wording", () => { + const result = resolveAddressIntent("с каких кликентов самый высокий чек"); + expect(result.intent).toBe("customer_revenue_and_payments"); + }); + + it("resolves supplier payouts profile intent from slang wording", () => { + const result = resolveAddressIntent("кому мы больше всего сгрузили денег, топ-20 поставщиков"); + expect(result.intent).toBe("supplier_payouts_profile"); + }); + + it("resolves contract usage and value intent", () => { + const result = resolveAddressIntent("договоры по обороту ранкни и дай топ-20"); + expect(result.intent).toBe("contract_usage_and_value"); + }); + it("resolves contracts-by-counterparty intent from list wording", () => { const result = resolveAddressIntent("покажи договора все по жуковке 51"); expect(result.intent).toBe("list_contracts_by_counterparty"); @@ -1175,21 +1403,42 @@ describe("address filter extraction for balance drilldown", () => { "Сколько всего договоров заведено и сколько из них реально использовались?", "contract_usage_overview" ); + const customerValue = extractAddressFilters( + "какие клиенты самые доходные, выдай топ-20", + "customer_revenue_and_payments" + ); + const supplierValue = extractAddressFilters( + "кому мы больше всего сгрузили денег, топ-20 поставщиков", + "supplier_payouts_profile" + ); + const contractValue = extractAddressFilters( + "договоры по обороту ранкни и дай топ-20", + "contract_usage_and_value" + ); expect(periodProfile.extracted_filters.limit).toBeUndefined(); expect(docSectionProfile.extracted_filters.limit).toBeUndefined(); expect(counterpartyProfile.extracted_filters.limit).toBeUndefined(); expect(counterpartyLifecycle.extracted_filters.limit).toBeUndefined(); expect(contractOverview.extracted_filters.limit).toBeUndefined(); + expect(customerValue.extracted_filters.limit).toBe(20); + expect(supplierValue.extracted_filters.limit).toBe(20); + expect(contractValue.extracted_filters.limit).toBe(20); expect(periodProfile.extracted_filters.period_to).toBeDefined(); expect(docSectionProfile.extracted_filters.period_to).toBeDefined(); expect(counterpartyProfile.extracted_filters.period_to).toBeDefined(); expect(counterpartyLifecycle.extracted_filters.period_to).toBeDefined(); expect(contractOverview.extracted_filters.period_to).toBeDefined(); + expect(customerValue.extracted_filters.period_to).toBeDefined(); + expect(supplierValue.extracted_filters.period_to).toBeDefined(); + expect(contractValue.extracted_filters.period_to).toBeDefined(); expect(periodProfile.warnings).toContain("period_to_defaulted_today_for_management_profile"); expect(docSectionProfile.warnings).toContain("period_to_defaulted_today_for_management_profile"); expect(counterpartyProfile.warnings).toContain("period_to_defaulted_today_for_management_profile"); expect(counterpartyLifecycle.warnings).not.toContain("period_to_defaulted_today_for_management_profile"); expect(contractOverview.warnings).toContain("period_to_defaulted_today_for_management_profile"); + expect(customerValue.warnings).toContain("period_to_defaulted_today_for_management_profile"); + expect(supplierValue.warnings).toContain("period_to_defaulted_today_for_management_profile"); + expect(contractValue.warnings).toContain("period_to_defaulted_today_for_management_profile"); }); it("extracts short-year period for lifecycle customer list question", () => { @@ -1626,6 +1875,56 @@ describe("address query limited taxonomy and stage diagnostics", () => { expect(["FACTUAL_SUMMARY", "LIMITED_WITH_REASON"]).toContain(result?.response_type); }); + it("routes customer value question into dedicated aggregate recipe", async () => { + const service = new AddressQueryService(); + const result = await service.tryHandle("какие клиенты самые доходные, выдай топ-20"); + expect(result?.handled).toBe(true); + expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); + expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); + expect(result?.debug.mcp_call_status).not.toBe("skipped"); + expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + }); + + it("routes highest inflow slang wording into customer value aggregate recipe", async () => { + const service = new AddressQueryService(); + const result = await service.tryHandle("какие приходы самые высокие за все время"); + expect(result?.handled).toBe(true); + expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); + expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); + expect(result?.debug.mcp_call_status).not.toBe("skipped"); + expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + }); + + it("routes typo highest-check wording into customer value aggregate recipe", async () => { + const service = new AddressQueryService(); + const result = await service.tryHandle("с каких кликентов самый высокий чек"); + expect(result?.handled).toBe(true); + expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); + expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); + expect(result?.debug.mcp_call_status).not.toBe("skipped"); + expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + }); + + it("routes supplier payout question into dedicated aggregate recipe", async () => { + const service = new AddressQueryService(); + const result = await service.tryHandle("кому мы больше всего сгрузили денег, топ-20 поставщиков"); + expect(result?.handled).toBe(true); + expect(result?.debug.detected_intent).toBe("supplier_payouts_profile"); + expect(result?.debug.selected_recipe).toBe("address_supplier_payouts_profile_v1"); + expect(result?.debug.mcp_call_status).not.toBe("skipped"); + expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + }); + + it("routes contract value question into dedicated aggregate recipe", async () => { + const service = new AddressQueryService(); + const result = await service.tryHandle("договоры по обороту ранкни и дай топ-20"); + expect(result?.handled).toBe(true); + expect(result?.debug.detected_intent).toBe("contract_usage_and_value"); + expect(result?.debug.selected_recipe).toBe("address_contract_usage_and_value_v1"); + expect(result?.debug.mcp_call_status).not.toBe("skipped"); + expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + }); + it("routes customer lifecycle question into dedicated aggregate recipe", async () => { const service = new AddressQueryService(); const result = await service.tryHandle("Какие заказчики работали с нами в 2020 году?"); @@ -1966,6 +2265,40 @@ describe("address recipe catalog counterparty filtering", () => { expect(plan.query).toContain("ДоговорКонтрагента"); }); + it("selects customer value recipe and keeps top-20 default", () => { + const selected = selectAddressRecipe("customer_revenue_and_payments", {}); + expect(selected.selected_recipe).toBeTruthy(); + const plan = buildAddressRecipePlan(selected.selected_recipe!, {}); + + expect(plan.recipe.recipe_id).toBe("address_customer_revenue_and_payments_v1"); + expect(plan.limit).toBe(20); + expect(plan.query).toContain("ПоступлениеНаРасчетныйСчет"); + expect(plan.query).toContain("БанкПоступление.ДоговорКонтрагента"); + }); + + it("selects supplier payouts recipe and keeps top-20 default", () => { + const selected = selectAddressRecipe("supplier_payouts_profile", {}); + expect(selected.selected_recipe).toBeTruthy(); + const plan = buildAddressRecipePlan(selected.selected_recipe!, {}); + + expect(plan.recipe.recipe_id).toBe("address_supplier_payouts_profile_v1"); + expect(plan.limit).toBe(20); + expect(plan.query).toContain("СписаниеСРасчетногоСчета"); + expect(plan.query).toContain("БанкСписание.ДоговорКонтрагента"); + }); + + it("selects contract value recipe and keeps top-20 default", () => { + const selected = selectAddressRecipe("contract_usage_and_value", {}); + expect(selected.selected_recipe).toBeTruthy(); + const plan = buildAddressRecipePlan(selected.selected_recipe!, {}); + + expect(plan.recipe.recipe_id).toBe("address_contract_usage_and_value_v1"); + expect(plan.limit).toBe(20); + expect(plan.query).toContain("CT_VALUE_IN"); + expect(plan.query).toContain("CT_VALUE_OUT"); + expect(plan.query).toContain("ДоговорКонтрагента"); + }); + it("selects contracts-by-counterparty recipe from contract catalog", () => { const selected = selectAddressRecipe("list_contracts_by_counterparty", { counterparty: "Жуковка 51"