From 4b83e2b2de7f021db8b38473a52b82bd8c994cdb Mon Sep 17 00:00:00 2001 From: dctouch Date: Fri, 17 Apr 2026 16:59:24 +0300 Subject: [PATCH] =?UTF-8?q?=D0=90=D0=A0=D0=A7=20=D0=90=D0=9F11=20-=20?= =?UTF-8?q?=D0=90=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=82=D1=83=D1=80?= =?UTF-8?q?=D0=B0:=20=D0=B2=D1=8B=D0=BD=D0=B5=D1=81=D1=82=D0=B8=20data=20s?= =?UTF-8?q?cope=20=D0=B8=20organization=20history=20=D0=B8=D0=B7=20assista?= =?UTF-8?q?ntService=20=D0=B2=20=D0=BE=D1=82=D0=B4=D0=B5=D0=BB=D1=8C=D0=BD?= =?UTF-8?q?=D1=8B=D0=B9=20owner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../05 - assistantService_extraction_map.md | 22 +- .../08 - current_status_audit_2026-04-17.md | 8 +- .../dist/services/assistantDataScopePolicy.js | 436 ++++++++++++++ .../backend/dist/services/assistantService.js | 185 +++--- .../src/services/assistantDataScopePolicy.ts | 550 ++++++++++++++++++ .../backend/src/services/assistantService.ts | 184 +++--- 6 files changed, 1162 insertions(+), 223 deletions(-) create mode 100644 llm_normalizer/backend/dist/services/assistantDataScopePolicy.js create mode 100644 llm_normalizer/backend/src/services/assistantDataScopePolicy.ts diff --git a/docs/ARCH/11 - architecture_turnaround/05 - assistantService_extraction_map.md b/docs/ARCH/11 - architecture_turnaround/05 - assistantService_extraction_map.md index 014e8f8..e517d15 100644 --- a/docs/ARCH/11 - architecture_turnaround/05 - assistantService_extraction_map.md +++ b/docs/ARCH/11 - architecture_turnaround/05 - assistantService_extraction_map.md @@ -14,7 +14,7 @@ The goal is to turn it from a god-service into a thinner coordinator. Approximate size: -- `5198` lines +- `5178` lines It currently mixes concerns from: @@ -114,13 +114,13 @@ Current owner: Current references: -- [assistantService.ts:3974](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:3974) -- [assistantService.ts:4412](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:4412) -- [assistantService.ts:6052](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:6052) +- [assistantService.ts:4284](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:4284) +- [assistantService.ts:4977](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:4977) Target owner: - `assistantBoundaryPolicy` +- `assistantDataScopePolicy` Expected artifact: @@ -201,17 +201,19 @@ This order is chosen because route and transition pressure are currently the mai This extraction is materially underway and no longer just a proposal. -Current active owner creation and wiring in [assistantService.ts](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:4283): +Current active owner creation and wiring in [assistantService.ts](/x:/1C/NDC_1C/llm_normalizer/backend/src/services/assistantService.ts:4188): -- provider owner near `4283` -- meta and memory owners near `4296-4301` -- route owner near `4306` -- transition owner near `4343` -- boundary owner near `4997` +- provider owner near `4188` +- meta and memory owners near `4201-4206` +- route owner near `4211` +- transition owner near `4250` +- data-scope owner near `4284` +- boundary owner near `4977` What is already true: - route, transition, boundary, meta, memory, and provider policies have explicit external owners; +- data-scope probing and organization-history extraction now also have an explicit owner; - runtime already delegates important decisions to those owners. What is still not fully true: diff --git a/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md index 4f586ca..a43f9cd 100644 --- a/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md @@ -126,7 +126,7 @@ This is enough to build targeted semantic packs that are not single-domain toy s ## Honest Phase Status -Estimated overall turnaround completion: `~85%` +Estimated overall turnaround completion: `~86%` ### Phase 0. Shared Baseline @@ -191,15 +191,16 @@ Remaining debt: ### Phase 5. AssistantService Extraction -Status: `76%` +Status: `79%` Reason: - major policy categories have real owners outside the coordinator. +- data-scope probing and organization-history extraction are now delegated to a dedicated owner. Remaining debt: -- `assistantService.ts` is still about `5198` lines; +- `assistantService.ts` is still about `5178` lines; - runtime uses extracted owners, but legacy bodies and fallback branches still live in the coordinator file; - code review still sometimes requires reading `assistantService` together with extracted owners. @@ -240,6 +241,7 @@ Compared with the pre-turnaround baseline, the system is now materially better i - temporal honesty is now evaluated as an explicit invariant; - factual-negative answers can remain truthful instead of collapsing into generic technical refusals; - meta questions and memory recap are no longer purely incidental side effects of route logic; +- organization data-scope probing is no longer owned only by coordinator-local helper bodies; - architecture regressions can now be localized to route, transition, truth gate, coverage/evidence, boundary, or meta/memory layers. ## What Still Remains The Main Architectural Debt diff --git a/llm_normalizer/backend/dist/services/assistantDataScopePolicy.js b/llm_normalizer/backend/dist/services/assistantDataScopePolicy.js new file mode 100644 index 0000000..e905491 --- /dev/null +++ b/llm_normalizer/backend/dist/services/assistantDataScopePolicy.js @@ -0,0 +1,436 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.createAssistantDataScopePolicy = createAssistantDataScopePolicy; +// @ts-nocheck +const assistantOrganizationMatcher_1 = require("./assistantOrganizationMatcher"); +const DATA_SCOPE_CACHE_TTL_MS = 60_000; +function normalizeScopeLabel(value) { + return String(value ?? "") + .replace(/[“”«»]/g, '"') + .replace(/\s+/g, " ") + .trim(); +} +function normalizeScopeKey(value) { + return normalizeScopeLabel(value).toLowerCase().replace(/ё/g, "е"); +} +function normalizeGuidValue(value) { + const source = normalizeScopeLabel(value); + if (!source) { + return null; + } + const match = source.match(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/i); + return match ? String(match[0]).toLowerCase() : null; +} +function extractGuidValuesFromText(value) { + const source = normalizeScopeLabel(value); + if (!source) { + return []; + } + const matches = source.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi); + if (!matches || matches.length === 0) { + return []; + } + return Array.from(new Set(matches.map((item) => String(item).toLowerCase()))); +} +function hasOrganizationKeyHint(key) { + return /(?:организац|organization|company|контор|org)\b/i.test(String(key ?? "")); +} +function hasNameKeyHint(key) { + return /(?:представ|наимен|name|title|display|presentation|description)\b/i.test(String(key ?? "")); +} +function hasGuidKeyHint(key) { + return /(?:идентифик|guid|uuid|key|ref|ссылк|\bid\b)\b/i.test(String(key ?? "")); +} +function looksLikeOrganizationTypeMarker(value) { + const normalized = normalizeScopeKey(value); + return /(?:справочникссылка\.\s*организац|catalogref\.\s*organization|organization|company|организац)/i.test(normalized); +} +function isPlausibleOrganizationName(value) { + const candidate = normalizeScopeLabel(value); + if (!candidate) { + return false; + } + if (/^(?:период|регистратор|счетдт|счеткт|amount|period|registrator|accountdt|accountkt)$/i.test(candidate)) { + return false; + } + if (/^[0-9._:/\\-]+$/i.test(candidate)) { + return false; + } + if (/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i.test(candidate)) { + return false; + } + if (/(?:справочникссылка|документссылка|плансчетовссылка|standardodata|recordtype|cmp:)/i.test(candidate)) { + return false; + } + return /[A-Za-z\u0400-\u04FF]/u.test(candidate); +} +function appendOrganizationFactsFromValue(value, hints, bucket, depth = 0) { + if (depth > 4 || value === null || value === undefined) { + return; + } + if (typeof value === "string") { + for (const guid of extractGuidValuesFromText(value)) { + if (hints.guidHint || hints.organizationHint || hints.nameHint) { + bucket.refs.push(guid); + } + } + if ((hints.organizationHint || hints.nameHint) && isPlausibleOrganizationName(value)) { + const normalized = normalizeScopeLabel(value); + if (normalized) { + bucket.names.push(normalized); + } + } + return; + } + if (Array.isArray(value)) { + for (const item of value) { + appendOrganizationFactsFromValue(item, hints, bucket, depth + 1); + } + return; + } + if (typeof value !== "object") { + return; + } + const entries = Object.entries(value); + let objectIsOrganization = false; + let hasObjectRefMarker = false; + let hasGuidLikeField = false; + let hasTypeMarker = false; + for (const [rawKey, rawVal] of entries) { + const key = normalizeScopeKey(rawKey); + if ((key.includes("objectref") || key.includes("_objectref")) && rawVal === true) { + hasObjectRefMarker = true; + } + if (typeof rawVal === "string" && normalizeGuidValue(rawVal)) { + hasGuidLikeField = true; + } + if (hasOrganizationKeyHint(key)) { + objectIsOrganization = true; + break; + } + if ((key.includes("типобъекта") || key.includes("type")) && typeof rawVal === "string" && looksLikeOrganizationTypeMarker(rawVal)) { + objectIsOrganization = true; + hasTypeMarker = true; + break; + } + } + if (!objectIsOrganization && hasObjectRefMarker && hasGuidLikeField) { + const hasNameLikeValue = entries.some(([rawKey, rawVal]) => { + if (typeof rawVal !== "string") { + return false; + } + const key = normalizeScopeKey(rawKey); + return hasNameKeyHint(key) || isPlausibleOrganizationName(rawVal); + }); + if (hasTypeMarker || hasNameLikeValue) { + objectIsOrganization = true; + } + } + for (const [rawKey, rawVal] of entries) { + if (String(rawKey ?? "").startsWith("__")) { + continue; + } + const key = normalizeScopeKey(rawKey); + const childHints = { + organizationHint: hints.organizationHint || objectIsOrganization || hasOrganizationKeyHint(key), + nameHint: hints.nameHint || objectIsOrganization || hasNameKeyHint(key), + guidHint: hints.guidHint || objectIsOrganization || hasGuidKeyHint(key) + }; + if (typeof rawVal === "string") { + const guid = normalizeGuidValue(rawVal); + if (guid && childHints.guidHint) { + bucket.refs.push(guid); + } + } + appendOrganizationFactsFromValue(rawVal, childHints, bucket, depth + 1); + } +} +function buildResolvedDataScopeProbe(status, activeMcpChannel, organizations) { + return { + status, + channel: activeMcpChannel, + organizations: Array.from(new Set(Array.isArray(organizations) ? organizations : [])).slice(0, 20), + error: null + }; +} +function createAssistantDataScopePolicy(deps) { + const dataScopeProbeCache = new Map(); + function parseOrganizationsFromDataScopeAssistantText(text) { + const source = deps.repairAddressMojibake(String(text ?? "")); + if (!source) { + return []; + } + const extracted = []; + const singleMatch = source.match(/доступна\s+организация:\s*([^.\n]+)/iu); + if (singleMatch) { + const value = (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(singleMatch[1]); + if (value) { + extracted.push(value); + } + } + const multiMatch = source.match(/доступны\s+организац(?:ии|ия)\s*(?:\(\d+\))?:\s*([^.\n]+)/iu); + if (multiMatch) { + const parts = String(multiMatch[1] ?? "") + .split(",") + .map((item) => (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(item)) + .filter((item) => Boolean(item)); + extracted.push(...parts); + } + return Array.from(new Set(extracted)); + } + function extractKnownOrganizationsFromHistory(items) { + const collected = []; + for (let index = (Array.isArray(items) ? items.length : 0) - 1; index >= 0; index -= 1) { + const item = Array.isArray(items) ? items[index] : null; + if (!item || typeof item !== "object" || item.role !== "assistant") { + continue; + } + const debug = item.debug; + if (debug && typeof debug === "object") { + const directFromProbe = Array.isArray(debug.living_chat_data_scope_probe_organizations) + ? debug.living_chat_data_scope_probe_organizations + : []; + const knownFromDebug = Array.isArray(debug.assistant_known_organizations) + ? debug.assistant_known_organizations + : []; + const directFromCandidates = Array.isArray(debug.organization_candidates) ? debug.organization_candidates : []; + const directFromResolved = [ + (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.assistant_active_organization), + (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.living_chat_selected_organization), + (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.extracted_filters?.organization), + (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.address_root_frame_context?.organization) + ].filter((value) => Boolean(value)); + if (directFromProbe.length > 0 || + knownFromDebug.length > 0 || + directFromCandidates.length > 0 || + directFromResolved.length > 0) { + collected.push(...directFromProbe, ...knownFromDebug, ...directFromCandidates, ...directFromResolved); + } + } + const parsedFromText = parseOrganizationsFromDataScopeAssistantText(item.text); + if (parsedFromText.length > 0) { + collected.push(...parsedFromText); + } + if (collected.length >= 20) { + break; + } + } + return (0, assistantOrganizationMatcher_1.mergeKnownOrganizations)(collected, 20); + } + function findLastAssistantActiveOrganization(items) { + for (let index = (Array.isArray(items) ? items.length : 0) - 1; index >= 0; index -= 1) { + const item = Array.isArray(items) ? items[index] : null; + if (!item || typeof item !== "object" || item.role !== "assistant") { + continue; + } + const debug = item.debug; + if (!debug || typeof debug !== "object") { + continue; + } + const direct = (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.assistant_active_organization); + if (direct) { + return direct; + } + const selected = (0, assistantOrganizationMatcher_1.normalizeOrganizationScopeValue)(debug.living_chat_selected_organization); + if (selected) { + return selected; + } + } + return null; + } + function extractOrganizationFactsFromRows(rows) { + const names = []; + const refs = []; + const pairs = []; + for (const row of Array.isArray(rows) ? rows : []) { + if (!row || typeof row !== "object") { + continue; + } + const rowNames = []; + const rowRefs = []; + for (const [rawKey, rawValue] of Object.entries(row)) { + if (String(rawKey ?? "").startsWith("__")) { + continue; + } + const key = normalizeScopeKey(rawKey); + appendOrganizationFactsFromValue(rawValue, { + organizationHint: hasOrganizationKeyHint(key), + nameHint: hasNameKeyHint(key), + guidHint: hasGuidKeyHint(key) + }, { names: rowNames, refs: rowRefs }); + } + const dedupRowNames = Array.from(new Set(rowNames)) + .filter((item) => isPlausibleOrganizationName(item)) + .slice(0, 20); + const dedupRowRefs = Array.from(new Set(rowRefs)) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(item)) + .slice(0, 20); + if (dedupRowNames.length === 0 && dedupRowRefs.length === 0) { + const fallbackBucket = { names: [], refs: [] }; + appendOrganizationFactsFromValue(row, { + organizationHint: true, + nameHint: true, + guidHint: true + }, fallbackBucket); + for (const value of fallbackBucket.names) { + if (isPlausibleOrganizationName(value)) { + dedupRowNames.push(value); + } + } + for (const value of fallbackBucket.refs) { + const normalized = String(value ?? "").toLowerCase(); + if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(normalized)) { + dedupRowRefs.push(normalized); + } + } + } + names.push(...dedupRowNames); + refs.push(...dedupRowRefs); + if (dedupRowRefs.length > 0 && dedupRowNames.length > 0) { + for (const ref of dedupRowRefs) { + for (const name of dedupRowNames) { + pairs.push({ ref, name }); + } + } + } + } + return { + names: Array.from(new Set(names)).slice(0, 20), + refs: Array.from(new Set(refs)).slice(0, 20), + pairs: Array.from(new Set(pairs.map((item) => `${item.ref}||${item.name}`))) + .map((token) => { + const [ref, name] = token.split("||"); + return { ref, name }; + }) + .slice(0, 100) + }; + } + function resolveOrganizationNamesByRefs(refs, facts) { + const refSet = new Set((Array.isArray(refs) ? refs : []) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => item.length > 0)); + if (refSet.size === 0) { + return []; + } + const names = []; + for (const pair of Array.isArray(facts?.pairs) ? facts?.pairs : []) { + const ref = String(pair?.ref ?? "").toLowerCase(); + const name = normalizeScopeLabel(pair?.name ?? ""); + if (!ref || !name || !refSet.has(ref)) { + continue; + } + names.push(name); + } + return Array.from(new Set(names)).slice(0, 20); + } + async function resolveAssistantDataScopeProbe() { + const cacheKey = `${deps.mcpProxyUrl}|${deps.activeMcpChannel}`; + const now = Date.now(); + const cached = dataScopeProbeCache.get(cacheKey); + if (cached && Number(cached.expiresAt ?? 0) > now) { + return cached.value; + } + if (String(process.env.NODE_ENV ?? "").toLowerCase() === "test") { + return { + status: "skipped_test_env", + channel: deps.activeMcpChannel, + organizations: [], + error: null + }; + } + const catalogQueryCandidates = [ + "ВЫБРАТЬ ПЕРВЫЕ 20 Организации.Наименование КАК Организация ИЗ Справочник.Организации КАК Организации", + "ВЫБРАТЬ ПЕРВЫЕ 20 Организации.НаименованиеПолное КАК Организация ИЗ Справочник.Организации КАК Организации", + "ВЫБРАТЬ ПЕРВЫЕ 100 Организации.Ссылка КАК Организация, ПРЕДСТАВЛЕНИЕ(Организации.Ссылка) КАК ОрганизацияПредставление ИЗ Справочник.Организации КАК Организации" + ]; + const movementProbeCandidates = [ + "ВЫБРАТЬ ПЕРВЫЕ 60 Движения.Организация КАК Организация ИЗ РегистрБухгалтерии.Хозрасчетный КАК Движения УПОРЯДОЧИТЬ ПО Движения.Период УБЫВ", + "ВЫБРАТЬ ПЕРВЫЕ 60 Движения.Организация КАК Организация ИЗ РегистрБухгалтерии.Хозрасчетный КАК Движения" + ]; + let lastError = null; + const catalogFacts = { names: [], refs: [], pairs: [] }; + for (const queryText of catalogQueryCandidates) { + const probe = await deps.executeAddressMcpQuery({ + query: queryText, + limit: 100 + }); + if (probe.error) { + lastError = String(probe.error); + continue; + } + const facts = extractOrganizationFactsFromRows(Array.isArray(probe.rows) ? probe.rows : []); + catalogFacts.names.push(...facts.names); + catalogFacts.refs.push(...facts.refs); + catalogFacts.pairs.push(...facts.pairs); + if (facts.names.length > 0) { + const resolved = buildResolvedDataScopeProbe("resolved", deps.activeMcpChannel, facts.names); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + const movementFacts = { names: [], refs: [], pairs: [] }; + for (const queryText of movementProbeCandidates) { + const probe = await deps.executeAddressMcpQuery({ + query: queryText, + limit: 60 + }); + if (probe.error) { + lastError = String(probe.error); + continue; + } + const facts = extractOrganizationFactsFromRows(Array.isArray(probe.rows) ? probe.rows : []); + movementFacts.names.push(...facts.names); + movementFacts.refs.push(...facts.refs); + movementFacts.pairs.push(...facts.pairs); + if (facts.names.length > 0) { + const resolved = buildResolvedDataScopeProbe("resolved_from_activity", deps.activeMcpChannel, facts.names); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + const movementRefs = Array.from(new Set(movementFacts.refs)) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => item.length > 0); + if (movementRefs.length > 0) { + const namesFromCatalogPairs = resolveOrganizationNamesByRefs(movementRefs, { + names: Array.from(new Set(catalogFacts.names)), + refs: Array.from(new Set(catalogFacts.refs)), + pairs: catalogFacts.pairs + }); + if (namesFromCatalogPairs.length > 0) { + const resolved = buildResolvedDataScopeProbe("resolved_from_ref_lookup", deps.activeMcpChannel, namesFromCatalogPairs); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + const fallback = { + status: lastError ? "unresolved_with_error" : "unresolved", + channel: deps.activeMcpChannel, + organizations: [], + error: lastError + }; + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: fallback + }); + return fallback; + } + return { + parseOrganizationsFromDataScopeAssistantText, + extractKnownOrganizationsFromHistory, + findLastAssistantActiveOrganization, + extractOrganizationFactsFromRows, + resolveOrganizationNamesByRefs, + resolveAssistantDataScopeProbe + }; +} diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index cce277c..a3c35e0 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -65,6 +65,7 @@ const capabilitiesRegistry_1 = __importStar(require("./capabilitiesRegistry")); const assistantCanon_1 = __importStar(require("./assistantCanon")); const assistantAddressAttemptRuntimeAdapter_1 = __importStar(require("./assistantAddressAttemptRuntimeAdapter")); const assistantCoverageGrounding_1 = __importStar(require("./assistantCoverageGrounding")); +const assistantDataScopePolicy_1 = __importStar(require("./assistantDataScopePolicy")); const assistantDeepTurnAttemptRuntimeAdapter_1 = __importStar(require("./assistantDeepTurnAttemptRuntimeAdapter")); const assistantBoundaryPolicy_1 = __importStar(require("./assistantBoundaryPolicy")); const assistantLivingModePolicy_1 = __importStar(require("./assistantLivingModePolicy")); @@ -74,14 +75,14 @@ const assistantProviderExecutionPolicy_1 = __importStar(require("./assistantProv const assistantRoutePolicy_1 = __importStar(require("./assistantRoutePolicy")); const assistantTransitionPolicy_1 = __importStar(require("./assistantTransitionPolicy")); const assistantOrganizationScopeRuntimeAdapter_1 = __importStar(require("./assistantOrganizationScopeRuntimeAdapter")); +const assistantOrganizationMatcher_1 = __importStar(require("./assistantOrganizationMatcher")); const assistantTurnAttemptRuntimeAdapter_1 = __importStar(require("./assistantTurnAttemptRuntimeAdapter")); const assistantTurnRuntimeDepsAdapter_1 = __importStar(require("./assistantTurnRuntimeDepsAdapter")); const assistantTurnRuntimeInputBuilder_1 = __importStar(require("./assistantTurnRuntimeInputBuilder")); const assistantUserTurnBootstrapRuntimeAdapter_1 = __importStar(require("./assistantUserTurnBootstrapRuntimeAdapter")); const assistantQueryPlanning_1 = __importStar(require("./assistantQueryPlanning")); const iconv_lite_1 = __importDefault(require("iconv-lite")); -const DATA_SCOPE_CACHE_TTL_MS = 60_000; -const dataScopeProbeCache = new Map(); +const normalizeOrganizationScopeValue = assistantOrganizationMatcher_1.normalizeOrganizationScopeValue; function retrievalSummaryForRoute(route) { if (route === "store_canonical") return "Canonical accounting data path selected."; @@ -4225,105 +4226,6 @@ function hasLivingChatSignal(text) { function buildAssistantCapabilityContractReply() { return (0, capabilitiesRegistry_1.buildCapabilityContractReplyFromRegistry)(); } -function normalizeScopeLabel(value) { - const repaired = repairAddressMojibake(String(value ?? "")); - let normalized = compactWhitespace(repaired.trim()); - for (let index = 0; index < 2; index += 1) { - const first = normalized[0]; - const last = normalized[normalized.length - 1]; - const wrappedInQuotes = (first === "\"" && last === "\"") || - (first === "'" && last === "'") || - (first === "«" && last === "»"); - if (!wrappedInQuotes) { - break; - } - normalized = compactWhitespace(normalized.slice(1, -1).trim()); - } - if (!normalized) { - return null; - } - if (/^(?:null|undefined|nan|0|не\s*заполнено)$/i.test(normalized)) { - return null; - } - return normalized; -} -function normalizeScopeKey(value) { - return repairAddressMojibake(String(value ?? "")).toLowerCase().replace(/ё/g, "е"); -} -const ORGANIZATION_SCOPE_STOPWORDS = new Set([ - "ооо", - "ao", - "ао", - "зао", - "ип", - "llc", - "ltd", - "company", - "компания", - "организация", - "организации", - "контора", - "конторы", - "фирма", - "фирмы", - "по", - "для", - "над", - "под", - "без", - "с", - "со", - "в", - "во", - "на", - "и", - "или", - "а", - "но", - "не", - "мы", - "нам", - "наш", - "наша", - "наше", - "наши", - "ты", - "тебе", - "твой", - "сейчас", - "щас", - "тут", - "вот", - "давай", - "го", - "погнали", - "тогда", - "обсудим", - "обсуждать", - "работать", - "работаем", - "работаешь", - "работаете", - "можем", - "можно", - "какая", - "какой", - "какие", - "чья", - "чье", - "чьи" -]); -function normalizeOrganizationScopeValue(value) { - const normalized = normalizeScopeLabel(value); - if (!normalized) { - return null; - } - const unwrapped = normalized - .replace(/^"+|"+$/g, "") - .replace(/^'+|'+$/g, "") - .trim(); - return unwrapped ? unwrapped : null; -} const assistantProviderExecutionPolicy = (0, assistantProviderExecutionPolicy_1.createAssistantProviderExecutionPolicy)(); const assistantLivingModePolicy = (0, assistantLivingModePolicy_1.createAssistantLivingModePolicy)({ featureAssistantLivingChatRouterV1: config_1.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1, @@ -4420,6 +4322,78 @@ const assistantTransitionPolicy = (0, assistantTransitionPolicy_1.createAssistan extractDisplayedAddressEntityCandidates, resolveDisplayedAddressEntityMention }); +const assistantDataScopePolicy = (0, assistantDataScopePolicy_1.createAssistantDataScopePolicy)({ + activeMcpChannel: config_1.ASSISTANT_MCP_CHANNEL, + mcpProxyUrl: config_1.ASSISTANT_MCP_PROXY_URL, + executeAddressMcpQuery: addressMcpClient_1.executeAddressMcpQuery, + repairAddressMojibake +}); +function normalizeScopeKey(value) { + return repairAddressMojibake(String(value ?? "")).toLowerCase().replace(/ё/g, "е"); +} +const ORGANIZATION_SCOPE_STOPWORDS = new Set([ + "ооо", + "ao", + "ао", + "зао", + "ип", + "llc", + "ltd", + "company", + "компания", + "организация", + "организации", + "контора", + "конторы", + "фирма", + "фирмы", + "по", + "для", + "над", + "под", + "без", + "с", + "со", + "в", + "во", + "на", + "и", + "или", + "а", + "но", + "не", + "мы", + "нам", + "наш", + "наша", + "наше", + "наши", + "ты", + "тебе", + "твой", + "сейчас", + "щас", + "тут", + "вот", + "давай", + "го", + "погнали", + "тогда", + "обсудим", + "обсуждать", + "работать", + "работаем", + "работаешь", + "работаете", + "можем", + "можно", + "какая", + "какой", + "какие", + "чья", + "чье", + "чьи" +]); function normalizeOrganizationScopeSearchText(value) { const source = normalizeScopeKey(value); return source @@ -4679,9 +4653,9 @@ function resolveSessionOrganizationScopeContext(userMessage, items, addressNavig userMessage, items, addressNavigationState, - extractKnownOrganizationsFromHistory, + extractKnownOrganizationsFromHistory: assistantDataScopePolicy.extractKnownOrganizationsFromHistory, resolveOrganizationSelectionFromMessage, - findLastAssistantActiveOrganization, + findLastAssistantActiveOrganization: assistantDataScopePolicy.findLastAssistantActiveOrganization, normalizeOrganizationScopeValue }); } @@ -4935,12 +4909,13 @@ function buildResolvedDataScopeProbe(status, organizations) { }; } function extractOrganizationFactsFromRowsForTests(rows) { - return extractOrganizationFactsFromRows(rows); + return assistantDataScopePolicy.extractOrganizationFactsFromRows(rows); } function resolveOrganizationNamesByRefsForTests(refs, facts) { - return resolveOrganizationNamesByRefs(refs, facts); + return assistantDataScopePolicy.resolveOrganizationNamesByRefs(refs, facts); } async function resolveAssistantDataScopeProbe() { + return assistantDataScopePolicy.resolveAssistantDataScopeProbe(); const cacheKey = `${config_1.ASSISTANT_MCP_PROXY_URL}|${config_1.ASSISTANT_MCP_CHANNEL}`; const now = Date.now(); const cached = dataScopeProbeCache.get(cacheKey); diff --git a/llm_normalizer/backend/src/services/assistantDataScopePolicy.ts b/llm_normalizer/backend/src/services/assistantDataScopePolicy.ts new file mode 100644 index 0000000..42375d4 --- /dev/null +++ b/llm_normalizer/backend/src/services/assistantDataScopePolicy.ts @@ -0,0 +1,550 @@ +// @ts-nocheck +import { + mergeKnownOrganizations, + normalizeOrganizationScopeValue +} from "./assistantOrganizationMatcher"; + +const DATA_SCOPE_CACHE_TTL_MS = 60_000; + +export interface AssistantDataScopeProbe { + status: string; + channel: string; + organizations: string[]; + error: string | null; +} + +export interface AssistantOrganizationFacts { + names: string[]; + refs: string[]; + pairs: Array<{ + ref: string; + name: string; + }>; +} + +export interface AssistantDataScopePolicyDeps { + activeMcpChannel: string; + mcpProxyUrl: string; + executeAddressMcpQuery: (input: { query: string; limit: number }) => Promise<{ + rows?: unknown[]; + error?: string | null; + }>; + repairAddressMojibake: (value: unknown) => string; +} + +export interface AssistantDataScopePolicy { + parseOrganizationsFromDataScopeAssistantText: (text: unknown) => string[]; + extractKnownOrganizationsFromHistory: (items: unknown[]) => string[]; + findLastAssistantActiveOrganization: (items: unknown[]) => string | null; + extractOrganizationFactsFromRows: (rows: unknown[]) => AssistantOrganizationFacts; + resolveOrganizationNamesByRefs: (refs: unknown[], facts: AssistantOrganizationFacts | null | undefined) => string[]; + resolveAssistantDataScopeProbe: () => Promise; +} + +function normalizeScopeLabel(value: unknown): string { + return String(value ?? "") + .replace(/[“”«»]/g, '"') + .replace(/\s+/g, " ") + .trim(); +} + +function normalizeScopeKey(value: unknown): string { + return normalizeScopeLabel(value).toLowerCase().replace(/ё/g, "е"); +} + +function normalizeGuidValue(value: unknown): string | null { + const source = normalizeScopeLabel(value); + if (!source) { + return null; + } + const match = source.match(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/i); + return match ? String(match[0]).toLowerCase() : null; +} + +function extractGuidValuesFromText(value: unknown): string[] { + const source = normalizeScopeLabel(value); + if (!source) { + return []; + } + const matches = source.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi); + if (!matches || matches.length === 0) { + return []; + } + return Array.from(new Set(matches.map((item) => String(item).toLowerCase()))); +} + +function hasOrganizationKeyHint(key: unknown): boolean { + return /(?:организац|organization|company|контор|org)\b/i.test(String(key ?? "")); +} + +function hasNameKeyHint(key: unknown): boolean { + return /(?:представ|наимен|name|title|display|presentation|description)\b/i.test(String(key ?? "")); +} + +function hasGuidKeyHint(key: unknown): boolean { + return /(?:идентифик|guid|uuid|key|ref|ссылк|\bid\b)\b/i.test(String(key ?? "")); +} + +function looksLikeOrganizationTypeMarker(value: unknown): boolean { + const normalized = normalizeScopeKey(value); + return /(?:справочникссылка\.\s*организац|catalogref\.\s*organization|organization|company|организац)/i.test( + normalized + ); +} + +function isPlausibleOrganizationName(value: unknown): boolean { + const candidate = normalizeScopeLabel(value); + if (!candidate) { + return false; + } + if (/^(?:период|регистратор|счетдт|счеткт|amount|period|registrator|accountdt|accountkt)$/i.test(candidate)) { + return false; + } + if (/^[0-9._:/\\-]+$/i.test(candidate)) { + return false; + } + if (/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i.test(candidate)) { + return false; + } + if (/(?:справочникссылка|документссылка|плансчетовссылка|standardodata|recordtype|cmp:)/i.test(candidate)) { + return false; + } + return /[A-Za-z\u0400-\u04FF]/u.test(candidate); +} + +function appendOrganizationFactsFromValue( + value: unknown, + hints: { organizationHint: boolean; nameHint: boolean; guidHint: boolean }, + bucket: { names: string[]; refs: string[] }, + depth = 0 +): void { + if (depth > 4 || value === null || value === undefined) { + return; + } + + if (typeof value === "string") { + for (const guid of extractGuidValuesFromText(value)) { + if (hints.guidHint || hints.organizationHint || hints.nameHint) { + bucket.refs.push(guid); + } + } + if ((hints.organizationHint || hints.nameHint) && isPlausibleOrganizationName(value)) { + const normalized = normalizeScopeLabel(value); + if (normalized) { + bucket.names.push(normalized); + } + } + return; + } + + if (Array.isArray(value)) { + for (const item of value) { + appendOrganizationFactsFromValue(item, hints, bucket, depth + 1); + } + return; + } + + if (typeof value !== "object") { + return; + } + + const entries = Object.entries(value); + let objectIsOrganization = false; + let hasObjectRefMarker = false; + let hasGuidLikeField = false; + let hasTypeMarker = false; + + for (const [rawKey, rawVal] of entries) { + const key = normalizeScopeKey(rawKey); + if ((key.includes("objectref") || key.includes("_objectref")) && rawVal === true) { + hasObjectRefMarker = true; + } + if (typeof rawVal === "string" && normalizeGuidValue(rawVal)) { + hasGuidLikeField = true; + } + if (hasOrganizationKeyHint(key)) { + objectIsOrganization = true; + break; + } + if ((key.includes("типобъекта") || key.includes("type")) && typeof rawVal === "string" && looksLikeOrganizationTypeMarker(rawVal)) { + objectIsOrganization = true; + hasTypeMarker = true; + break; + } + } + + if (!objectIsOrganization && hasObjectRefMarker && hasGuidLikeField) { + const hasNameLikeValue = entries.some(([rawKey, rawVal]) => { + if (typeof rawVal !== "string") { + return false; + } + const key = normalizeScopeKey(rawKey); + return hasNameKeyHint(key) || isPlausibleOrganizationName(rawVal); + }); + if (hasTypeMarker || hasNameLikeValue) { + objectIsOrganization = true; + } + } + + for (const [rawKey, rawVal] of entries) { + if (String(rawKey ?? "").startsWith("__")) { + continue; + } + const key = normalizeScopeKey(rawKey); + const childHints = { + organizationHint: hints.organizationHint || objectIsOrganization || hasOrganizationKeyHint(key), + nameHint: hints.nameHint || objectIsOrganization || hasNameKeyHint(key), + guidHint: hints.guidHint || objectIsOrganization || hasGuidKeyHint(key) + }; + if (typeof rawVal === "string") { + const guid = normalizeGuidValue(rawVal); + if (guid && childHints.guidHint) { + bucket.refs.push(guid); + } + } + appendOrganizationFactsFromValue(rawVal, childHints, bucket, depth + 1); + } +} + +function buildResolvedDataScopeProbe(status: string, activeMcpChannel: string, organizations: unknown[]): AssistantDataScopeProbe { + return { + status, + channel: activeMcpChannel, + organizations: Array.from(new Set(Array.isArray(organizations) ? organizations : [])).slice(0, 20) as string[], + error: null + }; +} + +export function createAssistantDataScopePolicy(deps: AssistantDataScopePolicyDeps): AssistantDataScopePolicy { + const dataScopeProbeCache = new Map(); + + function parseOrganizationsFromDataScopeAssistantText(text: unknown): string[] { + const source = deps.repairAddressMojibake(String(text ?? "")); + if (!source) { + return []; + } + + const extracted: string[] = []; + const singleMatch = source.match(/доступна\s+организация:\s*([^.\n]+)/iu); + if (singleMatch) { + const value = normalizeOrganizationScopeValue(singleMatch[1]); + if (value) { + extracted.push(value); + } + } + + const multiMatch = source.match(/доступны\s+организац(?:ии|ия)\s*(?:\(\d+\))?:\s*([^.\n]+)/iu); + if (multiMatch) { + const parts = String(multiMatch[1] ?? "") + .split(",") + .map((item) => normalizeOrganizationScopeValue(item)) + .filter((item): item is string => Boolean(item)); + extracted.push(...parts); + } + + return Array.from(new Set(extracted)); + } + + function extractKnownOrganizationsFromHistory(items: unknown[]): string[] { + const collected: unknown[] = []; + for (let index = (Array.isArray(items) ? items.length : 0) - 1; index >= 0; index -= 1) { + const item = Array.isArray(items) ? items[index] : null; + if (!item || typeof item !== "object" || (item as { role?: string }).role !== "assistant") { + continue; + } + + const debug = (item as { debug?: Record }).debug; + if (debug && typeof debug === "object") { + const directFromProbe = Array.isArray(debug.living_chat_data_scope_probe_organizations) + ? debug.living_chat_data_scope_probe_organizations + : []; + const knownFromDebug = Array.isArray(debug.assistant_known_organizations) + ? debug.assistant_known_organizations + : []; + const directFromCandidates = Array.isArray(debug.organization_candidates) ? debug.organization_candidates : []; + const directFromResolved = [ + normalizeOrganizationScopeValue(debug.assistant_active_organization), + normalizeOrganizationScopeValue(debug.living_chat_selected_organization), + normalizeOrganizationScopeValue(debug.extracted_filters?.organization), + normalizeOrganizationScopeValue(debug.address_root_frame_context?.organization) + ].filter((value): value is string => Boolean(value)); + + if ( + directFromProbe.length > 0 || + knownFromDebug.length > 0 || + directFromCandidates.length > 0 || + directFromResolved.length > 0 + ) { + collected.push(...directFromProbe, ...knownFromDebug, ...directFromCandidates, ...directFromResolved); + } + } + + const parsedFromText = parseOrganizationsFromDataScopeAssistantText((item as { text?: unknown }).text); + if (parsedFromText.length > 0) { + collected.push(...parsedFromText); + } + if (collected.length >= 20) { + break; + } + } + + return mergeKnownOrganizations(collected, 20); + } + + function findLastAssistantActiveOrganization(items: unknown[]): string | null { + for (let index = (Array.isArray(items) ? items.length : 0) - 1; index >= 0; index -= 1) { + const item = Array.isArray(items) ? items[index] : null; + if (!item || typeof item !== "object" || (item as { role?: string }).role !== "assistant") { + continue; + } + + const debug = (item as { debug?: Record }).debug; + if (!debug || typeof debug !== "object") { + continue; + } + + const direct = normalizeOrganizationScopeValue(debug.assistant_active_organization); + if (direct) { + return direct; + } + const selected = normalizeOrganizationScopeValue(debug.living_chat_selected_organization); + if (selected) { + return selected; + } + } + + return null; + } + + function extractOrganizationFactsFromRows(rows: unknown[]): AssistantOrganizationFacts { + const names: string[] = []; + const refs: string[] = []; + const pairs: Array<{ ref: string; name: string }> = []; + + for (const row of Array.isArray(rows) ? rows : []) { + if (!row || typeof row !== "object") { + continue; + } + + const rowNames: string[] = []; + const rowRefs: string[] = []; + for (const [rawKey, rawValue] of Object.entries(row)) { + if (String(rawKey ?? "").startsWith("__")) { + continue; + } + const key = normalizeScopeKey(rawKey); + appendOrganizationFactsFromValue( + rawValue, + { + organizationHint: hasOrganizationKeyHint(key), + nameHint: hasNameKeyHint(key), + guidHint: hasGuidKeyHint(key) + }, + { names: rowNames, refs: rowRefs } + ); + } + + const dedupRowNames = Array.from(new Set(rowNames)) + .filter((item) => isPlausibleOrganizationName(item)) + .slice(0, 20); + const dedupRowRefs = Array.from(new Set(rowRefs)) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(item)) + .slice(0, 20); + + if (dedupRowNames.length === 0 && dedupRowRefs.length === 0) { + const fallbackBucket = { names: [] as string[], refs: [] as string[] }; + appendOrganizationFactsFromValue( + row, + { + organizationHint: true, + nameHint: true, + guidHint: true + }, + fallbackBucket + ); + for (const value of fallbackBucket.names) { + if (isPlausibleOrganizationName(value)) { + dedupRowNames.push(value); + } + } + for (const value of fallbackBucket.refs) { + const normalized = String(value ?? "").toLowerCase(); + if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(normalized)) { + dedupRowRefs.push(normalized); + } + } + } + + names.push(...dedupRowNames); + refs.push(...dedupRowRefs); + if (dedupRowRefs.length > 0 && dedupRowNames.length > 0) { + for (const ref of dedupRowRefs) { + for (const name of dedupRowNames) { + pairs.push({ ref, name }); + } + } + } + } + + return { + names: Array.from(new Set(names)).slice(0, 20), + refs: Array.from(new Set(refs)).slice(0, 20), + pairs: Array.from(new Set(pairs.map((item) => `${item.ref}||${item.name}`))) + .map((token) => { + const [ref, name] = token.split("||"); + return { ref, name }; + }) + .slice(0, 100) + }; + } + + function resolveOrganizationNamesByRefs( + refs: unknown[], + facts: AssistantOrganizationFacts | null | undefined + ): string[] { + const refSet = new Set( + (Array.isArray(refs) ? refs : []) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => item.length > 0) + ); + if (refSet.size === 0) { + return []; + } + + const names: string[] = []; + for (const pair of Array.isArray(facts?.pairs) ? facts?.pairs : []) { + const ref = String(pair?.ref ?? "").toLowerCase(); + const name = normalizeScopeLabel(pair?.name ?? ""); + if (!ref || !name || !refSet.has(ref)) { + continue; + } + names.push(name); + } + + return Array.from(new Set(names)).slice(0, 20); + } + + async function resolveAssistantDataScopeProbe(): Promise { + const cacheKey = `${deps.mcpProxyUrl}|${deps.activeMcpChannel}`; + const now = Date.now(); + const cached = dataScopeProbeCache.get(cacheKey); + if (cached && Number(cached.expiresAt ?? 0) > now) { + return cached.value; + } + + if (String(process.env.NODE_ENV ?? "").toLowerCase() === "test") { + return { + status: "skipped_test_env", + channel: deps.activeMcpChannel, + organizations: [], + error: null + }; + } + + const catalogQueryCandidates = [ + "ВЫБРАТЬ ПЕРВЫЕ 20 Организации.Наименование КАК Организация ИЗ Справочник.Организации КАК Организации", + "ВЫБРАТЬ ПЕРВЫЕ 20 Организации.НаименованиеПолное КАК Организация ИЗ Справочник.Организации КАК Организации", + "ВЫБРАТЬ ПЕРВЫЕ 100 Организации.Ссылка КАК Организация, ПРЕДСТАВЛЕНИЕ(Организации.Ссылка) КАК ОрганизацияПредставление ИЗ Справочник.Организации КАК Организации" + ]; + const movementProbeCandidates = [ + "ВЫБРАТЬ ПЕРВЫЕ 60 Движения.Организация КАК Организация ИЗ РегистрБухгалтерии.Хозрасчетный КАК Движения УПОРЯДОЧИТЬ ПО Движения.Период УБЫВ", + "ВЫБРАТЬ ПЕРВЫЕ 60 Движения.Организация КАК Организация ИЗ РегистрБухгалтерии.Хозрасчетный КАК Движения" + ]; + + let lastError: string | null = null; + const catalogFacts: AssistantOrganizationFacts = { names: [], refs: [], pairs: [] }; + for (const queryText of catalogQueryCandidates) { + const probe = await deps.executeAddressMcpQuery({ + query: queryText, + limit: 100 + }); + if (probe.error) { + lastError = String(probe.error); + continue; + } + + const facts = extractOrganizationFactsFromRows(Array.isArray(probe.rows) ? probe.rows : []); + catalogFacts.names.push(...facts.names); + catalogFacts.refs.push(...facts.refs); + catalogFacts.pairs.push(...facts.pairs); + if (facts.names.length > 0) { + const resolved = buildResolvedDataScopeProbe("resolved", deps.activeMcpChannel, facts.names); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + + const movementFacts: AssistantOrganizationFacts = { names: [], refs: [], pairs: [] }; + for (const queryText of movementProbeCandidates) { + const probe = await deps.executeAddressMcpQuery({ + query: queryText, + limit: 60 + }); + if (probe.error) { + lastError = String(probe.error); + continue; + } + + const facts = extractOrganizationFactsFromRows(Array.isArray(probe.rows) ? probe.rows : []); + movementFacts.names.push(...facts.names); + movementFacts.refs.push(...facts.refs); + movementFacts.pairs.push(...facts.pairs); + if (facts.names.length > 0) { + const resolved = buildResolvedDataScopeProbe("resolved_from_activity", deps.activeMcpChannel, facts.names); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + + const movementRefs = Array.from(new Set(movementFacts.refs)) + .map((item) => String(item ?? "").toLowerCase()) + .filter((item) => item.length > 0); + if (movementRefs.length > 0) { + const namesFromCatalogPairs = resolveOrganizationNamesByRefs(movementRefs, { + names: Array.from(new Set(catalogFacts.names)), + refs: Array.from(new Set(catalogFacts.refs)), + pairs: catalogFacts.pairs + }); + if (namesFromCatalogPairs.length > 0) { + const resolved = buildResolvedDataScopeProbe( + "resolved_from_ref_lookup", + deps.activeMcpChannel, + namesFromCatalogPairs + ); + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: resolved + }); + return resolved; + } + } + + const fallback: AssistantDataScopeProbe = { + status: lastError ? "unresolved_with_error" : "unresolved", + channel: deps.activeMcpChannel, + organizations: [], + error: lastError + }; + dataScopeProbeCache.set(cacheKey, { + expiresAt: now + DATA_SCOPE_CACHE_TTL_MS, + value: fallback + }); + return fallback; + } + + return { + parseOrganizationsFromDataScopeAssistantText, + extractKnownOrganizationsFromHistory, + findLastAssistantActiveOrganization, + extractOrganizationFactsFromRows, + resolveOrganizationNamesByRefs, + resolveAssistantDataScopeProbe + }; +} diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index d212a55..d1114de 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -19,6 +19,7 @@ import * as capabilitiesRegistry_1 from "./capabilitiesRegistry"; import * as assistantCanon_1 from "./assistantCanon"; import * as assistantAddressAttemptRuntimeAdapter_1 from "./assistantAddressAttemptRuntimeAdapter"; import * as assistantCoverageGrounding_1 from "./assistantCoverageGrounding"; +import * as assistantDataScopePolicy_1 from "./assistantDataScopePolicy"; import * as assistantDeepTurnAttemptRuntimeAdapter_1 from "./assistantDeepTurnAttemptRuntimeAdapter"; import * as assistantBoundaryPolicy_1 from "./assistantBoundaryPolicy"; import * as assistantLivingModePolicy_1 from "./assistantLivingModePolicy"; @@ -35,8 +36,7 @@ import * as assistantTurnRuntimeInputBuilder_1 from "./assistantTurnRuntimeInput import * as assistantUserTurnBootstrapRuntimeAdapter_1 from "./assistantUserTurnBootstrapRuntimeAdapter"; import * as assistantQueryPlanning_1 from "./assistantQueryPlanning"; import iconv from "iconv-lite"; -const DATA_SCOPE_CACHE_TTL_MS = 60_000; -const dataScopeProbeCache = new Map(); +const normalizeOrganizationScopeValue = assistantOrganizationMatcher_1.normalizeOrganizationScopeValue; function retrievalSummaryForRoute(route) { if (route === "store_canonical") return "Canonical accounting data path selected."; @@ -4185,105 +4185,6 @@ function hasLivingChatSignal(text) { function buildAssistantCapabilityContractReply() { return (0, capabilitiesRegistry_1.buildCapabilityContractReplyFromRegistry)(); } -function normalizeScopeLabel(value) { - const repaired = repairAddressMojibake(String(value ?? "")); - let normalized = compactWhitespace(repaired.trim()); - for (let index = 0; index < 2; index += 1) { - const first = normalized[0]; - const last = normalized[normalized.length - 1]; - const wrappedInQuotes = (first === "\"" && last === "\"") || - (first === "'" && last === "'") || - (first === "«" && last === "»"); - if (!wrappedInQuotes) { - break; - } - normalized = compactWhitespace(normalized.slice(1, -1).trim()); - } - if (!normalized) { - return null; - } - if (/^(?:null|undefined|nan|0|не\s*заполнено)$/i.test(normalized)) { - return null; - } - return normalized; -} -function normalizeScopeKey(value) { - return repairAddressMojibake(String(value ?? "")).toLowerCase().replace(/ё/g, "е"); -} -const ORGANIZATION_SCOPE_STOPWORDS = new Set([ - "ооо", - "ao", - "ао", - "зао", - "ип", - "llc", - "ltd", - "company", - "компания", - "организация", - "организации", - "контора", - "конторы", - "фирма", - "фирмы", - "по", - "для", - "над", - "под", - "без", - "с", - "со", - "в", - "во", - "на", - "и", - "или", - "а", - "но", - "не", - "мы", - "нам", - "наш", - "наша", - "наше", - "наши", - "ты", - "тебе", - "твой", - "сейчас", - "щас", - "тут", - "вот", - "давай", - "го", - "погнали", - "тогда", - "обсудим", - "обсуждать", - "работать", - "работаем", - "работаешь", - "работаете", - "можем", - "можно", - "какая", - "какой", - "какие", - "чья", - "чье", - "чьи" -]); -function normalizeOrganizationScopeValue(value) { - const normalized = normalizeScopeLabel(value); - if (!normalized) { - return null; - } - const unwrapped = normalized - .replace(/^"+|"+$/g, "") - .replace(/^'+|'+$/g, "") - .trim(); - return unwrapped ? unwrapped : null; -} const assistantProviderExecutionPolicy = (0, assistantProviderExecutionPolicy_1.createAssistantProviderExecutionPolicy)(); const assistantLivingModePolicy = (0, assistantLivingModePolicy_1.createAssistantLivingModePolicy)({ featureAssistantLivingChatRouterV1: config_1.FEATURE_ASSISTANT_LIVING_CHAT_ROUTER_V1, @@ -4380,6 +4281,78 @@ const assistantTransitionPolicy = (0, assistantTransitionPolicy_1.createAssistan extractDisplayedAddressEntityCandidates, resolveDisplayedAddressEntityMention }); +const assistantDataScopePolicy = (0, assistantDataScopePolicy_1.createAssistantDataScopePolicy)({ + activeMcpChannel: config_1.ASSISTANT_MCP_CHANNEL, + mcpProxyUrl: config_1.ASSISTANT_MCP_PROXY_URL, + executeAddressMcpQuery: addressMcpClient_1.executeAddressMcpQuery, + repairAddressMojibake +}); +function normalizeScopeKey(value) { + return repairAddressMojibake(String(value ?? "")).toLowerCase().replace(/ё/g, "е"); +} +const ORGANIZATION_SCOPE_STOPWORDS = new Set([ + "ооо", + "ao", + "ао", + "зао", + "ип", + "llc", + "ltd", + "company", + "компания", + "организация", + "организации", + "контора", + "конторы", + "фирма", + "фирмы", + "по", + "для", + "над", + "под", + "без", + "с", + "со", + "в", + "во", + "на", + "и", + "или", + "а", + "но", + "не", + "мы", + "нам", + "наш", + "наша", + "наше", + "наши", + "ты", + "тебе", + "твой", + "сейчас", + "щас", + "тут", + "вот", + "давай", + "го", + "погнали", + "тогда", + "обсудим", + "обсуждать", + "работать", + "работаем", + "работаешь", + "работаете", + "можем", + "можно", + "какая", + "какой", + "какие", + "чья", + "чье", + "чьи" +]); function normalizeOrganizationScopeSearchText(value) { const source = normalizeScopeKey(value); return source @@ -4638,9 +4611,9 @@ function resolveSessionOrganizationScopeContext(userMessage, items, addressNavig userMessage, items, addressNavigationState, - extractKnownOrganizationsFromHistory, + extractKnownOrganizationsFromHistory: assistantDataScopePolicy.extractKnownOrganizationsFromHistory, resolveOrganizationSelectionFromMessage, - findLastAssistantActiveOrganization, + findLastAssistantActiveOrganization: assistantDataScopePolicy.findLastAssistantActiveOrganization, normalizeOrganizationScopeValue }); } @@ -4894,12 +4867,13 @@ function buildResolvedDataScopeProbe(status, organizations) { }; } export function extractOrganizationFactsFromRowsForTests(rows) { - return extractOrganizationFactsFromRows(rows); + return assistantDataScopePolicy.extractOrganizationFactsFromRows(rows); } export function resolveOrganizationNamesByRefsForTests(refs, facts) { - return resolveOrganizationNamesByRefs(refs, facts); + return assistantDataScopePolicy.resolveOrganizationNamesByRefs(refs, facts); } async function resolveAssistantDataScopeProbe() { + return assistantDataScopePolicy.resolveAssistantDataScopeProbe(); const cacheKey = `${config_1.ASSISTANT_MCP_PROXY_URL}|${config_1.ASSISTANT_MCP_CHANNEL}`; const now = Date.now(); const cached = dataScopeProbeCache.get(cacheKey);