From 5f4e898c7c8b258e2486b51ece8ccf34e14efa19 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sat, 11 Apr 2026 00:22:00 +0300 Subject: [PATCH] =?UTF-8?q?=D0=93=D0=9B=D0=9E=D0=91=D0=90=D0=9B=D0=AC?= =?UTF-8?q?=D0=9D=D0=AB=D0=99=20=D0=A0=D0=95=D0=A4=D0=90=D0=9A=D0=A2=D0=9E?= =?UTF-8?q?=D0=A0=D0=98=D0=9D=D0=93=20=D0=90=D0=A0=D0=A5=D0=98=D0=A2=D0=95?= =?UTF-8?q?=D0=9A=D0=A2=D0=A3=D0=A0=D0=AB=20-=20=D0=A0=D0=B5=D1=84=D0=B0?= =?UTF-8?q?=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D1=8D=D1=82=D0=B0?= =?UTF-8?q?=D0=BF=D0=BE=D0=B2=202.46:=20=20=20=D1=81=D0=BA=D0=BB=D0=B5?= =?UTF-8?q?=D0=B9=D0=BA=D0=B0=20=D0=B2=D1=81=D0=B5=D0=B3=D0=BE=20address-?= =?UTF-8?q?=D0=BF=D0=BE=D1=82=D0=BE=D0=BA=D0=B0=20=D0=B2=20=D0=BE=D1=82?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B9=20assistantAddressAt?= =?UTF-8?q?temptRuntimeAdapter,=20=D0=B4=D0=BB=D1=8F=20=D1=82=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D1=87=D1=82=D0=BE=D0=B1=D1=8B=20=D0=B8=D0=B7=20handle?= =?UTF-8?q?Message=20=D1=83=D0=B1=D1=80=D0=B0=D1=82=D1=8C=20=D1=82=D1=80?= =?UTF-8?q?=D0=B8=20=D0=BB=D0=BE=D0=BA=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B=D1=85?= =?UTF-8?q?=20closure=20=D1=86=D0=B5=D0=BB=D0=B8=D0=BA=D0=BE=D0=BC.=20=20?= =?UTF-8?q?=D0=92=D1=81=D0=B5=20=D1=82=D1=80=D0=B8=20=D0=BC=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D0=B0=20(lane,=20response,=20living=20chat)=20=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=BA=D0=B8=D0=B4=D1=8B=D0=B2=D0=B0=D1=8E=D1=82?= =?UTF-8?q?=D1=81=D1=8F=20=D0=B8=20=D0=BD=D0=B5=20=D0=BB=D0=BE=D0=BC=D0=B0?= =?UTF-8?q?=D1=8E=D1=82=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=B0=D0=BA=D1=82?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/TECH/1CLLMARCH-FACT.md | 33 ++- .../assistantAddressAttemptRuntimeAdapter.js | 117 +++++++++ .../backend/dist/services/assistantService.js | 96 ++------ .../assistantAddressAttemptRuntimeAdapter.ts | 228 ++++++++++++++++++ .../backend/src/services/assistantService.ts | 96 ++------ ...istantAddressAttemptRuntimeAdapter.test.ts | 196 +++++++++++++++ .../data/autorun_annotations/annotations.json | 108 +++++++++ .../data/autorun_generators/history.json | 60 +++++ ...d_20260410210241_gen-mnte6y9p-4v1kfbw.json | 174 +++++++++++++ ...d_20260410210344_gen-mnte8abx-ax3v3tr.json | 174 +++++++++++++ ...istant_autogen_runtime_job-bOkyd627Q3.json | 130 ++++++++++ 11 files changed, 1269 insertions(+), 143 deletions(-) create mode 100644 llm_normalizer/backend/dist/services/assistantAddressAttemptRuntimeAdapter.js create mode 100644 llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts create mode 100644 llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts create mode 100644 llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210241_gen-mnte6y9p-4v1kfbw.json create mode 100644 llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210344_gen-mnte8abx-ax3v3tr.json create mode 100644 llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-bOkyd627Q3.json diff --git a/docs/TECH/1CLLMARCH-FACT.md b/docs/TECH/1CLLMARCH-FACT.md index 0292dc3..a6f9591 100644 --- a/docs/TECH/1CLLMARCH-FACT.md +++ b/docs/TECH/1CLLMARCH-FACT.md @@ -1437,7 +1437,38 @@ Validation: - `assistantDeepTurnPackagingRuntimeAdapter.test.ts` - `assistantWave10SettlementCorrectiveRegression.test.ts` -Status: **In progress (Phase 2.1 + 2.2 + 2.3 + 2.4 + 2.5 + 2.6 + 2.7 + 2.8 + 2.9 + 2.10 + 2.11 + 2.12 + 2.13 + 2.14 + 2.15 + 2.16 + 2.17 + 2.18 + 2.19 + 2.20 + 2.21 + 2.22 + 2.23 + 2.24 + 2.25 + 2.26 + 2.27 + 2.28 + 2.29 + 2.30 + 2.31 + 2.32 + 2.33 + 2.34 + 2.35 + 2.36 + 2.37 + 2.38 + 2.39 + 2.40 + 2.41 + 2.42 + 2.43 + 2.44 + 2.45 completed)** +Implemented in current pass (Phase 2.46): +1. Activated full address-turn attempt runtime boundary inside `assistantService`: + - rewired address handling call-site to `runAssistantAddressAttemptRuntime(...)`; + - removed local inline closures for lane-attempt, lane-response, and living-chat handoff wiring from `handleMessage`. +2. Finalized and type-hardened address attempt adapter contract: + - `assistantAddressAttemptRuntimeAdapter.ts` + - aligned `logEvent` and `messageIdFactory` contract typing to runtime expectations. +3. Added focused unit tests: + - `assistantAddressAttemptRuntimeAdapter.test.ts` + +Validation: +1. `npm run build` passed. +2. Targeted living/address/deep followup pack passed: + - `assistantAddressAttemptRuntimeAdapter.test.ts` + - `assistantDeepTurnAttemptRuntimeAdapter.test.ts` + - `assistantDeepTurnResponseAttemptRuntimeAdapter.test.ts` + - `assistantDeepTurnAnalysisAttemptRuntimeAdapter.test.ts` + - `assistantDeepTurnAnalysisRuntimeAdapter.test.ts` + - `assistantAddressLaneResponseAttemptRuntimeAdapter.test.ts` + - `assistantLivingChatAttemptRuntimeAdapter.test.ts` + - `assistantAddressLaneAttemptRuntimeAdapter.test.ts` + - `assistantUserTurnBootstrapRuntimeAdapter.test.ts` + - `assistantLivingChatLlmRuntimeAdapter.test.ts` + - `assistantLivingChatHandlerRuntimeAdapter.test.ts` + - `assistantLivingChatRuntimeAdapter.test.ts` + - `assistantAddressRuntimeAdapter.test.ts` + - `assistantAddressLaneResponseRuntimeAdapter.test.ts` + - `assistantDeepTurnResponseRuntimeAdapter.test.ts` + - `assistantDeepTurnPackagingRuntimeAdapter.test.ts` + - `assistantWave10SettlementCorrectiveRegression.test.ts` + +Status: **In progress (Phase 2.1 + 2.2 + 2.3 + 2.4 + 2.5 + 2.6 + 2.7 + 2.8 + 2.9 + 2.10 + 2.11 + 2.12 + 2.13 + 2.14 + 2.15 + 2.16 + 2.17 + 2.18 + 2.19 + 2.20 + 2.21 + 2.22 + 2.23 + 2.24 + 2.25 + 2.26 + 2.27 + 2.28 + 2.29 + 2.30 + 2.31 + 2.32 + 2.33 + 2.34 + 2.35 + 2.36 + 2.37 + 2.38 + 2.39 + 2.40 + 2.41 + 2.42 + 2.43 + 2.44 + 2.45 + 2.46 completed)** ## Stage 3 (P2): Hybrid Semantic Layer (LLM + Deterministic Guards) diff --git a/llm_normalizer/backend/dist/services/assistantAddressAttemptRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantAddressAttemptRuntimeAdapter.js new file mode 100644 index 0000000..faf038f --- /dev/null +++ b/llm_normalizer/backend/dist/services/assistantAddressAttemptRuntimeAdapter.js @@ -0,0 +1,117 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.runAssistantAddressAttemptRuntime = runAssistantAddressAttemptRuntime; +const assistantAddressRuntimeAdapter_1 = require("./assistantAddressRuntimeAdapter"); +const assistantAddressLaneAttemptRuntimeAdapter_1 = require("./assistantAddressLaneAttemptRuntimeAdapter"); +const assistantAddressLaneResponseAttemptRuntimeAdapter_1 = require("./assistantAddressLaneResponseAttemptRuntimeAdapter"); +const assistantLivingChatAttemptRuntimeAdapter_1 = require("./assistantLivingChatAttemptRuntimeAdapter"); +async function runAssistantAddressAttemptRuntime(input) { + const runAddressRuntimeSafe = input.runAddressRuntime ?? assistantAddressRuntimeAdapter_1.runAssistantAddressRuntime; + const runAddressLaneAttemptRuntimeSafe = input.runAddressLaneAttemptRuntime ?? assistantAddressLaneAttemptRuntimeAdapter_1.runAssistantAddressLaneAttemptRuntime; + const runAddressLaneResponseAttemptRuntimeSafe = input.runAddressLaneResponseAttemptRuntime ?? assistantAddressLaneResponseAttemptRuntimeAdapter_1.runAssistantAddressLaneResponseAttemptRuntime; + const runLivingChatAttemptRuntimeSafe = input.runLivingChatAttemptRuntime ?? assistantLivingChatAttemptRuntimeAdapter_1.runAssistantLivingChatAttemptRuntime; + const finalizeAddressLaneResponse = (addressLane, effectiveAddressUserMessage, carryoverMeta = null, llmPreDecomposeMeta = null) => runAddressLaneResponseAttemptRuntimeSafe({ + sessionId: input.sessionId, + userMessage: input.userMessage, + effectiveAddressUserMessage, + addressLane, + carryoverMeta, + llmPreDecomposeMeta, + knownOrganizations: input.sessionScope.knownOrganizations, + activeOrganization: input.sessionScope.activeOrganization, + sanitizeOutgoingAssistantText: input.sanitizeOutgoingAssistantText, + buildAddressDebugPayload: input.buildAddressDebugPayload, + buildAddressFollowupOffer: input.buildAddressFollowupOffer, + mergeKnownOrganizations: input.mergeKnownOrganizations, + toNonEmptyString: input.toNonEmptyString, + appendItem: input.appendItem, + getSession: input.getSession, + persistSession: input.persistSession, + cloneConversation: input.cloneConversation, + logEvent: input.logEvent, + messageIdFactory: input.messageIdFactory + }); + const tryHandleLivingChat = async (modeDecision, addressRuntimeMeta = null) => runLivingChatAttemptRuntimeSafe({ + sessionId: input.sessionId, + userMessage: input.userMessage, + sessionItems: input.sessionItems, + modeDecision, + sessionScope: { + knownOrganizations: input.sessionScope.knownOrganizations, + selectedOrganization: input.sessionScope.selectedOrganization, + activeOrganization: input.sessionScope.activeOrganization + }, + addressRuntimeMeta, + traceIdFactory: () => `chat-${input.messageIdFactory().replace(/^msg-/, "")}`, + toNonEmptyString: input.toNonEmptyString, + mergeKnownOrganizations: input.mergeKnownOrganizations, + hasAssistantDataScopeMetaQuestionSignal: input.hasAssistantDataScopeMetaQuestionSignal, + shouldHandleAsAssistantCapabilityMetaQuery: input.shouldHandleAsAssistantCapabilityMetaQuery, + hasDestructiveDataActionSignal: input.hasDestructiveDataActionSignal, + hasDangerOrCoercionSignal: input.hasDangerOrCoercionSignal, + hasOperationalAdminActionRequestSignal: input.hasOperationalAdminActionRequestSignal, + hasOrganizationFactLookupSignal: input.hasOrganizationFactLookupSignal, + hasOrganizationFactFollowupSignal: input.hasOrganizationFactFollowupSignal, + shouldEmitOrganizationSelectionReply: input.shouldEmitOrganizationSelectionReply, + hasAssistantCapabilityQuestionSignal: input.hasAssistantCapabilityQuestionSignal, + resolveDataScopeProbe: input.resolveDataScopeProbe, + applyScriptGuard: input.applyScriptGuard, + applyGroundingGuard: input.applyGroundingGuard, + buildAssistantSafetyRefusalReply: input.buildAssistantSafetyRefusalReply, + buildAssistantDataScopeContractReply: input.buildAssistantDataScopeContractReply, + buildAssistantOrganizationFactBoundaryReply: input.buildAssistantOrganizationFactBoundaryReply, + buildAssistantDataScopeSelectionReply: input.buildAssistantDataScopeSelectionReply, + buildAssistantOperationalBoundaryReply: input.buildAssistantOperationalBoundaryReply, + buildAssistantCapabilityContractReply: input.buildAssistantCapabilityContractReply, + appendItem: input.appendItem, + getSession: input.getSession, + persistSession: input.persistSession, + cloneConversation: input.cloneConversation, + logEvent: input.logEvent, + messageIdFactory: input.messageIdFactory, + nowIso: input.nowIso, + payload: input.payload, + chatClient: input.chatClient, + loadAssistantCanonExcerpt: input.loadAssistantCanonExcerpt, + sanitizeOutgoingAssistantText: input.sanitizeOutgoingAssistantText, + defaultModel: input.defaultModel, + defaultBaseUrl: input.defaultBaseUrl, + defaultApiKey: input.defaultApiKey + }); + const runAddressLaneAttempt = async (messageUsed, carryMeta, analysisDateHint) => runAddressLaneAttemptRuntimeSafe({ + messageUsed, + carryMeta, + analysisDateHint, + activeOrganization: input.sessionScope.activeOrganization, + mergeFollowupContextWithOrganizationScope: input.mergeFollowupContextWithOrganizationScope, + runAddressQueryTryHandle: input.runAddressQueryTryHandle + }); + return runAddressRuntimeSafe({ + featureAssistantAddressQueryV1: input.featureAssistantAddressQueryV1, + sessionId: input.sessionId, + userMessage: input.userMessage, + sessionItems: input.sessionItems, + llmProvider: input.payload.llmProvider, + useMock: Boolean(input.payload.useMock), + featureAddressLlmPredecomposeV1: input.featureAddressLlmPredecomposeV1, + runAddressLlmPreDecompose: input.runAddressLlmPreDecompose, + buildAddressLlmPredecomposeContractV1: input.buildAddressLlmPredecomposeContractV1, + sanitizeAddressMessageForFallback: input.sanitizeAddressMessageForFallback, + toNonEmptyString: input.toNonEmptyString, + resolveAddressFollowupCarryoverContext: input.resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision: input.resolveAssistantOrchestrationDecision, + buildAddressDialogContinuationContractV2: input.buildAddressDialogContinuationContractV2, + runtimeAnalysisContextAsOfDate: input.runtimeAnalysisContextAsOfDate, + payloadContextPeriodHint: input.payload?.context?.period_hint, + compactWhitespace: input.compactWhitespace, + runAddressLaneAttempt, + isRetryableAddressLimitedResult: input.isRetryableAddressLimitedResult, + finalizeAddressLaneResponse, + tryHandleLivingChat, + logEvent: input.logEvent, + nowIso: input.nowIso, + runAddressOrchestrationRuntime: input.runAddressOrchestrationRuntime, + runAddressToolGateRuntime: input.runAddressToolGateRuntime, + runAddressLaneRuntime: input.runAddressLaneRuntime + }); +} diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 332c166..9cb2e83 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -62,12 +62,9 @@ const openaiResponsesClient_1 = __importStar(require("./openaiResponsesClient")) const addressMcpClient_1 = __importStar(require("./addressMcpClient")); const capabilitiesRegistry_1 = __importStar(require("./capabilitiesRegistry")); const assistantCanon_1 = __importStar(require("./assistantCanon")); -const assistantAddressLaneResponseAttemptRuntimeAdapter_1 = __importStar(require("./assistantAddressLaneResponseAttemptRuntimeAdapter")); +const assistantAddressAttemptRuntimeAdapter_1 = __importStar(require("./assistantAddressAttemptRuntimeAdapter")); const assistantCoverageGrounding_1 = __importStar(require("./assistantCoverageGrounding")); const assistantDeepTurnAttemptRuntimeAdapter_1 = __importStar(require("./assistantDeepTurnAttemptRuntimeAdapter")); -const assistantAddressRuntimeAdapter_1 = __importStar(require("./assistantAddressRuntimeAdapter")); -const assistantAddressLaneAttemptRuntimeAdapter_1 = __importStar(require("./assistantAddressLaneAttemptRuntimeAdapter")); -const assistantLivingChatAttemptRuntimeAdapter_1 = __importStar(require("./assistantLivingChatAttemptRuntimeAdapter")); const assistantUserTurnBootstrapRuntimeAdapter_1 = __importStar(require("./assistantUserTurnBootstrapRuntimeAdapter")); const assistantQueryPlanning_1 = __importStar(require("./assistantQueryPlanning")); const iconv_lite_1 = __importDefault(require("iconv-lite")); @@ -4385,40 +4382,30 @@ class AssistantService { nowIso: () => new Date().toISOString() }); const sessionOrganizationScope = resolveSessionOrganizationScopeContext(userMessage, session.items); - const finalizeAddressLaneResponse = (addressLane, effectiveAddressUserMessage, carryoverMeta = null, llmPreDecomposeMeta = null) => (0, assistantAddressLaneResponseAttemptRuntimeAdapter_1.runAssistantAddressLaneResponseAttemptRuntime)({ - sessionId, - userMessage, - effectiveAddressUserMessage, - addressLane, - carryoverMeta, - llmPreDecomposeMeta, - knownOrganizations: sessionOrganizationScope.knownOrganizations, - activeOrganization: sessionOrganizationScope.activeOrganization, - sanitizeOutgoingAssistantText, - buildAddressDebugPayload, - buildAddressFollowupOffer, - mergeKnownOrganizations, - toNonEmptyString, - appendItem: (targetSessionId, item) => this.sessions.appendItem(targetSessionId, item), - getSession: (targetSessionId) => this.sessions.getSession(targetSessionId), - persistSession: (sessionState) => this.sessionLogger.persistSession(sessionState), - cloneConversation: (items) => cloneItems(items), - logEvent: (payload) => (0, log_1.logJson)(payload), - messageIdFactory: () => `msg-${(0, nanoid_1.nanoid)(10)}` - }); - const tryHandleLivingChat = async (modeDecision, addressRuntimeMeta = null) => (0, assistantLivingChatAttemptRuntimeAdapter_1.runAssistantLivingChatAttemptRuntime)({ + const addressRuntime = await (0, assistantAddressAttemptRuntimeAdapter_1.runAssistantAddressAttemptRuntime)({ + featureAssistantAddressQueryV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_V1, sessionId, userMessage, sessionItems: session.items, - modeDecision, + payload, sessionScope: { knownOrganizations: sessionOrganizationScope.knownOrganizations, selectedOrganization: sessionOrganizationScope.selectedOrganization, activeOrganization: sessionOrganizationScope.activeOrganization }, - addressRuntimeMeta, - traceIdFactory: () => `chat-${(0, nanoid_1.nanoid)(10)}`, + featureAddressLlmPredecomposeV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1, + runAddressLlmPreDecompose: async () => runAddressLlmPreDecompose(this.normalizerService, payload, userMessage), + buildAddressLlmPredecomposeContractV1: predecomposeContract_1.buildAddressLlmPredecomposeContractV1, + sanitizeAddressMessageForFallback, toNonEmptyString, + resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision, + buildAddressDialogContinuationContractV2, + runtimeAnalysisContextAsOfDate: runtimeAnalysisContext.as_of_date, + compactWhitespace, + mergeFollowupContextWithOrganizationScope, + runAddressQueryTryHandle: (laneMessageUsed, options) => this.addressQueryService.tryHandle(laneMessageUsed, options), + isRetryableAddressLimitedResult, mergeKnownOrganizations, hasAssistantDataScopeMetaQuestionSignal, shouldHandleAsAssistantCapabilityMetaQuery, @@ -4438,56 +4425,23 @@ class AssistantService { buildAssistantDataScopeSelectionReply, buildAssistantOperationalBoundaryReply, buildAssistantCapabilityContractReply, + chatClient: this.chatClient, + loadAssistantCanonExcerpt: assistantCanon_1.loadAssistantCanonExcerpt, + sanitizeOutgoingAssistantText, + defaultModel: config_1.DEFAULT_MODEL, + defaultBaseUrl: config_1.DEFAULT_OPENAI_BASE_URL, + defaultApiKey: process.env.OPENAI_API_KEY ?? "", + buildAddressDebugPayload, + buildAddressFollowupOffer, appendItem: (targetSessionId, item) => this.sessions.appendItem(targetSessionId, item), getSession: (targetSessionId) => this.sessions.getSession(targetSessionId), persistSession: (sessionState) => this.sessionLogger.persistSession(sessionState), cloneConversation: (items) => cloneItems(items), logEvent: (payload) => (0, log_1.logJson)(payload), messageIdFactory: () => `msg-${(0, nanoid_1.nanoid)(10)}`, - nowIso: () => new Date().toISOString(), - payload, - chatClient: this.chatClient, - loadAssistantCanonExcerpt: assistantCanon_1.loadAssistantCanonExcerpt, - sanitizeOutgoingAssistantText, - defaultModel: config_1.DEFAULT_MODEL, - defaultBaseUrl: config_1.DEFAULT_OPENAI_BASE_URL, - defaultApiKey: process.env.OPENAI_API_KEY ?? "" - }); - let addressRuntimeMetaForDeep = null; - const runAddressLaneAttempt = async (messageUsed, carryMeta, analysisDateHint) => (0, assistantAddressLaneAttemptRuntimeAdapter_1.runAssistantAddressLaneAttemptRuntime)({ - messageUsed, - carryMeta: carryMeta ?? null, - analysisDateHint: analysisDateHint ?? null, - activeOrganization: sessionOrganizationScope.activeOrganization, - mergeFollowupContextWithOrganizationScope, - runAddressQueryTryHandle: (laneMessageUsed, options) => this.addressQueryService.tryHandle(laneMessageUsed, options) - }); - const addressRuntime = await (0, assistantAddressRuntimeAdapter_1.runAssistantAddressRuntime)({ - featureAssistantAddressQueryV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_V1, - sessionId, - userMessage, - sessionItems: session.items, - llmProvider: payload?.llmProvider, - useMock: Boolean(payload.useMock), - featureAddressLlmPredecomposeV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1, - runAddressLlmPreDecompose: async () => runAddressLlmPreDecompose(this.normalizerService, payload, userMessage), - buildAddressLlmPredecomposeContractV1: predecomposeContract_1.buildAddressLlmPredecomposeContractV1, - sanitizeAddressMessageForFallback, - toNonEmptyString, - resolveAddressFollowupCarryoverContext, - resolveAssistantOrchestrationDecision, - buildAddressDialogContinuationContractV2, - runtimeAnalysisContextAsOfDate: runtimeAnalysisContext.as_of_date, - payloadContextPeriodHint: payload?.context?.period_hint, - compactWhitespace, - runAddressLaneAttempt, - isRetryableAddressLimitedResult, - finalizeAddressLaneResponse, - tryHandleLivingChat: (modeDecision, runtimeMeta) => tryHandleLivingChat(modeDecision, runtimeMeta), - logEvent: (payload) => (0, log_1.logJson)(payload), nowIso: () => new Date().toISOString() }); - addressRuntimeMetaForDeep = addressRuntime.addressRuntimeMetaForDeep; + const addressRuntimeMetaForDeep = addressRuntime.addressRuntimeMetaForDeep; if (addressRuntime.handled && addressRuntime.response) { return addressRuntime.response; } diff --git a/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts new file mode 100644 index 0000000..7d511d8 --- /dev/null +++ b/llm_normalizer/backend/src/services/assistantAddressAttemptRuntimeAdapter.ts @@ -0,0 +1,228 @@ +import { + runAssistantAddressRuntime, + type RunAssistantAddressRuntimeInput, + type RunAssistantAddressRuntimeOutput +} from "./assistantAddressRuntimeAdapter"; +import { + runAssistantAddressLaneAttemptRuntime, + type RunAssistantAddressLaneAttemptRuntimeInput +} from "./assistantAddressLaneAttemptRuntimeAdapter"; +import { + runAssistantAddressLaneResponseAttemptRuntime, + type RunAssistantAddressLaneResponseAttemptRuntimeInput +} from "./assistantAddressLaneResponseAttemptRuntimeAdapter"; +import { + runAssistantLivingChatAttemptRuntime, + type RunAssistantLivingChatAttemptRuntimeInput +} from "./assistantLivingChatAttemptRuntimeAdapter"; + +interface AddressAttemptPayload { + llmProvider?: unknown; + useMock?: unknown; + context?: { + period_hint?: unknown; + } | null; + apiKey?: unknown; + model?: unknown; + baseUrl?: unknown; + temperature?: number; + maxOutputTokens?: number; +} + +interface AddressSessionScope { + knownOrganizations: string[]; + selectedOrganization: string | null; + activeOrganization: string | null; +} + +export interface RunAssistantAddressAttemptRuntimeInput + extends Omit< + RunAssistantAddressRuntimeInput, + "llmProvider" | "useMock" | "payloadContextPeriodHint" | "runAddressLaneAttempt" | "finalizeAddressLaneResponse" | "tryHandleLivingChat" + > { + payload: AddressAttemptPayload; + sessionScope: AddressSessionScope; + mergeFollowupContextWithOrganizationScope: RunAssistantAddressLaneAttemptRuntimeInput["mergeFollowupContextWithOrganizationScope"]; + runAddressQueryTryHandle: RunAssistantAddressLaneAttemptRuntimeInput["runAddressQueryTryHandle"]; + mergeKnownOrganizations: RunAssistantAddressLaneResponseAttemptRuntimeInput["mergeKnownOrganizations"]; + hasAssistantDataScopeMetaQuestionSignal: RunAssistantLivingChatAttemptRuntimeInput["hasAssistantDataScopeMetaQuestionSignal"]; + shouldHandleAsAssistantCapabilityMetaQuery: RunAssistantLivingChatAttemptRuntimeInput["shouldHandleAsAssistantCapabilityMetaQuery"]; + hasDestructiveDataActionSignal: RunAssistantLivingChatAttemptRuntimeInput["hasDestructiveDataActionSignal"]; + hasDangerOrCoercionSignal: RunAssistantLivingChatAttemptRuntimeInput["hasDangerOrCoercionSignal"]; + hasOperationalAdminActionRequestSignal: RunAssistantLivingChatAttemptRuntimeInput["hasOperationalAdminActionRequestSignal"]; + hasOrganizationFactLookupSignal: RunAssistantLivingChatAttemptRuntimeInput["hasOrganizationFactLookupSignal"]; + hasOrganizationFactFollowupSignal: RunAssistantLivingChatAttemptRuntimeInput["hasOrganizationFactFollowupSignal"]; + shouldEmitOrganizationSelectionReply: RunAssistantLivingChatAttemptRuntimeInput["shouldEmitOrganizationSelectionReply"]; + hasAssistantCapabilityQuestionSignal: RunAssistantLivingChatAttemptRuntimeInput["hasAssistantCapabilityQuestionSignal"]; + resolveDataScopeProbe: RunAssistantLivingChatAttemptRuntimeInput["resolveDataScopeProbe"]; + applyScriptGuard: RunAssistantLivingChatAttemptRuntimeInput["applyScriptGuard"]; + applyGroundingGuard: RunAssistantLivingChatAttemptRuntimeInput["applyGroundingGuard"]; + buildAssistantSafetyRefusalReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantSafetyRefusalReply"]; + buildAssistantDataScopeContractReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantDataScopeContractReply"]; + buildAssistantOrganizationFactBoundaryReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantOrganizationFactBoundaryReply"]; + buildAssistantDataScopeSelectionReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantDataScopeSelectionReply"]; + buildAssistantOperationalBoundaryReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantOperationalBoundaryReply"]; + buildAssistantCapabilityContractReply: RunAssistantLivingChatAttemptRuntimeInput["buildAssistantCapabilityContractReply"]; + chatClient: RunAssistantLivingChatAttemptRuntimeInput["chatClient"]; + loadAssistantCanonExcerpt: RunAssistantLivingChatAttemptRuntimeInput["loadAssistantCanonExcerpt"]; + sanitizeOutgoingAssistantText: RunAssistantLivingChatAttemptRuntimeInput["sanitizeOutgoingAssistantText"]; + defaultModel: RunAssistantLivingChatAttemptRuntimeInput["defaultModel"]; + defaultBaseUrl: RunAssistantLivingChatAttemptRuntimeInput["defaultBaseUrl"]; + defaultApiKey?: RunAssistantLivingChatAttemptRuntimeInput["defaultApiKey"]; + buildAddressDebugPayload: RunAssistantAddressLaneResponseAttemptRuntimeInput["buildAddressDebugPayload"]; + buildAddressFollowupOffer: RunAssistantAddressLaneResponseAttemptRuntimeInput["buildAddressFollowupOffer"]; + appendItem: RunAssistantAddressLaneResponseAttemptRuntimeInput["appendItem"]; + getSession: RunAssistantAddressLaneResponseAttemptRuntimeInput["getSession"]; + persistSession: RunAssistantAddressLaneResponseAttemptRuntimeInput["persistSession"]; + cloneConversation: RunAssistantAddressLaneResponseAttemptRuntimeInput["cloneConversation"]; + logEvent: RunAssistantAddressRuntimeInput["logEvent"]; + messageIdFactory: NonNullable< + RunAssistantAddressLaneResponseAttemptRuntimeInput["messageIdFactory"] + >; + runAddressRuntime?: ( + input: RunAssistantAddressRuntimeInput + ) => Promise>; + runAddressLaneAttemptRuntime?: ( + input: RunAssistantAddressLaneAttemptRuntimeInput + ) => Promise>>; + runAddressLaneResponseAttemptRuntime?: ( + input: RunAssistantAddressLaneResponseAttemptRuntimeInput + ) => ResponseType; + runLivingChatAttemptRuntime?: ( + input: RunAssistantLivingChatAttemptRuntimeInput + ) => Promise; +} + +export async function runAssistantAddressAttemptRuntime( + input: RunAssistantAddressAttemptRuntimeInput +): Promise> { + const runAddressRuntimeSafe = input.runAddressRuntime ?? runAssistantAddressRuntime; + const runAddressLaneAttemptRuntimeSafe = input.runAddressLaneAttemptRuntime ?? runAssistantAddressLaneAttemptRuntime; + const runAddressLaneResponseAttemptRuntimeSafe = + input.runAddressLaneResponseAttemptRuntime ?? runAssistantAddressLaneResponseAttemptRuntime; + const runLivingChatAttemptRuntimeSafe = + input.runLivingChatAttemptRuntime ?? runAssistantLivingChatAttemptRuntime; + + const finalizeAddressLaneResponse: RunAssistantAddressRuntimeInput["finalizeAddressLaneResponse"] = ( + addressLane, + effectiveAddressUserMessage, + carryoverMeta = null, + llmPreDecomposeMeta = null + ) => + runAddressLaneResponseAttemptRuntimeSafe({ + sessionId: input.sessionId, + userMessage: input.userMessage, + effectiveAddressUserMessage, + addressLane, + carryoverMeta, + llmPreDecomposeMeta, + knownOrganizations: input.sessionScope.knownOrganizations, + activeOrganization: input.sessionScope.activeOrganization, + sanitizeOutgoingAssistantText: input.sanitizeOutgoingAssistantText, + buildAddressDebugPayload: input.buildAddressDebugPayload, + buildAddressFollowupOffer: input.buildAddressFollowupOffer, + mergeKnownOrganizations: input.mergeKnownOrganizations as any, + toNonEmptyString: input.toNonEmptyString, + appendItem: input.appendItem, + getSession: input.getSession, + persistSession: input.persistSession, + cloneConversation: input.cloneConversation, + logEvent: input.logEvent, + messageIdFactory: input.messageIdFactory + } as RunAssistantAddressLaneResponseAttemptRuntimeInput); + + const tryHandleLivingChat: RunAssistantAddressRuntimeInput["tryHandleLivingChat"] = async ( + modeDecision, + addressRuntimeMeta = null + ) => + runLivingChatAttemptRuntimeSafe({ + sessionId: input.sessionId, + userMessage: input.userMessage, + sessionItems: input.sessionItems, + modeDecision, + sessionScope: { + knownOrganizations: input.sessionScope.knownOrganizations, + selectedOrganization: input.sessionScope.selectedOrganization, + activeOrganization: input.sessionScope.activeOrganization + }, + addressRuntimeMeta, + traceIdFactory: () => `chat-${input.messageIdFactory().replace(/^msg-/, "")}`, + toNonEmptyString: input.toNonEmptyString, + mergeKnownOrganizations: input.mergeKnownOrganizations as any, + hasAssistantDataScopeMetaQuestionSignal: input.hasAssistantDataScopeMetaQuestionSignal, + shouldHandleAsAssistantCapabilityMetaQuery: input.shouldHandleAsAssistantCapabilityMetaQuery, + hasDestructiveDataActionSignal: input.hasDestructiveDataActionSignal, + hasDangerOrCoercionSignal: input.hasDangerOrCoercionSignal, + hasOperationalAdminActionRequestSignal: input.hasOperationalAdminActionRequestSignal, + hasOrganizationFactLookupSignal: input.hasOrganizationFactLookupSignal, + hasOrganizationFactFollowupSignal: input.hasOrganizationFactFollowupSignal, + shouldEmitOrganizationSelectionReply: input.shouldEmitOrganizationSelectionReply, + hasAssistantCapabilityQuestionSignal: input.hasAssistantCapabilityQuestionSignal, + resolveDataScopeProbe: input.resolveDataScopeProbe, + applyScriptGuard: input.applyScriptGuard, + applyGroundingGuard: input.applyGroundingGuard, + buildAssistantSafetyRefusalReply: input.buildAssistantSafetyRefusalReply, + buildAssistantDataScopeContractReply: input.buildAssistantDataScopeContractReply, + buildAssistantOrganizationFactBoundaryReply: input.buildAssistantOrganizationFactBoundaryReply, + buildAssistantDataScopeSelectionReply: input.buildAssistantDataScopeSelectionReply, + buildAssistantOperationalBoundaryReply: input.buildAssistantOperationalBoundaryReply, + buildAssistantCapabilityContractReply: input.buildAssistantCapabilityContractReply, + appendItem: input.appendItem, + getSession: input.getSession, + persistSession: input.persistSession, + cloneConversation: input.cloneConversation, + logEvent: input.logEvent, + messageIdFactory: input.messageIdFactory, + nowIso: input.nowIso, + payload: input.payload, + chatClient: input.chatClient, + loadAssistantCanonExcerpt: input.loadAssistantCanonExcerpt, + sanitizeOutgoingAssistantText: input.sanitizeOutgoingAssistantText, + defaultModel: input.defaultModel, + defaultBaseUrl: input.defaultBaseUrl, + defaultApiKey: input.defaultApiKey + } as RunAssistantLivingChatAttemptRuntimeInput); + + const runAddressLaneAttempt: RunAssistantAddressRuntimeInput["runAddressLaneAttempt"] = async ( + messageUsed, + carryMeta, + analysisDateHint + ) => + runAddressLaneAttemptRuntimeSafe({ + messageUsed, + carryMeta, + analysisDateHint, + activeOrganization: input.sessionScope.activeOrganization, + mergeFollowupContextWithOrganizationScope: input.mergeFollowupContextWithOrganizationScope, + runAddressQueryTryHandle: input.runAddressQueryTryHandle + }); + + return runAddressRuntimeSafe({ + featureAssistantAddressQueryV1: input.featureAssistantAddressQueryV1, + sessionId: input.sessionId, + userMessage: input.userMessage, + sessionItems: input.sessionItems, + llmProvider: input.payload.llmProvider, + useMock: Boolean(input.payload.useMock), + featureAddressLlmPredecomposeV1: input.featureAddressLlmPredecomposeV1, + runAddressLlmPreDecompose: input.runAddressLlmPreDecompose, + buildAddressLlmPredecomposeContractV1: input.buildAddressLlmPredecomposeContractV1, + sanitizeAddressMessageForFallback: input.sanitizeAddressMessageForFallback, + toNonEmptyString: input.toNonEmptyString, + resolveAddressFollowupCarryoverContext: input.resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision: input.resolveAssistantOrchestrationDecision, + buildAddressDialogContinuationContractV2: input.buildAddressDialogContinuationContractV2, + runtimeAnalysisContextAsOfDate: input.runtimeAnalysisContextAsOfDate, + payloadContextPeriodHint: input.payload?.context?.period_hint, + compactWhitespace: input.compactWhitespace, + runAddressLaneAttempt, + isRetryableAddressLimitedResult: input.isRetryableAddressLimitedResult, + finalizeAddressLaneResponse, + tryHandleLivingChat, + logEvent: input.logEvent, + nowIso: input.nowIso, + runAddressOrchestrationRuntime: input.runAddressOrchestrationRuntime, + runAddressToolGateRuntime: input.runAddressToolGateRuntime, + runAddressLaneRuntime: input.runAddressLaneRuntime + }); +} diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index 4ce7d38..ab0d78e 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -16,12 +16,9 @@ import * as openaiResponsesClient_1 from "./openaiResponsesClient"; import * as addressMcpClient_1 from "./addressMcpClient"; import * as capabilitiesRegistry_1 from "./capabilitiesRegistry"; import * as assistantCanon_1 from "./assistantCanon"; -import * as assistantAddressLaneResponseAttemptRuntimeAdapter_1 from "./assistantAddressLaneResponseAttemptRuntimeAdapter"; +import * as assistantAddressAttemptRuntimeAdapter_1 from "./assistantAddressAttemptRuntimeAdapter"; import * as assistantCoverageGrounding_1 from "./assistantCoverageGrounding"; import * as assistantDeepTurnAttemptRuntimeAdapter_1 from "./assistantDeepTurnAttemptRuntimeAdapter"; -import * as assistantAddressRuntimeAdapter_1 from "./assistantAddressRuntimeAdapter"; -import * as assistantAddressLaneAttemptRuntimeAdapter_1 from "./assistantAddressLaneAttemptRuntimeAdapter"; -import * as assistantLivingChatAttemptRuntimeAdapter_1 from "./assistantLivingChatAttemptRuntimeAdapter"; import * as assistantUserTurnBootstrapRuntimeAdapter_1 from "./assistantUserTurnBootstrapRuntimeAdapter"; import * as assistantQueryPlanning_1 from "./assistantQueryPlanning"; import iconv from "iconv-lite"; @@ -4340,40 +4337,30 @@ export class AssistantService { nowIso: () => new Date().toISOString() }); const sessionOrganizationScope = resolveSessionOrganizationScopeContext(userMessage, session.items); - const finalizeAddressLaneResponse = (addressLane, effectiveAddressUserMessage, carryoverMeta = null, llmPreDecomposeMeta = null) => (0, assistantAddressLaneResponseAttemptRuntimeAdapter_1.runAssistantAddressLaneResponseAttemptRuntime)({ - sessionId, - userMessage, - effectiveAddressUserMessage, - addressLane, - carryoverMeta, - llmPreDecomposeMeta, - knownOrganizations: sessionOrganizationScope.knownOrganizations, - activeOrganization: sessionOrganizationScope.activeOrganization, - sanitizeOutgoingAssistantText, - buildAddressDebugPayload, - buildAddressFollowupOffer, - mergeKnownOrganizations, - toNonEmptyString, - appendItem: (targetSessionId, item) => this.sessions.appendItem(targetSessionId, item), - getSession: (targetSessionId) => this.sessions.getSession(targetSessionId), - persistSession: (sessionState) => this.sessionLogger.persistSession(sessionState), - cloneConversation: (items) => cloneItems(items), - logEvent: (payload) => (0, log_1.logJson)(payload), - messageIdFactory: () => `msg-${(0, nanoid_1.nanoid)(10)}` - }); - const tryHandleLivingChat = async (modeDecision, addressRuntimeMeta = null) => (0, assistantLivingChatAttemptRuntimeAdapter_1.runAssistantLivingChatAttemptRuntime)({ + const addressRuntime = await (0, assistantAddressAttemptRuntimeAdapter_1.runAssistantAddressAttemptRuntime)({ + featureAssistantAddressQueryV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_V1, sessionId, userMessage, sessionItems: session.items, - modeDecision, + payload, sessionScope: { knownOrganizations: sessionOrganizationScope.knownOrganizations, selectedOrganization: sessionOrganizationScope.selectedOrganization, activeOrganization: sessionOrganizationScope.activeOrganization }, - addressRuntimeMeta, - traceIdFactory: () => `chat-${(0, nanoid_1.nanoid)(10)}`, + featureAddressLlmPredecomposeV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1, + runAddressLlmPreDecompose: async () => runAddressLlmPreDecompose(this.normalizerService, payload, userMessage), + buildAddressLlmPredecomposeContractV1: predecomposeContract_1.buildAddressLlmPredecomposeContractV1, + sanitizeAddressMessageForFallback, toNonEmptyString, + resolveAddressFollowupCarryoverContext, + resolveAssistantOrchestrationDecision, + buildAddressDialogContinuationContractV2, + runtimeAnalysisContextAsOfDate: runtimeAnalysisContext.as_of_date, + compactWhitespace, + mergeFollowupContextWithOrganizationScope, + runAddressQueryTryHandle: (laneMessageUsed, options) => this.addressQueryService.tryHandle(laneMessageUsed, options), + isRetryableAddressLimitedResult, mergeKnownOrganizations, hasAssistantDataScopeMetaQuestionSignal, shouldHandleAsAssistantCapabilityMetaQuery, @@ -4393,56 +4380,23 @@ export class AssistantService { buildAssistantDataScopeSelectionReply, buildAssistantOperationalBoundaryReply, buildAssistantCapabilityContractReply, + chatClient: this.chatClient, + loadAssistantCanonExcerpt: assistantCanon_1.loadAssistantCanonExcerpt, + sanitizeOutgoingAssistantText, + defaultModel: config_1.DEFAULT_MODEL, + defaultBaseUrl: config_1.DEFAULT_OPENAI_BASE_URL, + defaultApiKey: process.env.OPENAI_API_KEY ?? "", + buildAddressDebugPayload, + buildAddressFollowupOffer, appendItem: (targetSessionId, item) => this.sessions.appendItem(targetSessionId, item), getSession: (targetSessionId) => this.sessions.getSession(targetSessionId), persistSession: (sessionState) => this.sessionLogger.persistSession(sessionState), cloneConversation: (items) => cloneItems(items), logEvent: (payload) => (0, log_1.logJson)(payload), messageIdFactory: () => `msg-${(0, nanoid_1.nanoid)(10)}`, - nowIso: () => new Date().toISOString(), - payload, - chatClient: this.chatClient, - loadAssistantCanonExcerpt: assistantCanon_1.loadAssistantCanonExcerpt, - sanitizeOutgoingAssistantText, - defaultModel: config_1.DEFAULT_MODEL, - defaultBaseUrl: config_1.DEFAULT_OPENAI_BASE_URL, - defaultApiKey: process.env.OPENAI_API_KEY ?? "" - }); - let addressRuntimeMetaForDeep = null; - const runAddressLaneAttempt = async (messageUsed, carryMeta, analysisDateHint) => (0, assistantAddressLaneAttemptRuntimeAdapter_1.runAssistantAddressLaneAttemptRuntime)({ - messageUsed, - carryMeta: carryMeta ?? null, - analysisDateHint: analysisDateHint ?? null, - activeOrganization: sessionOrganizationScope.activeOrganization, - mergeFollowupContextWithOrganizationScope, - runAddressQueryTryHandle: (laneMessageUsed, options) => this.addressQueryService.tryHandle(laneMessageUsed, options) - }); - const addressRuntime = await (0, assistantAddressRuntimeAdapter_1.runAssistantAddressRuntime)({ - featureAssistantAddressQueryV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_V1, - sessionId, - userMessage, - sessionItems: session.items, - llmProvider: payload?.llmProvider, - useMock: Boolean(payload.useMock), - featureAddressLlmPredecomposeV1: config_1.FEATURE_ASSISTANT_ADDRESS_QUERY_LLM_PREDECOMPOSE_V1, - runAddressLlmPreDecompose: async () => runAddressLlmPreDecompose(this.normalizerService, payload, userMessage), - buildAddressLlmPredecomposeContractV1: predecomposeContract_1.buildAddressLlmPredecomposeContractV1, - sanitizeAddressMessageForFallback, - toNonEmptyString, - resolveAddressFollowupCarryoverContext, - resolveAssistantOrchestrationDecision, - buildAddressDialogContinuationContractV2, - runtimeAnalysisContextAsOfDate: runtimeAnalysisContext.as_of_date, - payloadContextPeriodHint: payload?.context?.period_hint, - compactWhitespace, - runAddressLaneAttempt, - isRetryableAddressLimitedResult, - finalizeAddressLaneResponse, - tryHandleLivingChat: (modeDecision, runtimeMeta) => tryHandleLivingChat(modeDecision, runtimeMeta), - logEvent: (payload) => (0, log_1.logJson)(payload), nowIso: () => new Date().toISOString() }); - addressRuntimeMetaForDeep = addressRuntime.addressRuntimeMetaForDeep; + const addressRuntimeMetaForDeep = addressRuntime.addressRuntimeMetaForDeep; if (addressRuntime.handled && addressRuntime.response) { return addressRuntime.response; } diff --git a/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts new file mode 100644 index 0000000..8b8e404 --- /dev/null +++ b/llm_normalizer/backend/tests/assistantAddressAttemptRuntimeAdapter.test.ts @@ -0,0 +1,196 @@ +import { describe, expect, it, vi } from "vitest"; +import { runAssistantAddressAttemptRuntime } from "../src/services/assistantAddressAttemptRuntimeAdapter"; + +function buildInput(overrides: Record = {}) { + return { + featureAssistantAddressQueryV1: true, + sessionId: "asst-1", + userMessage: "where are overdue docs", + sessionItems: [], + payload: { + llmProvider: "openai", + useMock: 1, + context: { + period_hint: "2020-07-31" + }, + apiKey: "key", + model: "gpt-5", + baseUrl: "http://localhost" + }, + sessionScope: { + knownOrganizations: ["Org A"], + selectedOrganization: "Org A", + activeOrganization: "Org A" + }, + featureAddressLlmPredecomposeV1: true, + runAddressLlmPreDecompose: async () => ({}), + buildAddressLlmPredecomposeContractV1: () => ({}), + sanitizeAddressMessageForFallback: (value: string) => value, + toNonEmptyString: (value: unknown) => + typeof value === "string" && value.trim().length > 0 ? value.trim() : null, + resolveAddressFollowupCarryoverContext: () => null, + resolveAssistantOrchestrationDecision: () => ({ mode: "address_query", runAddressLane: true }), + buildAddressDialogContinuationContractV2: () => ({}), + runtimeAnalysisContextAsOfDate: "2020-07-31", + compactWhitespace: (value: string) => String(value ?? "").replace(/\s+/g, " ").trim(), + mergeFollowupContextWithOrganizationScope: (followupContext: Record | null) => + followupContext, + runAddressQueryTryHandle: async () => ({ response_type: "READY" }), + isRetryableAddressLimitedResult: () => false, + mergeKnownOrganizations: (knownOrganizations: string[], selectedOrganization: string | null) => ({ + knownOrganizations, + selectedOrganization + }), + hasAssistantDataScopeMetaQuestionSignal: () => false, + shouldHandleAsAssistantCapabilityMetaQuery: () => false, + hasDestructiveDataActionSignal: () => false, + hasDangerOrCoercionSignal: () => false, + hasOperationalAdminActionRequestSignal: () => false, + hasOrganizationFactLookupSignal: () => false, + hasOrganizationFactFollowupSignal: () => false, + shouldEmitOrganizationSelectionReply: () => false, + hasAssistantCapabilityQuestionSignal: () => false, + resolveDataScopeProbe: () => null, + applyScriptGuard: (chatText: string) => chatText, + applyGroundingGuard: (guardInput: Record) => guardInput, + buildAssistantSafetyRefusalReply: () => "safety", + buildAssistantDataScopeContractReply: () => "scope", + buildAssistantOrganizationFactBoundaryReply: () => "boundary", + buildAssistantDataScopeSelectionReply: () => "selection", + buildAssistantOperationalBoundaryReply: () => "operational", + buildAssistantCapabilityContractReply: () => "capability", + chatClient: {} as any, + loadAssistantCanonExcerpt: () => "", + sanitizeOutgoingAssistantText: (value: unknown, fallback = "") => { + const text = typeof value === "string" ? value.trim() : ""; + return text || fallback; + }, + defaultModel: "gpt-5", + defaultBaseUrl: "http://localhost", + defaultApiKey: "key", + buildAddressDebugPayload: () => ({}), + buildAddressFollowupOffer: () => null, + appendItem: () => {}, + getSession: () => ({ + session_id: "asst-1", + updated_at: "", + items: [], + investigation_state: null + }), + persistSession: () => {}, + cloneConversation: (items: unknown[]) => items, + logEvent: () => {}, + messageIdFactory: () => "msg-111", + nowIso: () => "2026-01-01T00:00:00.000Z", + ...overrides + } as any; +} + +describe("assistant address attempt runtime adapter", () => { + it("wires lane, response and living-chat attempt runtimes through one boundary", async () => { + const runAddressLaneAttemptRuntime = vi.fn(async () => ({ + response_type: "READY" + })); + const runAddressLaneResponseAttemptRuntime = vi.fn(() => ({ + kind: "address" + })); + const runLivingChatAttemptRuntime = vi.fn(async () => ({ + kind: "chat" + })); + const runAddressRuntime = vi.fn(async (input: any) => { + const laneResult = await input.runAddressLaneAttempt( + "lane-message", + { followupContext: { previous_intent: "docs_by_counterparty" } }, + "2020-08-31" + ); + expect(laneResult).toEqual({ response_type: "READY" }); + + const livingChatResult = await input.tryHandleLivingChat( + { mode: "chat", reason: "living_chat_signal_detected" }, + { source: "address_runtime" } + ); + expect(livingChatResult).toEqual({ kind: "chat" }); + + const response = input.finalizeAddressLaneResponse( + { reply_text: "address reply", reply_type: "factual_with_explanation" }, + "lane-message", + { previousReplyType: "partial_coverage" }, + { mode: "supported", confidence: "high" } + ); + expect(response).toEqual({ kind: "address" }); + + return { + handled: true, + response: { ok: true, lane: "address" }, + addressRuntimeMetaForDeep: { source: "address_runtime" } + }; + }); + + const runtime = await runAssistantAddressAttemptRuntime( + buildInput({ + runAddressRuntime, + runAddressLaneAttemptRuntime, + runAddressLaneResponseAttemptRuntime, + runLivingChatAttemptRuntime + }) + ); + + expect(runtime).toEqual({ + handled: true, + response: { ok: true, lane: "address" }, + addressRuntimeMetaForDeep: { source: "address_runtime" } + }); + expect(runAddressRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + llmProvider: "openai", + useMock: true, + payloadContextPeriodHint: "2020-07-31" + }) + ); + expect(runAddressLaneAttemptRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + messageUsed: "lane-message", + analysisDateHint: "2020-08-31", + activeOrganization: "Org A" + }) + ); + expect(runLivingChatAttemptRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + sessionId: "asst-1", + sessionScope: expect.objectContaining({ + selectedOrganization: "Org A" + }) + }) + ); + expect(runAddressLaneResponseAttemptRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + sessionId: "asst-1", + effectiveAddressUserMessage: "lane-message", + knownOrganizations: ["Org A"] + }) + ); + }); + + it("passes empty payload fields to address runtime without breaking defaults", async () => { + const runAddressRuntime = vi.fn(async () => ({ + handled: false, + response: null, + addressRuntimeMetaForDeep: null + })); + + await runAssistantAddressAttemptRuntime( + buildInput({ + payload: {}, + runAddressRuntime + }) + ); + + expect(runAddressRuntime).toHaveBeenCalledWith( + expect.objectContaining({ + llmProvider: undefined, + useMock: false, + payloadContextPeriodHint: undefined + }) + ); + }); +}); diff --git a/llm_normalizer/data/autorun_annotations/annotations.json b/llm_normalizer/data/autorun_annotations/annotations.json index d025ed6..ea7726a 100644 --- a/llm_normalizer/data/autorun_annotations/annotations.json +++ b/llm_normalizer/data/autorun_annotations/annotations.json @@ -565,5 +565,113 @@ "question_text": "какие контрагенты пока вообще не платят за текущий месяц и это тревожный знак для нас?", "answer_text": "Активные заказчики в выбранном периоде: 46.\nСобран профиль активности заказчиков (bank-doc activity aggregate).\nСтрок агрегата: 46.\n1. ГКУ | операций: 44 | последняя активность: 2019-12-25T16:09:27Z\n2. ГБУ \\Многофункциональный центр\\ | операций: 38 | последняя активность: 2018-12-04T12:00:01Z\n3. СБЕРБАНК, ПАО | операций: 36 | последняя активность: 2020-03-02T12:00:04Z\n4. Комитет государственных услуг г. Москвы | операций: 27 | последняя активность: 2015-05-19T12:00:00Z\n5. Группа | операций: 26 | последняя активность: 2021-11-10T12:00:07Z\n6. НОРТОН | операций: 14 | последняя активность: 2022-01-20T12:00:05Z\n7. МНН | операций: 14 | последняя активность: 2022-01-20T12:00:04Z\n8. МОСКОВСКИЙ ФИЛИАЛ ООО КБ\\МЕГАПОЛИС\\ | операций: 11 | последняя активность: 2019-07-31T12:00:22Z\n9. Департамент капитального ремонта города Москвы. | операций: 10 | последняя активность: 2019-09-16T11:00:00Z\n10. Департамент финансов города Москвы | операций: 10 | последняя активность: 2015-12-29T12:00:03Z\n11. Лайсвуд, ООО | операций: 7 | последняя активность: 2020-03-02T12:00:02Z\n12. Единая электронная торговая площадка, АО | операций: 7 | последняя активность: 2018-08-10T12:00:00Z\n13. ЭталонМебель | операций: 6 | последняя активность: 2020-04-27T12:00:00Z\n14. ИП Калинин Н.М. | операций: 5 | последняя активность: 2020-03-02T12:00:03Z\n15. Элмаш-К | операций: 5 | последняя активность: 2018-02-06T12:32:17Z\n16. ГТК-Интер, ООО | операций: 5 | последняя активность: 2016-07-07T12:00:00Z\n17. Гамма-мебель, ООО | операций: 4 | последняя активность: 2020-08-11T13:15:30Z\n18. Смарт | операций: 4 | последняя активность: 2020-02-07T12:00:03Z\n19. Сбербанк-АСТ, ЗАО | операций: 4 | последняя активность: 2017-07-26T12:00:00Z\n20. КБ \\НЕФТЯНОЙ АЛЬЯНС\\ (ПАО) (Расчетный) | операций: 4 | последняя активность: 2015-02-05T12:00:06Z\n21. ЗАО Ремонтно-строительная фирма «Ремстройсервис» | операций: 3 | последняя активность: 2020-06-03T00:00:02Z\n22. ПрофТренд,ООО | операций: 3 | последняя активность: 2020-05-15T12:00:02Z\n23. ГБПОУ г. Москвы \\Московская театральная школа Олега Табакова\\ | операций: 3 | последняя активность: 2019-10-25T12:00:00Z\n24. Газпром авиа, ООО | операций: 3 | последняя активность: 2016-11-29T12:00:01Z\n25. ФГКУ \\Центрреставрация\\ | операций: 3 | последняя активность: 2015-12-30T12:00:03Z\n26. РАЙМ, ООО | операций: 3 | последняя активность: 2015-08-13T12:00:00Z\n27. СервисКонсалт, ООО | операций: 2 | последняя активность: 2022-04-13T12:00:00Z\n30. УФК по г. Москве (ГУ - | операций: 2 | последняя активность: 2018-12-12T12:00:01Z\n31. Чепурнов П.Д. | операций: 1 | последняя активность: 2022-01-20T12:00:03Z\n32. «Олимпстрой» | операций: 1 | последняя активность: 2020-07-13T12:00:00Z\n33. КПК \\Столичная Сберегательная компания\\ | операций: 1 | последняя активность: 2019-04-17T12:00:00Z\n34. РТС-тендер | операций: 1 | последняя активность: 2018-12-17T00:00:00Z\n35. ФГБУ «УЗС» | операций: 1 | последняя активность: 2018-12-13T12:00:02Z\n36. АС \\ЕО | операций: 1 | последняя активность: 2018-01-15T13:25:36Z\n37. АкваСервис, ООО | операций: 1 | последняя активность: 2017-09-19T12:00:01Z\n38. ГБУ МФЦ города Москвы | операций: 1 | последняя активность: 2017-04-27T00:00:00Z\n39. \\Жуковка 51\\ | операций: 1 | последняя активность: 2017-04-10T16:50:38Z\n40. 0 | операций: 1 | последняя активность: 2017-03-02T12:00:00Z\n41. ФГБОУ ВО \\Московский государственный университет имени М.В. Ломоносова\\ | операций: 1 | последняя активность: 2016-11-25T00:00:00Z\n42. ДжетАэроКонцепт, ООО | операций: 1 | последняя активность: 2016-08-18T13:13:35Z\n43. УМ и автотранспорта, ЗАО | операций: 1 | последняя активность: 2015-12-24T12:00:00Z\n44. Межрегиональное операционное УФК | операций: 1 | последняя активность: 2015-11-12T12:00:00Z\n45. Банк ГПБ (АО) г. Москва | операций: 1 | последняя активность: 2015-10-30T12:00:03Z\n46. Спецстрой, ООО | операций: 1 | последняя активность: 2015-09-14T12:00:00Z" } + }, + { + "annotation_id": "ann-mntec9ev-cb1t612", + "run_id": "assistant-stage1-ZL97weIIRG", + "case_id": "AUTO-005", + "session_id": "assistant-stage1-ZL97weIIRG-AUTO-005", + "message_index": 1, + "rating": 1, + "comment": "мы модем показать договора заведенные без оплавт на период рассмотрения - тема не сложная надо отработать", + "manual_case_decision": "needs_routing_extension", + "annotation_author": "manual_reviewer", + "resolved": false, + "resolved_at": null, + "resolved_by": null, + "created_at": "2026-04-10T21:06:49.639Z", + "updated_at": "2026-04-10T21:06:49.639Z", + "context": { + "message_id": "msg-MWxfeEbPmS", + "trace_id": "address-V5-7tJrBPM", + "reply_type": "partial_coverage", + "eval_target": "assistant_stage1", + "prompt_version": "address_query_runtime_v1", + "domain": null, + "query_class": null, + "question_text": "Где у нас документы есть, но нет денег за них, и это уже выглядит как серьезная задолженность контрагента?", + "answer_text": "Чтобы ответить надежно, нужен более точный ориентир в запросе.\nКоротко: в запросе не хватает конкретного ориентира (контрагент, договор или период).\nЧто можно сделать дальше: укажите контрагента или номер/название договора." + } + }, + { + "annotation_id": "ann-mnteex5s-ebey3ho", + "run_id": "assistant-stage1-ZL97weIIRG", + "case_id": "AUTO-008", + "session_id": "assistant-stage1-ZL97weIIRG-AUTO-008", + "message_index": 1, + "rating": 1, + "comment": "тут надо сапоставить договора с датами и отсутствие платежей по ним или старые платежи авансовые - надо дороботать - вопрос простой и важный", + "manual_case_decision": "candidate_for_implementation", + "annotation_author": "manual_reviewer", + "resolved": false, + "resolved_at": null, + "resolved_by": null, + "created_at": "2026-04-10T21:08:53.728Z", + "updated_at": "2026-04-10T21:08:53.728Z", + "context": { + "message_id": "msg-GwBH6jyVi_", + "trace_id": "address-719KtaE1Li", + "reply_type": "partial_coverage", + "eval_target": "assistant_stage1", + "prompt_version": "address_query_runtime_v1", + "domain": null, + "query_class": null, + "question_text": "Покажи контрагентов с максимальными долгами и уточни, нет ли среди них тех, кто просто игнорирует наши накладные.", + "answer_text": "Чтобы ответить надежно, нужен более точный ориентир в запросе.\nКоротко: в запросе не хватает конкретного ориентира (контрагент, договор или период).\nЧто можно сделать дальше: укажите контрагента или номер/название договора." + } + }, + { + "annotation_id": "ann-mnteftxo-5ux4j2l", + "run_id": "assistant-stage1-ZL97weIIRG", + "case_id": "AUTO-009", + "session_id": "assistant-stage1-ZL97weIIRG-AUTO-009", + "message_index": 1, + "rating": 1, + "comment": "технический ответ - такого быть не должно", + "manual_case_decision": "needs_dialog_policy_fix", + "annotation_author": "manual_reviewer", + "resolved": false, + "resolved_at": null, + "resolved_by": null, + "created_at": "2026-04-10T21:09:36.200Z", + "updated_at": "2026-04-10T21:09:36.200Z", + "context": { + "message_id": "msg-xCoMu24uIa", + "trace_id": "Idd369iAGgAGpm", + "reply_type": "clarification_required", + "eval_target": "assistant_stage1", + "prompt_version": "address_query_runtime_v1", + "domain": null, + "query_class": null, + "question_text": "Какие поставщики уже больше месяца не закрывают свои счета - это требует ручной проверки?", + "answer_text": "Коротко: Проблема подтверждается частично; для уверенного вывода нужны уточнения. Что сломано:\n- Есть признаки проблемы, но без уточнений по периоду и объекту вывод ненадежен. Почему это похоже на проблему:\n- Сигнал проблемы есть, но механизм подтвержден не полностью. На чем это основано:\n- Опора частичная: часть требований покрыта не полностью.\n- Отдельно не подтверждено или покрыто частично: R1. Что проверить первым:\n- Уточните период проверки, чтобы подтвердить проблему без лишнего шума.\n- Проверьте связку документов и проводок по проблемному участку в указанном периоде.\n- Уточните период проверки (например, июль 2020).\n- Уточните счет или группу счетов (например, 19, 60, 62). Ограничения:\n- Ни одно требование не получило подтвержденного покрытия.\n- В текущей выборке не хватает явных подтверждений, почему записи попали в ответ.\n- Часть контекста вопроса не подтверждена напрямую в найденных данных: counterparty.\n- Недостаточно подтвержденных данных для уверенного ответа.\n- Не хватило целевых подтверждений по выбранному сценарию." + } + }, + { + "annotation_id": "ann-mntegxtj-rlfz1um", + "run_id": "assistant-stage1-ZL97weIIRG", + "case_id": "AUTO-010", + "session_id": "assistant-stage1-ZL97weIIRG-AUTO-010", + "message_index": 1, + "rating": 1, + "comment": "ушло не в ту ветку - ответ совершенно не в кассу", + "manual_case_decision": "needs_dialog_policy_fix", + "annotation_author": "manual_reviewer", + "resolved": false, + "resolved_at": null, + "resolved_by": null, + "created_at": "2026-04-10T21:10:27.894Z", + "updated_at": "2026-04-10T21:10:27.894Z", + "context": { + "message_id": "msg-_p_ppJ9bfV", + "trace_id": "chat-QQxVBg9vSO", + "reply_type": "factual_with_explanation", + "eval_target": "assistant_stage1", + "prompt_version": "address_query_runtime_v1", + "domain": null, + "query_class": null, + "question_text": "Проверь зависшие авансы и уточни, можно ли их перепривязать на текущие отгрузки или пора списывать как нереальные?", + "answer_text": "Я ассистент по анализу данных 1С в режиме чтения.\nЧто умею по группам:\n1. НДС: Расчеты и аналитика по НДС на основании данных 1С. (например: vat_period_snapshot, vat_payable_forecast, vat_turnover_breakdown).\n2. Контрагенты: Срезы активности, платежей и документов по контрагентам. (например: list_documents_by_counterparty, bank_operations_by_counterparty, list_contracts_by_counterparty).\n3. Задолженности и расчеты: Аналитика закрытия расчетов, сальдо и признаков незакрытых цепочек. (например: settlement_closure_state, advance_offset_state, open_items_snapshot).\n4. Деньги и остатки: Остатки и динамика по денежным счетам и кассе. (например: balance_snapshot, turnover_by_period).\n5. Ограничения: Операции, которые ассистент не выполняет в этом рантайме. (например: explain_boundary, suggest_safe_next_step).\nЕсли хотите, раскрою любую группу точечно и дам готовую формулировку запроса.\nЧто не делаю: не настраиваю 1С, не меняю конфигурацию, не создаю и не провожу документы, не выполняю админ-действия на сервере." + } } ] \ No newline at end of file diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index b843163..16c214b 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,64 @@ [ + { + "generation_id": "gen-mnte8abx-ax3v3tr", + "created_at": "2026-04-10T21:03:44.205Z", + "mode": "qwen_seed", + "count": 10, + "domain": null, + "questions": [ + "Покажи контрагентов с максимальными долгами, которые уже больше месяца не платят, и проверь, нет ли у них непроверенных авансовых отгрузок.", + "Где по покупателям висят заказы на конец месяца, но денег за них нет - требует ручной сверки?", + "Посмотри контрагентов, где сальдо не совпадает с актом сверки, и уточни, кого нужно уже непременно запросить справку по этой разнице.", + "Какие авансы давно остались висящими без закрытия - их пора либо отменять, либо перекладывать на счета реальных поставок?", + "Где у нас документы есть, но нет денег за них, и это уже выглядит как серьезная задолженность контрагента?", + "Проверь контрагентов с максимальными долгами - нет ли среди них тех, кто просто не закрыл накладные или оставил их без оплаты?", + "Какие реализации зависли на конец периода и могут портить выручку, если не проверять заранее?", + "Покажи контрагентов с максимальными долгами и уточни, нет ли среди них тех, кто просто игнорирует наши накладные.", + "Какие поставщики уже больше месяца не закрывают свои счета - это требует ручной проверки?", + "Проверь зависшие авансы и уточни, можно ли их перепривязать на текущие отгрузки или пора списывать как нереальные?" + ], + "generated_by": "manual_reviewer", + "saved_case_set_file": "assistant_autogen_qwen_seed_20260410210344_gen-mnte8abx-ax3v3tr.json", + "context": { + "llm_provider": "local", + "model": "Qwen2.5 14B Instruct 1M", + "assistant_prompt_version": "address_query_runtime_v1", + "decomposition_prompt_version": "normalizer_v2_0_2", + "prompt_fingerprint": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v1.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.\nYou are semantic-normalizer for accounting assistant NDC.\nReturn strict JSON only, no markdown, no comments.\n\nTarget schema: normalized_query_v2_0_2.\n\nCore behavior (v2.0.2):\n1. Decompose message into semantic fragments.\n2. Classify fragment domain relevance and business scope.\n3. Fill route-critical flags and ", + "autogen_personality_id": "general", + "autogen_personality_prompt": "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл. акцент на контрагентов, долги нсд, счета, общий вывод по компании - контрагенты, заказчикам, скока денег кто принес и какие остатки по счетам, поиск документов, сальдо, банковские операции, незакрытые договора, документы по договорам, долги, Активность заказчиков по периодам, Поставщики и выплаты" + } + }, + { + "generation_id": "gen-mnte6y9p-4v1kfbw", + "created_at": "2026-04-10T21:02:41.918Z", + "mode": "qwen_seed", + "count": 10, + "domain": null, + "questions": [ + "Какие поставщики пока не закрыли взаиморасчёты на конец месяца и это выглядит как серьёзная проблема, а не просто задержка?", + "Где у нас висят покупатели 'грузили - денег нет - закрытия нет' и кто из них требует ручной проверки уже сейчас?", + "Покажи контрагентов с вероятным несоответствием сальдо, если мы запросим их акт сверки прямо сейчас.", + "Где у нас есть оплаты, но документы для закрытия взаиморасчётов всё ещё не пришли?", + "Какие контрагенты имеют документы, но нет нормального закрытия по оплатам?", + "Есть ли зависшие авансы, которые давно нужно перепроверить или закрыть?", + "Какие реализации на конец периода выглядят так, будто они зависли и могут испортить картину по выручке?", + "Где у нас отгрузки с проблемами не только в оплате, но и в самой связке документов?", + "Кто из поставщиков активно работает с нами последнее время и сколько денег принесли за последние 3 месяца?", + "Какие незакрытые договора есть на данный момент и что связано с ними по документам, долги и оплаты?" + ], + "generated_by": "manual_reviewer", + "saved_case_set_file": "assistant_autogen_qwen_seed_20260410210241_gen-mnte6y9p-4v1kfbw.json", + "context": { + "llm_provider": "local", + "model": "Qwen2.5 14B Instruct 1M", + "assistant_prompt_version": "address_query_runtime_v1", + "decomposition_prompt_version": "normalizer_v2_0_2", + "prompt_fingerprint": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v1.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.\nYou are semantic-normalizer for accounting assistant NDC.\nReturn strict JSON only, no markdown, no comments.\n\nTarget schema: normalized_query_v2_0_2.\n\nCore behavior (v2.0.2):\n1. Decompose message into semantic fragments.\n2. Classify fragment domain relevance and business scope.\n3. Fill route-critical flags and ", + "autogen_personality_id": "general", + "autogen_personality_prompt": "Генерируй реалистичные живые вопросы бухгалтера по 1С. Добавляй разговорные формулировки и опечатки, но сохраняй бизнес-смысл. акцент на контрагентов, долги нсд, счета, общий вывод по компании - контрагенты, заказчикам, скока денег кто принес и какие остатки по счетам, поиск документов, сальдо, банковские операции, незакрытые договора, документы по договорам, долги, Активность заказчиков по периодам, Поставщики и выплаты" + } + }, { "generation_id": "gen-mnsolawk-vugqyoc", "created_at": "2026-04-10T09:06:01.461Z", diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210241_gen-mnte6y9p-4v1kfbw.json b/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210241_gen-mnte6y9p-4v1kfbw.json new file mode 100644 index 0000000..09fa32a --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210241_gen-mnte6y9p-4v1kfbw.json @@ -0,0 +1,174 @@ +{ + "suite_id": "assistant_autogen_gen-mnte6y9p-4v1kfbw", + "suite_version": "0.1.0", + "schema_version": "assistant_autogen_suite_v0_1", + "generated_at": "2026-04-10T21:02:41.918Z", + "generation_id": "gen-mnte6y9p-4v1kfbw", + "mode": "qwen_seed", + "domain": null, + "scenario_count": 10, + "case_ids": [ + "AUTO-001", + "AUTO-002", + "AUTO-003", + "AUTO-004", + "AUTO-005", + "AUTO-006", + "AUTO-007", + "AUTO-008", + "AUTO-009", + "AUTO-010" + ], + "cases": [ + { + "case_id": "AUTO-001", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие поставщики пока не закрыли взаиморасчёты на конец месяца и это выглядит как серьёзная проблема, а не просто задержка?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-002", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где у нас висят покупатели 'грузили - денег нет - закрытия нет' и кто из них требует ручной проверки уже сейчас?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-003", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Покажи контрагентов с вероятным несоответствием сальдо, если мы запросим их акт сверки прямо сейчас." + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-004", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где у нас есть оплаты, но документы для закрытия взаиморасчётов всё ещё не пришли?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-005", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие контрагенты имеют документы, но нет нормального закрытия по оплатам?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-006", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Есть ли зависшие авансы, которые давно нужно перепроверить или закрыть?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-007", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие реализации на конец периода выглядят так, будто они зависли и могут испортить картину по выручке?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-008", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где у нас отгрузки с проблемами не только в оплате, но и в самой связке документов?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-009", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Кто из поставщиков активно работает с нами последнее время и сколько денег принесли за последние 3 месяца?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-010", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие незакрытые договора есть на данный момент и что связано с ними по документам, долги и оплаты?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210344_gen-mnte8abx-ax3v3tr.json b/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210344_gen-mnte8abx-ax3v3tr.json new file mode 100644 index 0000000..ac99775 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_qwen_seed_20260410210344_gen-mnte8abx-ax3v3tr.json @@ -0,0 +1,174 @@ +{ + "suite_id": "assistant_autogen_gen-mnte8abx-ax3v3tr", + "suite_version": "0.1.0", + "schema_version": "assistant_autogen_suite_v0_1", + "generated_at": "2026-04-10T21:03:44.205Z", + "generation_id": "gen-mnte8abx-ax3v3tr", + "mode": "qwen_seed", + "domain": null, + "scenario_count": 10, + "case_ids": [ + "AUTO-001", + "AUTO-002", + "AUTO-003", + "AUTO-004", + "AUTO-005", + "AUTO-006", + "AUTO-007", + "AUTO-008", + "AUTO-009", + "AUTO-010" + ], + "cases": [ + { + "case_id": "AUTO-001", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Покажи контрагентов с максимальными долгами, которые уже больше месяца не платят, и проверь, нет ли у них непроверенных авансовых отгрузок." + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-002", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где по покупателям висят заказы на конец месяца, но денег за них нет - требует ручной сверки?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-003", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Посмотри контрагентов, где сальдо не совпадает с актом сверки, и уточни, кого нужно уже непременно запросить справку по этой разнице." + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-004", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие авансы давно остались висящими без закрытия - их пора либо отменять, либо перекладывать на счета реальных поставок?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-005", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где у нас документы есть, но нет денег за них, и это уже выглядит как серьезная задолженность контрагента?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-006", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Проверь контрагентов с максимальными долгами - нет ли среди них тех, кто просто не закрыл накладные или оставил их без оплаты?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-007", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие реализации зависли на конец периода и могут портить выручку, если не проверять заранее?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-008", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Покажи контрагентов с максимальными долгами и уточни, нет ли среди них тех, кто просто игнорирует наши накладные." + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-009", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие поставщики уже больше месяца не закрывают свои счета - это требует ручной проверки?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + }, + { + "case_id": "AUTO-010", + "scenario_tag": "qwen_seed_general", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Проверь зависшие авансы и уточни, можно ли их перепривязать на текущие отгрузки или пора списывать как нереальные?" + } + ], + "expected_hints": { + "expected_reply_type": null, + "expected_degraded_to": null + } + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-bOkyd627Q3.json b/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-bOkyd627Q3.json new file mode 100644 index 0000000..c8a9223 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-bOkyd627Q3.json @@ -0,0 +1,130 @@ +{ + "suite_id": "assistant_autogen_runtime_job-bOkyd627Q3", + "suite_version": "0.1.0", + "schema_version": "assistant_autogen_runtime_v0_1", + "scenario_count": 10, + "case_ids": [ + "AUTO-001", + "AUTO-002", + "AUTO-003", + "AUTO-004", + "AUTO-005", + "AUTO-006", + "AUTO-007", + "AUTO-008", + "AUTO-009", + "AUTO-010" + ], + "cases": [ + { + "case_id": "AUTO-001", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Покажи контрагентов с максимальными долгами, которые уже больше месяца не платят, и проверь, нет ли у них непроверенных авансовых отгрузок." + } + ] + }, + { + "case_id": "AUTO-002", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где по покупателям висят заказы на конец месяца, но денег за них нет - требует ручной сверки?" + } + ] + }, + { + "case_id": "AUTO-003", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Посмотри контрагентов, где сальдо не совпадает с актом сверки, и уточни, кого нужно уже непременно запросить справку по этой разнице." + } + ] + }, + { + "case_id": "AUTO-004", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие авансы давно остались висящими без закрытия - их пора либо отменять, либо перекладывать на счета реальных поставок?" + } + ] + }, + { + "case_id": "AUTO-005", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Где у нас документы есть, но нет денег за них, и это уже выглядит как серьезная задолженность контрагента?" + } + ] + }, + { + "case_id": "AUTO-006", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Проверь контрагентов с максимальными долгами - нет ли среди них тех, кто просто не закрыл накладные или оставил их без оплаты?" + } + ] + }, + { + "case_id": "AUTO-007", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие реализации зависли на конец периода и могут портить выручку, если не проверять заранее?" + } + ] + }, + { + "case_id": "AUTO-008", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Покажи контрагентов с максимальными долгами и уточни, нет ли среди них тех, кто просто игнорирует наши накладные." + } + ] + }, + { + "case_id": "AUTO-009", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Какие поставщики уже больше месяца не закрывают свои счета - это требует ручной проверки?" + } + ] + }, + { + "case_id": "AUTO-010", + "scenario_tag": "autogen_runtime", + "question_type": "direct", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Проверь зависшие авансы и уточни, можно ли их перепривязать на текущие отгрузки или пора списывать как нереальные?" + } + ] + } + ] +} \ No newline at end of file