diff --git a/docs/orchestration/agent_cashflow_no_tops_20260523.json b/docs/orchestration/agent_cashflow_no_tops_20260523.json new file mode 100644 index 0000000..28a201f --- /dev/null +++ b/docs/orchestration/agent_cashflow_no_tops_20260523.json @@ -0,0 +1,118 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "agent_cashflow_no_tops_20260523", + "domain": "autonomy_business_answer_contract", + "title": "AGENT | Cashflow no-tops display modifier", + "description": "Targeted AGENT replay: after a 2020 business overview, a compact 'без топов' follow-up must preserve the 2020 company cashflow context and suppress rankings instead of excluding top counterparties.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_direct_money_2020", + "title": "Direct cashflow baseline for 2020", + "question": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"], + "required_answer_patterns_all": [ + "47[\\s.]*628[\\s.]*853", + "43[\\s.]*763[\\s.]*351", + "3[\\s.]*865[\\s.]*501" + ], + "forbidden_answer_patterns": [ + "Учтено строк", + "Первая найденная дата", + "runtime_", + "planner_", + "query_movements", + "primitive", + "7\\s*136\\s*815|7136815" + ], + "criticality": "critical", + "semantic_tags": ["cashflow", "direct_answer", "baseline"] + }, + { + "step_id": "step_02_explicit_overview_with_tops", + "title": "Explicit overview may include top counterparties", + "question": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"], + "required_answer_patterns_all": [ + "47[\\s.]*628[\\s.]*853", + "43[\\s.]*763[\\s.]*351", + "3[\\s.]*865[\\s.]*501", + "12[\\s.]*792[\\s.]*194", + "12[\\s.]*093[\\s.]*465", + "9[\\s.]*612[\\s.]*904" 
+ ], + "forbidden_answer_patterns": [ + "Учтено строк", + "Первая найденная дата", + "runtime_", + "planner_", + "query_movements", + "primitive" + ], + "criticality": "high", + "semantic_tags": ["business_overview", "tops_allowed", "bank_boundary"] + }, + { + "step_id": "step_03_compact_no_tops_followup", + "title": "No-tops compact follow-up preserves 2020 cashflow", + "question": "а если коротко, сколько заработали деньгами без топов?", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"], + "required_answer_patterns_all": [ + "2020", + "47[\\s.]*628[\\s.]*853", + "43[\\s.]*763[\\s.]*351", + "3[\\s.]*865[\\s.]*501" + ], + "forbidden_answer_patterns": [ + "2026-05-23", + "получили\\s+0\\s*руб", + "заплатили(?:/списали)?\\s+0\\s*руб", + "нетто\\s+0\\s*руб", + "с исключением крупнейших", + "исключением крупнейших", + "Комитет государственных услуг", + "Группа СВК", + "СБЕРБАНК", + "Что проверить дальше", + "Учтено строк", + "Первая найденная дата", + "runtime_", + "planner_", + "query_movements", + "primitive" + ], + "criticality": "critical", + "semantic_tags": ["compact_after_overview", "no_tops_display_modifier", "temporal_carryover"] + }, + { + "step_id": "step_04_plain_money_in_out_net", + "title": "Explicit plain money request remains compact", + "question": "не обзор, просто деньги: пришло, ушло, нетто за 2020", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"], + "required_answer_patterns_all": [ + "47[\\s.]*628[\\s.]*853", + "43[\\s.]*763[\\s.]*351", + "3[\\s.]*865[\\s.]*501" + ], + "forbidden_answer_patterns": [ + "Комитет государственных услуг", + "Группа СВК", + "СБЕРБАНК", + "Что проверить дальше", + "Учтено строк", + "Первая найденная дата", + "runtime_", + "planner_", + "query_movements", + "primitive" + ], + "criticality": "critical", + "semantic_tags": ["direct_money_only", "ranking_suppression"] + } + ], + "acceptance": { + "min_score": 80, + "max_unresolved_p0": 0, + 
"require_all_critical_steps_pass": true + } +} diff --git a/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js b/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js index 08c0f8e..aaa1c3a 100644 --- a/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js @@ -134,6 +134,20 @@ function detectCounterpartyTurnoverFamily(text) { entity }; } +function hasCompactOrganizationCashflowDisplaySignal(text) { /* Returns true when the text has a compact/no-tops display cue plus a money cue and an earnings-question cue, and no explicit exclusion wording; false for empty input. */ + const normalized = String(text ?? ""); + if (!normalized) { + return false; + } + const hasCompactCue = /(?:\u043a\u043e\u0440\u043e\u0442\u043a\w*|\u043d\u0435\s+\u043e\u0431\u0437\u043e\u0440|\u043f\u0440\u043e\u0441\u0442\u043e\s+\u0434\u0435\u043d\p{L}*|\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?(?![\p{L}\p{N}_])|\u0431\u0435\u0437\s+\u0440\u0430\u0437\u0431\u0438\u0432\p{L}*)/iu.test(normalized); /* The no-tops alternative ends with a Unicode-aware lookahead rather than \b: JS \b only knows ASCII word characters, so it never fires after a Cyrillic word. */ + if (!hasCompactCue) { + return false; + } + const hasMoneyCue = /(?:\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\u0435\u0436\p{L}*|\u043f\u0440\u0438\u0448\p{L}*|\u0443\u0448\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0441\u043f\u0438\u0441\u0430\p{L}*|\u043d\u0435\u0442\u0442\u043e|cash|money|incoming|outgoing|net)/iu.test(normalized); + const hasOrganizationEarningsCue = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|\u0437\u0430\u0440\u0430\u0431\u043e\u0442\p{L}*|\u043f\u0440\u0438\u0448\p{L}*|\u0443\u0448\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0434\u0435\u043d\u0435\u0436\p{L}*\s+\u043d\u0435\u0442\u0442\u043e|how\s+much|earned|received|paid)/iu.test(normalized); + const hasExplicitExclusionCue = 
/(?:\u0438\u0441\u043a\u043b\u044e\u0447\p{L}*|\u0443\u0431\u0435\u0440\p{L}*|\u043a\u0440\u043e\u043c\u0435|exclude|excluding|without)[\s\S]{0,80}(?:\u043a\u0440\u0443\u043f\u043d\p{L}*|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\p{L}*|\u043a\u043b\u0438\u0435\u043d\u0442\p{L}*|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a\p{L}*|\u043e\u043f\u0435\u0440\u0430\u0446\p{L}*|counterpart|customer|supplier|operation)/iu.test(normalized); + return hasMoneyCue && hasOrganizationEarningsCue && !hasExplicitExclusionCue; +} function detectScopedCounterpartyEntity(text) { const patterns = [ /(?:^|[\s,.;:!?])(?:\u043f\u043e|\u0443|\u0434\u043b\u044f|by|for)\s+(.+?)(?=$|[,.;:!?]|\s+(?:\u0437\u0430|\u043d\u0430|\u0432|\u0432\u043e|\u043a|\u043f\u043e|\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a|\u043a\u0430\u043a|\u043a\u0430\u043a\u043e\u0435|\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0430\u044f|\u043a\u0430\u043a\u0438\u0435|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u043d\u0435\u0442\u0442\u043e|\u0441\u0430\u043b\u044c\u0434\u043e|\u0434\u0435\u043d\u0435\u0433|\u0434\u0435\u043d\u0435\u0436\p{L}*|\u043f\u043b\u0430\u0442[\u0435\u0451]\u0436\p{L}*|\u0438\u0441\u0445\u043e\u0434\p{L}*|\u0432\u0445\u043e\u0434\p{L}*)(?=$|[\s,.;:!?]))/iu, @@ -350,13 +364,16 @@ function createAssistantTurnMeaningPolicy(deps = {}) { const rawText = normalizeTurnText(rawMessage, deps); const effectiveText = normalizeTurnText(effectiveMessage, deps); const joinedText = fallbackCompactWhitespace(`${rawText} ${effectiveText}`); - const supportedIntent = detectSupportedIntent(joinedText, deps); + const compactOrganizationCashflowDisplay = hasCompactOrganizationCashflowDisplaySignal(rawText); + const supportedIntent = compactOrganizationCashflowDisplay ? 
null : detectSupportedIntent(joinedText, deps); const counterpartyBidirectionalValueFlow = detectCounterpartyBidirectionalValueFlowFamily(joinedText); const counterpartyTurnover = detectCounterpartyTurnoverFamily(joinedText); const selectedObjectInventoryExact = hasSelectedObjectInventoryExactSignal(joinedText); - const broadBusinessEvaluation = selectedObjectInventoryExact || counterpartyBidirectionalValueFlow?.family - ? null - : detectBroadBusinessEvaluation(joinedText); + const broadBusinessEvaluation = compactOrganizationCashflowDisplay + ? { family: "broad_business_evaluation" } + : selectedObjectInventoryExact || counterpartyBidirectionalValueFlow?.family + ? null + : detectBroadBusinessEvaluation(joinedText); const llmIntent = toNonEmptyString(input?.llmPreDecomposeMeta?.predecomposeContract?.intent, deps); const explicitIntentCandidate = broadBusinessEvaluation?.family ? null @@ -381,6 +398,9 @@ function createAssistantTurnMeaningPolicy(deps = {}) { if (broadBusinessEvaluation?.family) { reasonCodes.push("broad_business_evaluation_current_turn_signal"); } + if (compactOrganizationCashflowDisplay) { + reasonCodes.push("compact_cashflow_display_current_turn_signal"); + } if (rawText !== normalizeTurnText(rawMessage, { ...deps, repairAddressMojibake: (value) => String(value ?? "") })) { reasonCodes.push("mojibake_repair_applied"); } diff --git a/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts b/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts index 8375b58..c36bfa3 100644 --- a/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts @@ -139,6 +139,33 @@ function detectCounterpartyTurnoverFamily(text) { }; } +function hasCompactOrganizationCashflowDisplaySignal(text) { /* Returns true when the text has a compact/no-tops display cue plus a money cue and an earnings-question cue, and no explicit exclusion wording; false for empty input. */ + const normalized = String(text ?? 
""); + if (!normalized) { + return false; + } + const hasCompactCue = + /(?:\u043a\u043e\u0440\u043e\u0442\u043a\w*|\u043d\u0435\s+\u043e\u0431\u0437\u043e\u0440|\u043f\u0440\u043e\u0441\u0442\u043e\s+\u0434\u0435\u043d\p{L}*|\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?(?![\p{L}\p{N}_])|\u0431\u0435\u0437\s+\u0440\u0430\u0437\u0431\u0438\u0432\p{L}*)/iu.test( + normalized + ); /* The no-tops alternative ends with a Unicode-aware lookahead rather than \b: JS \b only knows ASCII word characters, so it never fires after a Cyrillic word. */ + if (!hasCompactCue) { + return false; + } + const hasMoneyCue = + /(?:\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\u0435\u0436\p{L}*|\u043f\u0440\u0438\u0448\p{L}*|\u0443\u0448\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0441\u043f\u0438\u0441\u0430\p{L}*|\u043d\u0435\u0442\u0442\u043e|cash|money|incoming|outgoing|net)/iu.test( + normalized + ); + const hasOrganizationEarningsCue = + /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|\u0437\u0430\u0440\u0430\u0431\u043e\u0442\p{L}*|\u043f\u0440\u0438\u0448\p{L}*|\u0443\u0448\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0434\u0435\u043d\u0435\u0436\p{L}*\s+\u043d\u0435\u0442\u0442\u043e|how\s+much|earned|received|paid)/iu.test( + normalized + ); + const hasExplicitExclusionCue = + /(?:\u0438\u0441\u043a\u043b\u044e\u0447\p{L}*|\u0443\u0431\u0435\u0440\p{L}*|\u043a\u0440\u043e\u043c\u0435|exclude|excluding|without)[\s\S]{0,80}(?:\u043a\u0440\u0443\u043f\u043d\p{L}*|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\p{L}*|\u043a\u043b\u0438\u0435\u043d\u0442\p{L}*|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a\p{L}*|\u043e\u043f\u0435\u0440\u0430\u0446\p{L}*|counterpart|customer|supplier|operation)/iu.test( + normalized + ); + return hasMoneyCue && hasOrganizationEarningsCue && !hasExplicitExclusionCue; +} + function detectScopedCounterpartyEntity(text) { const patterns = [ 
/(?:^|[\s,.;:!?])(?:\u043f\u043e|\u0443|\u0434\u043b\u044f|by|for)\s+(.+?)(?=$|[,.;:!?]|\s+(?:\u0437\u0430|\u043d\u0430|\u0432|\u0432\u043e|\u043a|\u043f\u043e|\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a|\u043a\u0430\u043a|\u043a\u0430\u043a\u043e\u0435|\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0430\u044f|\u043a\u0430\u043a\u0438\u0435|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u043d\u0435\u0442\u0442\u043e|\u0441\u0430\u043b\u044c\u0434\u043e|\u0434\u0435\u043d\u0435\u0433|\u0434\u0435\u043d\u0435\u0436\p{L}*|\u043f\u043b\u0430\u0442[\u0435\u0451]\u0436\p{L}*|\u0438\u0441\u0445\u043e\u0434\p{L}*|\u0432\u0445\u043e\u0434\p{L}*)(?=$|[\s,.;:!?]))/iu, @@ -456,12 +483,15 @@ export function createAssistantTurnMeaningPolicy(deps = {}) { const rawText = normalizeTurnText(rawMessage, deps); const effectiveText = normalizeTurnText(effectiveMessage, deps); const joinedText = fallbackCompactWhitespace(`${rawText} ${effectiveText}`); - const supportedIntent = detectSupportedIntent(joinedText, deps); + const compactOrganizationCashflowDisplay = hasCompactOrganizationCashflowDisplaySignal(rawText); + const supportedIntent = compactOrganizationCashflowDisplay ? null : detectSupportedIntent(joinedText, deps); const counterpartyBidirectionalValueFlow = detectCounterpartyBidirectionalValueFlowFamily(joinedText); const counterpartyTurnover = detectCounterpartyTurnoverFamily(joinedText); const selectedObjectInventoryExact = hasSelectedObjectInventoryExactSignal(joinedText); const broadBusinessEvaluation = - selectedObjectInventoryExact || counterpartyBidirectionalValueFlow?.family + compactOrganizationCashflowDisplay + ? { family: "broad_business_evaluation" } + : selectedObjectInventoryExact || counterpartyBidirectionalValueFlow?.family ? 
null : detectBroadBusinessEvaluation(joinedText); const llmIntent = toNonEmptyString(input?.llmPreDecomposeMeta?.predecomposeContract?.intent, deps); @@ -489,6 +519,9 @@ export function createAssistantTurnMeaningPolicy(deps = {}) { if (broadBusinessEvaluation?.family) { reasonCodes.push("broad_business_evaluation_current_turn_signal"); } + if (compactOrganizationCashflowDisplay) { + reasonCodes.push("compact_cashflow_display_current_turn_signal"); + } if (rawText !== normalizeTurnText(rawMessage, { ...deps, repairAddressMojibake: (value) => String(value ?? "") })) { reasonCodes.push("mojibake_repair_applied"); } diff --git a/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts b/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts index 20eae08..42dd2b3 100644 --- a/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts @@ -193,6 +193,29 @@ describe("assistantTurnMeaningPolicy", () => { expect(meaning.reason_codes).toContain("broad_business_evaluation_current_turn_signal"); }); + it("treats no-tops compact money wording as display modifier, not counterparty exclusion", () => { + const policy = buildPolicy({ + resolveAddressIntent: () => ({ intent: "customer_revenue_and_payments", confidence: "high" }) + }); + + const meaning = policy.resolveAssistantTurnMeaning({ + rawUserMessage: + "\u0430 \u0435\u0441\u043b\u0438 \u043a\u043e\u0440\u043e\u0442\u043a\u043e, \u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0437\u0430\u0440\u0430\u0431\u043e\u0442\u0430\u043b\u0438 \u0434\u0435\u043d\u044c\u0433\u0430\u043c\u0438 \u0431\u0435\u0437 \u0442\u043e\u043f\u043e\u0432?", + effectiveAddressUserMessage: + "\u041e\u043f\u0440\u0435\u0434\u0435\u043b\u0438\u0442\u044c \u0438\u0442\u043e\u0433\u043e\u0432\u044b\u0439 \u0444\u0438\u043d\u0430\u043d\u0441\u043e\u0432\u044b\u0439 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u043f\u043e 
\u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438 \u0441 \u0438\u0441\u043a\u043b\u044e\u0447\u0435\u043d\u0438\u0435\u043c \u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448\u0438\u0445 \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u0439 \u0438\u043b\u0438 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u043e\u0432." + }); + + expect(meaning.explicit_intent_candidate).toBeNull(); + expect(meaning.asked_domain_family).toBe("business_summary"); + expect(meaning.asked_action_family).toBe("broad_evaluation"); + expect(meaning.explicit_entity_candidates).toEqual([]); + expect(meaning.unsupported_but_understood_family).toBe("broad_business_evaluation"); + expect(meaning.stale_replay_forbidden).toBe(true); + expect(meaning.reason_codes).toContain("compact_cashflow_display_current_turn_signal"); + expect(meaning.reason_codes).toContain("broad_business_evaluation_current_turn_signal"); + expect(meaning.reason_codes).not.toContain("counterparty_turnover_current_turn_signal"); + }); + it("treats organization-level earnings and best-year wording as business overview", () => { const policy = buildPolicy({ resolveAddressIntent: () => ({ intent: "customer_revenue_and_payments", confidence: "high" }) diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index e2362cc..7774268 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,53 @@ [ + { + "generation_id": "gen-ag05231310-3d45fe", + "created_at": "2026-05-23T13:10:25+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Cashflow no-tops display modifier", + "count": 4, + "domain": "autonomy_business_answer_contract", + "questions": [ + "Сколько денег Альтернатива заработала за 2020 год? 
Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.", + "а если коротко, сколько заработали деньгами без топов?", + "не обзор, просто деньги: пришло, ушло, нетто за 2020" + ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260523131025_gen-ag05231310-3d45fe.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260523131025_gen-ag05231310-3d45fe.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Targeted AGENT replay: after a 2020 business overview, a compact 'без топов' follow-up must preserve the 2020 company cashflow context and suppress rankings instead of excluding top counterparties.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_cashflow_no_tops_20260523.json", + "scenario_id": "agent_cashflow_no_tops_20260523", + "semantic_tags": [ + "bank_boundary", + "baseline", + "business_overview", + "cashflow", + "compact_after_overview", + "direct_answer", + "direct_money_only", + "no_tops_display_modifier", + "ranking_suppression", + "temporal_carryover", + "tops_allowed" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_cashflow_no_tops_live1", + "saved_after_validated_replay": true + } + }, { "generation_id": "gen-ag05231232-9ef058", "created_at": "2026-05-23T12:32:02+00:00", diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523131025_gen-ag05231310-3d45fe.json 
b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523131025_gen-ag05231310-3d45fe.json new file mode 100644 index 0000000..b97bfa8 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523131025_gen-ag05231310-3d45fe.json @@ -0,0 +1,137 @@ +{ + "saved_at": "2026-05-23T13:10:25+00:00", + "generation_id": "gen-ag05231310-3d45fe", + "mode": "saved_user_sessions", + "title": "AGENT | Cashflow no-tops display modifier", + "agent_run": true, + "questions": [ + "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.", + "а если коротко, сколько заработали деньгами без топов?", + "не обзор, просто деньги: пришло, ушло, нетто за 2020" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Targeted AGENT replay: after a 2020 business overview, a compact 'без топов' follow-up must preserve the 2020 company cashflow context and suppress rankings instead of excluding top counterparties.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_cashflow_no_tops_20260523.json", + "scenario_id": "agent_cashflow_no_tops_20260523", + "semantic_tags": [ + "bank_boundary", + "baseline", + "business_overview", + "cashflow", + "compact_after_overview", + "direct_answer", + "direct_money_only", + "no_tops_display_modifier", + "ranking_suppression", + "temporal_carryover", + "tops_allowed" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_cashflow_no_tops_live1", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": 
"accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_cashflow_no_tops_live1", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 4, + "steps_passed": 4, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "created_at": "2026-05-23T13:10:25+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.", + "created_at": "2026-05-23T13:10:25+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "а если коротко, сколько заработали деньгами без топов?", + "created_at": "2026-05-23T13:10:25+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "не обзор, просто деньги: пришло, ушло, нетто за 2020", + "created_at": "2026-05-23T13:10:25+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Targeted AGENT replay: after a 2020 business overview, a compact 'без топов' follow-up must preserve the 2020 company cashflow context and suppress rankings instead of excluding top counterparties.", + "architecture_phase": 
"turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_cashflow_no_tops_20260523.json", + "scenario_id": "agent_cashflow_no_tops_20260523", + "semantic_tags": [ + "bank_boundary", + "baseline", + "business_overview", + "cashflow", + "compact_after_overview", + "direct_answer", + "direct_money_only", + "no_tops_display_modifier", + "ranking_suppression", + "temporal_carryover", + "tops_allowed" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_cashflow_no_tops_live1", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_cashflow_no_tops_live1", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 4, + "steps_passed": 4, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523131025_gen-ag05231310-3d45fe.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523131025_gen-ag05231310-3d45fe.json new file mode 100644 index 0000000..7b27691 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523131025_gen-ag05231310-3d45fe.json @@ -0,0 +1,37 @@ +{ + "suite_id": "assistant_saved_session_gen-ag05231310-3d45fe", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-05-23T13:10:25+00:00", + "generation_id": "gen-ag05231310-3d45fe", + "mode": "saved_user_sessions", + "title": "AGENT | Cashflow no-tops display modifier", + "domain": "autonomy_business_answer_contract", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + 
{ + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Cashflow no-tops display modifier", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет." + }, + { + "user_message": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток." + }, + { + "user_message": "а если коротко, сколько заработали деньгами без топов?" + }, + { + "user_message": "не обзор, просто деньги: пришло, ушло, нетто за 2020" + } + ] + } + ] +}