diff --git a/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md b/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md index b6fc3f9..b0f4aef 100644 --- a/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md +++ b/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md @@ -28,8 +28,8 @@ Current reporting baseline: - Planner Autonomy Consolidation: `100%` for the declared phase83 slice. - Open-World Business Overview implementation breadth: `~99%` through Slice 25. - Open-World Semantic Control Gate: accepted critical subset after EHMO/W5/W7 hardening; fat GUI pack review remains a broad human-pressure gate. -- Route-Candidate-Driven Enablement Loop: `100%`, now regression-gated by phase91-phase97 canaries. -- Open-World Schema/Primitive Discovery: `25%`, first financial-counterparty slice accepted live at `4/4`; next schema/primitive candidate should come from real live/manual replay evidence. +- Route-Candidate-Driven Enablement Loop: `100%`, now regression-gated by phase91-phase98 canaries. +- Open-World Schema/Primitive Discovery: `38%`, financial-counterparty slice accepted live at `4/4` and limit-honesty/business-language gate accepted live at `6/6`; next schema/primitive candidate should come from real live/manual replay evidence. ## Archived Execution Snapshot (2026-04-17) diff --git a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md index efe3538..6913f08 100644 --- a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md @@ -86,11 +86,12 @@ Fresh validation cut: - Completed autonomy slice inside that loop: `Vendor/Procurement Quality Reviewed Route`: `vendor_risk_procurement_quality` now promotes to reviewed procurement-concentration evidence when confirmed outgoing payment, bank-like recipient segregation, non-financial recipient, counterparty-role, and contract-usage signals are reachable; phase95 live replay is accepted. - Completed autonomy slice inside that loop: `Inventory Reserve/Liquidation Quality Reviewed Route`: `inventory_reserve_liquidation_quality` now promotes to reviewed inventory quality-event evidence from posted write-off, receipt-adjustment, stocktaking, and revaluation documents; phase96 live replay is accepted. - Completed broader schema/primitive discovery slice: `Financial Counterparty Flow Hints`: bank-document money-flow recipes expose operation/purpose/comment fields, ranked value-flow buckets carry `financial_flow_hint`, explicit `СБЕРБАНК` wording is not swallowed by supplier/customer tails, and bank-like leaders are bounded away from ordinary supplier/customer overclaim; phase97 live replay is accepted. -- Current live canary: `phase97_financial_counterparty_flow_hints_live4` accepted `4/4`. -- Current accepted autorun: `AGENT | Phase 97 financial counterparty flow hints replay` (`gen-ag05122250-4451a8`). +- Completed broader schema/primitive discovery support slice: `Limit Honesty And Business Language Gate`: compact business-overview replies sanitize route/proxy/MCP-style wording, keep row-limit disclosure relevant to the asked contour, and preserve debt/VAT/bank/inventory/supplier canaries; phase98 live replay is accepted. +- Current live canary: `phase98_limit_honesty_business_language_live3` accepted `6/6`. +- Current accepted autorun: `AGENT | Phase 98 limit honesty and business-language replay` (`gen-ag05122315-f1e27c`). - Implementation breadth: `~99% (Open-World Bounded Autonomy Breadth through Slice 25)`. -- Active broader autonomy module: `Open-World Schema/Primitive Discovery`, with the first `Financial Counterparty Flow Hints` slice accepted and saved. -- Next active slice: select the next unfamiliar 1C ask from live/manual replay evidence, then continue broader schema/primitive discovery while using phase91-phase97 as regression canaries. +- Active broader autonomy module: `Open-World Schema/Primitive Discovery`, with `Financial Counterparty Flow Hints` and `Limit Honesty And Business Language Gate` accepted and saved. +- Next active slice: select the next unfamiliar 1C ask from live/manual replay evidence, then continue broader schema/primitive discovery while using phase91-phase98 as regression canaries. - Operating-layer progress: `~99% (Agentic Semantic Development Loop, accepted dogfood loop + autorun hygiene; manual GUI confirmation still required)`. ## Reporting Rule @@ -103,7 +104,7 @@ Use these labels when reporting progress: - `Прогресс модуля: 99% (Agentic Semantic Development Loop, accepted dogfood loop + autorun hygiene; manual GUI confirmation still required)` when discussing the current development-loop operating layer. - `Прогресс модуля: 100% (Open-World Route Candidate Promotion, declared phase90 slice accepted)` when discussing the route-candidate handoff slice itself. - `Прогресс модуля: 100% (Route-Candidate-Driven Enablement Loop, final reviewed proof-family route accepted; use as regression gate)` when discussing the current candidate-driven enablement loop. -- `Прогресс модуля: 25% (Open-World Schema/Primitive Discovery, first financial-counterparty slice accepted; next schema/primitive candidate pending)` when discussing the current broader schema/primitive discovery module. +- `Прогресс модуля: 38% (Open-World Schema/Primitive Discovery, phase97 financial-counterparty slice and phase98 business-language gate accepted; next schema/primitive candidate pending)` when discussing the current broader schema/primitive discovery module. - `Open-World Business Overview implementation breadth: ~99%, Semantic Control Gate critical subset accepted, fat GUI pack still pending` when discussing only the already wired Slice 25 breadth. - `Прогресс модуля: X% (Open-World Bounded Autonomy Breadth, active slice: )` for later breadth work after the Semantic Control Gate is accepted. @@ -139,7 +140,7 @@ Remaining work belongs to the next breadth module: - confirm the latest autorun Cyrillic hygiene cut in the GUI after backend refresh and inspect frontend/API payloads if old replacement characters remain visible; - continue dogfooding the `Agentic Semantic Development Loop` on real stage packs, especially generated-question quality, semantic business audit, repair handoff, and rerun acceptance; - finish closure of the `Open-World Semantic Control Gate` opened by `assistant-stage1-EHMOy3lNFt`; the EHMO-derived critical subset is accepted live after W5/W7 hardening, but the fat GUI pack and residual answer-shape roughness still need final review; -- extend open-world coverage beyond the reviewed `business_overview` families already wired for money-flow/activity, customer and supplier concentration, document/account-section activity mix, counterparty role split, contract usage, yearly operating-flow dynamics, explicit profit/margin, debt due-date aging, inventory reserve/liquidation quality events, supplier/procurement quality, bank-like financial counterparty role/purpose hints, explicit-period VAT/tax, as-of-date debt and inventory position, open-settlement concentration, contract-date debt age, staleness proxies, trading-margin proxy, sales-to-stock inventory proxy, the missing-proof ledger, and the phase93-phase97 reviewed/canary routes; +- extend open-world coverage beyond the reviewed `business_overview` families already wired for money-flow/activity, customer and supplier concentration, document/account-section activity mix, counterparty role split, contract usage, yearly operating-flow dynamics, explicit profit/margin, debt due-date aging, inventory reserve/liquidation quality events, supplier/procurement quality, bank-like financial counterparty role/purpose hints, business-language/limit-honesty gating, explicit-period VAT/tax, as-of-date debt and inventory position, open-settlement concentration, contract-date debt age, staleness proxies, trading-margin proxy, sales-to-stock inventory proxy, the missing-proof ledger, and the phase93-phase98 reviewed/canary routes; - broader dynamic schema traversal for unfamiliar 1C asks; - more primitive descriptors where live evidence proves a real gap; - more replay-backed domain packs that start from user business meaning, not from route convenience; @@ -161,19 +162,20 @@ For current planning, read: 1. `README.md` 2. this document 3. `31 - inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md` -4. `32 - financial_counterparty_flow_hints_2026-05-13.md` -5. `30 - vendor_procurement_quality_reviewed_route_2026-05-12.md` -6. `29 - debt_due_date_aging_reviewed_route_2026-05-10.md` -7. `28 - accounting_profit_margin_reviewed_route_2026-05-10.md` -8. `27 - proof_family_enablement_candidates_2026-05-10.md` -9. `26 - route_candidate_driven_enablement_loop_2026-05-10.md` -10. `25 - open_world_route_candidate_promotion_2026-05-10.md` -11. `24 - agentic_semantic_development_loop_and_autorun_hygiene_2026-05-10.md` -12. `23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md` -13. `22 - open_world_bounded_autonomy_breadth_2026-05-01.md` -14. `20 - planner_autonomy_consolidation_2026-05-01.md` -15. `19 - inventory_stock_open_world_breadth_proof_2026-05-01.md` -16. `17 - post_f_semantic_integrity_hardening_2026-04-23.md` -17. `16 - data_need_graph_and_open_world_mcp_plan_2026-04-22.md` +4. `33 - limit_honesty_business_language_2026-05-13.md` +5. `32 - financial_counterparty_flow_hints_2026-05-13.md` +6. `30 - vendor_procurement_quality_reviewed_route_2026-05-12.md` +7. `29 - debt_due_date_aging_reviewed_route_2026-05-10.md` +8. `28 - accounting_profit_margin_reviewed_route_2026-05-10.md` +9. `27 - proof_family_enablement_candidates_2026-05-10.md` +10. `26 - route_candidate_driven_enablement_loop_2026-05-10.md` +11. `25 - open_world_route_candidate_promotion_2026-05-10.md` +12. `24 - agentic_semantic_development_loop_and_autorun_hygiene_2026-05-10.md` +13. `23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md` +14. `22 - open_world_bounded_autonomy_breadth_2026-05-01.md` +15. `20 - planner_autonomy_consolidation_2026-05-01.md` +16. `19 - inventory_stock_open_world_breadth_proof_2026-05-01.md` +17. `17 - post_f_semantic_integrity_hardening_2026-04-23.md` +18. `16 - data_need_graph_and_open_world_mcp_plan_2026-04-22.md` Documents `01` through `15` remain valuable, but mostly as the historical architecture trail. diff --git a/docs/ARCH/11 - architecture_turnaround/33 - limit_honesty_business_language_2026-05-13.md b/docs/ARCH/11 - architecture_turnaround/33 - limit_honesty_business_language_2026-05-13.md new file mode 100644 index 0000000..69bb900 --- /dev/null +++ b/docs/ARCH/11 - architecture_turnaround/33 - limit_honesty_business_language_2026-05-13.md @@ -0,0 +1,65 @@ +# 33 - Limit Honesty And Business Language Gate (2026-05-13) + +This note records the second accepted cut in the broader `Open-World Schema/Primitive Discovery` module. + +The trigger came from the real GUI review `assistant-stage1-v2qsm_R0fF`: several answers were factually bounded, but still felt like technical artifacts because they leaked `MCP`, `route`, `proxy`, or irrelevant row-limit wording into user-facing business answers. + +## Scope + +This cut keeps the data and routes unchanged, but hardens the answer surface: + +- compact business-overview replies now pass through a shared business-language sanitizer before they are returned; +- `procurement-concentration route`, `vendor-risk route`, `due-date route`, and `proxy` phrases are converted into business-readable wording; +- debt due-date boundary answers keep the organization in the direct line and explain missing payment terms without route jargon; +- inventory reserve answers no longer pull unrelated incoming/outgoing money-flow limit lines into a warehouse reserve question; +- supplier dependency answers distinguish "checked concentration of outgoing payments" from a complete supplier-risk audit without naming it as a route; +- accepted replay keeps VAT and bank-like counterparty boundary canaries in the same scenario. + +## Validation + +Local checks: + +- `npm.cmd test -- assistantMcpDiscoveryResponseCandidate.test.ts`: `26/26` passed; +- `npm.cmd test -- assistantMcpDiscoveryResponseCandidate.test.ts assistantMcpDiscoveryAnswerAdapter.test.ts assistantMcpDiscoveryPilotExecutor.test.ts addressQueryRuntimeM23.test.ts`: `519/519` passed, `1` skipped; +- `npm.cmd run build`: passed; +- graphify rebuild: `6484` nodes, `14385` edges, `142` communities. + +Live semantic replay: + +- spec: `docs/orchestration/address_truth_harness_phase98_limit_honesty_business_language.json`; +- accepted run: `artifacts/domain_runs/phase98_limit_honesty_business_language_live3`; +- final status: `accepted`, `6/6` passed, `0` warnings, `0` failures; +- accepted user-runnable autorun: `AGENT | Phase 98 limit honesty and business-language replay` (`gen-ag05122315-f1e27c`). + +The accepted replay proves: + +- overdue receivables answers state the organization, date, checked open-settlement amount, and missing payment-term basis in business language; +- short follow-up questions remain short and do not repeat technical limit text; +- VAT remains direct and debug-free; +- top incoming money keeps the bank boundary and does not overclaim bank inflows as ordinary customer revenue; +- inventory reserve answers say the reserve cannot be confirmed exactly and do not drag unrelated money-flow limit warnings into the warehouse answer; +- supplier dependency answers state that exact supplier-risk is not fully confirmed, show the checked concentration leader, and list what remains unproven. + +## Status + +Current module wording: + +`Open-World Schema/Primitive Discovery, completed slice: limit honesty and business-language gate` + +Slice progress: `100%`. + +Broader module progress: `38%`. + +This is not a new 1C primitive by itself. It is a necessary semantic gate for the broader primitive-discovery module: as coverage grows, bounded answers must stay business-readable and must not expose internal route/proxy/debug mechanics. + +## Next Work + +1. Treat phase98 as a regression canary together with phase91-phase97. +2. Pick the next unfamiliar 1C ask from live/manual replay evidence. +3. Prefer the same loop: business meaning -> route candidate/schema surface -> minimal reviewed primitive or answer-shape fix -> live replay -> save autorun only after accepted replay. +4. Keep watching for cases where a technically correct bounded answer still feels like a system artifact rather than a competent business analyst. + +See also: + +- [21 - current_status_canon_2026-05-01.md](./21%20-%20current_status_canon_2026-05-01.md) +- [32 - financial_counterparty_flow_hints_2026-05-13.md](./32%20-%20financial_counterparty_flow_hints_2026-05-13.md) diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md index a8befea..0268086 100644 --- a/docs/ARCH/11 - architecture_turnaround/README.md +++ b/docs/ARCH/11 - architecture_turnaround/README.md @@ -50,6 +50,7 @@ This package answers the next question: 30. [30 - vendor_procurement_quality_reviewed_route_2026-05-12.md](./30%20-%20vendor_procurement_quality_reviewed_route_2026-05-12.md) 31. [31 - inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md](./31%20-%20inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md) 32. [32 - financial_counterparty_flow_hints_2026-05-13.md](./32%20-%20financial_counterparty_flow_hints_2026-05-13.md) +33. [33 - limit_honesty_business_language_2026-05-13.md](./33%20-%20limit_honesty_business_language_2026-05-13.md) ## Current Status Snapshot (2026-05-13) @@ -109,6 +110,8 @@ Status canon for planning: - The accepted user-runnable autorun for that slice is `AGENT | Phase 96 inventory reserve/liquidation quality-events` (`gen-ag05122057-c9786e`). - The first broader schema/primitive discovery slice is now accepted: `financial counterparty flow hints` adds bank-document purpose/operation/comment hints, protects bank-like counterparties from ordinary supplier/customer overclaim, and keeps normal counterparty net-flow canaries healthy; `phase97_financial_counterparty_flow_hints_live4` passed `4/4`. - The accepted user-runnable autorun for that slice is `AGENT | Phase 97 financial counterparty flow hints replay` (`gen-ag05122250-4451a8`). +- The second broader schema/primitive discovery support slice is now accepted: `limit honesty and business-language gate` sanitizes route/proxy/MCP-style answer wording, keeps row-limit disclosure relevant to the asked business contour, and preserves debt/VAT/bank/inventory/supplier canaries; `phase98_limit_honesty_business_language_live3` passed `6/6`. +- The accepted user-runnable autorun for that slice is `AGENT | Phase 98 limit honesty and business-language replay` (`gen-ag05122315-f1e27c`). - The phase94 replay spec was repaired to real UTF-8 Russian before autorun persistence, so the saved user-runnable pack does not repeat the earlier GUI mojibake/card-text regression. - The short source of truth for status wording is [21 - current_status_canon_2026-05-01.md](./21%20-%20current_status_canon_2026-05-01.md). - The current execution spine after EHMO is [23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md](./23%20-%20current_execution_spine_and_semantic_control_gate_2026-05-05.md). @@ -121,6 +124,7 @@ Status canon for planning: - The third reviewed proof-family route slice is [30 - vendor_procurement_quality_reviewed_route_2026-05-12.md](./30%20-%20vendor_procurement_quality_reviewed_route_2026-05-12.md). - The fourth/final reviewed proof-family route slice is [31 - inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md](./31%20-%20inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md). - The first broader schema/primitive discovery slice is [32 - financial_counterparty_flow_hints_2026-05-13.md](./32%20-%20financial_counterparty_flow_hints_2026-05-13.md), now accepted live and saved as a user-runnable AGENT autorun. +- The second broader schema/primitive discovery support slice is [33 - limit_honesty_business_language_2026-05-13.md](./33%20-%20limit_honesty_business_language_2026-05-13.md), now accepted live and saved as a user-runnable AGENT autorun. It now documents a turnaround that is already operational in code, already materially past the acute regression breakpoint, and already moved through bounded MCP autonomy, Post-F hardening, inventory breadth proof, and the declared Planner Autonomy slice: @@ -191,7 +195,7 @@ Current honest status: - Planner Autonomy Consolidation progress: `100%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, truth-harness/acceptance-matrix surfacing, soft divergence warning, `catalog_alignment_ok` acceptance invariant, step-level expected catalog-alignment assertions, phase66 and phase32 spec alignment expectations, AGENT source-catalog surfacing, generated phase83 mixed planner-brain replay spec, checked-source user-facing error sanitation, surface-grounded catalog promotion, and guarded live phase83 acceptance validated. Broader unfamiliar 1C asks are now next-module breadth work rather than an open blocker inside this declared slice - Open-World Route Candidate Promotion progress: `100%` for the declared phase90 slice, with structured `route_candidate` runtime contract, artifact propagation, live semantic replay accepted at `5/5`, and accepted AGENT autorun persistence; broader autonomous route enablement remains the next active slice - Route-Candidate-Driven Enablement Loop progress: `100%`, with deterministic repair-target grouping, Lead Codex handoff surfacing, local tooling tests, live phase91 canary acceptance, phase92 proof-family candidates accepted/saved as a user-runnable AGENT autorun, `accounting_profit_margin` promoted into reviewed 90/91/99 execution by phase93 live replay, `debt_due_date_aging_quality` promoted into reviewed payment-term/open-balance execution by phase94 live replay, `vendor_risk_procurement_quality` promoted into reviewed procurement-concentration evidence by phase95 live replay, and `inventory_reserve_liquidation_quality` promoted into reviewed inventory quality-event evidence by phase96 live replay; the declared route-candidate-driven enablement loop is now closed and should be used as a regression gate for the next broader autonomy slice -- Open-World Schema/Primitive Discovery progress: `25%`, first slice `financial counterparty flow hints` accepted live at `4/4`; bank-document money-flow recipes expose operation/purpose/comment fields, ranked value-flow buckets carry `financial_flow_hint`, Sberbank-like leaders are bounded away from name-only supplier/customer overclaim, and the next slice should be selected from real unfamiliar 1C asks rather than synthetic domain wish lists. +- Open-World Schema/Primitive Discovery progress: `38%`, with `financial counterparty flow hints` accepted live at `4/4` and `limit honesty/business language` accepted live at `6/6`; bank-document money-flow recipes expose operation/purpose/comment fields, ranked value-flow buckets carry `financial_flow_hint`, Sberbank-like leaders are bounded away from name-only supplier/customer overclaim, route/proxy/MCP-style answer wording is sanitized, and the next slice should be selected from real unfamiliar 1C asks rather than synthetic domain wish lists. - graph snapshot after latest rebuild: see `graphify-out/GRAPH_REPORT.md` - current regression-gate breakpoint: - the validated hot paths are no longer structurally broken; @@ -283,6 +287,7 @@ Latest live proof now includes: - vendor/procurement quality reviewed route accepted locally/live: executor/runtime bridge/answer/candidate tests passed `118/118` with `1` skipped, build passed; `phase95_vendor_procurement_quality_reviewed_route_live2` accepted `7/7`; `vendor_risk_procurement_quality` now derives reviewed procurement-concentration evidence from confirmed outgoing payment rows, separates bank-like outgoing leaders from ordinary supplier dependency, removes the proof family from `missing_proof_families` when this reviewed evidence exists, and can promote `vendor_risk_procurement_boundary` route candidates to `ready_for_reviewed_execution`; the accepted autorun is `AGENT | Phase 95 vendor/procurement quality reviewed route` (`gen-ag05121357-9ea5d6`). - inventory reserve/liquidation quality reviewed route accepted locally/live: answer/runtime/candidate tests passed `84/84` with `1` skipped, pilot-executor tests passed `34/34`, build passed; direct MCP query for `address_inventory_quality_events_for_organization_v1` returned `fetched_rows=0`, `matched_rows=0`, `error=null`; `phase96_inventory_reserve_liquidation_quality_rerun` accepted `2/2`; `inventory_reserve_liquidation_quality` now derives reviewed evidence from posted write-off, receipt-adjustment, stocktaking, and revaluation documents, removes the proof family from `missing_proof_families` when this reviewed route executes, anchors the organization in the direct answer, and can promote `inventory_reserve_boundary` route candidates to `ready_for_reviewed_execution`; the accepted autorun is `AGENT | Phase 96 inventory reserve/liquidation quality-events` (`gen-ag05122057-c9786e`). - financial counterparty flow hints accepted locally/live: targeted bank-flow/intent/turn-input/answer tests passed `554/554` with `7` skipped, build passed, graphify rebuilt to `6483` nodes, `14382` edges, `143` communities; `phase97_financial_counterparty_flow_hints_live4` accepted `4/4`, proving explicit `СБЕРБАНК` wording, bank-operation purpose/direction disclosure, incoming-bank no-overclaim, business-overview bank boundaries, and `Группа СВК` net-flow canary continuity; the accepted autorun is `AGENT | Phase 97 financial counterparty flow hints replay` (`gen-ag05122250-4451a8`). +- limit honesty and business-language gate accepted locally/live: response-candidate/answer-adapter/pilot-executor/M23 tests passed `519/519` with `1` skipped, build passed, graphify rebuilt to `6484` nodes, `14385` edges, `142` communities; `phase98_limit_honesty_business_language_live3` accepted `6/6`, proving debt due-date boundary, short follow-up directness, VAT debug hygiene, top incoming bank boundary, inventory reserve boundary language, and supplier dependency language together; the accepted autorun is `AGENT | Phase 98 limit honesty and business-language replay` (`gen-ag05122315-f1e27c`). Current architectural reading: @@ -364,6 +369,7 @@ Read in this order: 31. `30 - vendor_procurement_quality_reviewed_route_2026-05-12.md` 32. `31 - inventory_reserve_liquidation_quality_reviewed_route_2026-05-12.md` 33. `32 - financial_counterparty_flow_hints_2026-05-13.md` +34. `33 - limit_honesty_business_language_2026-05-13.md` ## Planning Rules diff --git a/docs/orchestration/address_truth_harness_phase98_limit_honesty_business_language.json b/docs/orchestration/address_truth_harness_phase98_limit_honesty_business_language.json new file mode 100644 index 0000000..42210df --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase98_limit_honesty_business_language.json @@ -0,0 +1,205 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase98_limit_honesty_business_language", + "domain": "address_phase98_limit_honesty_business_language", + "title": "Phase 98 limit honesty and business-language replay", + "description": "Focused semantic replay from assistant-stage1-v2qsm_R0fF: answers may be bounded, but they must stay business-readable, direct-first, and must not leak MCP/proxy/route/debug wording when explaining row limits, incomplete coverage, debt due-date proof, inventory reserve proof, supplier dependency, VAT, or bank-like counterparties.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_debt_overdue_exactness_boundary", + "title": "Overdue receivables answer explains exactness boundary in business language", + "question": "По ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)альтернатива", + "(?i)2020|31\\.12\\.2020|конец 2020", + "(?i)дебитор|расчет|долг", + "(?i)срок оплат|просроч|не подтвержд|нельзя точно" + ], + "forbidden_answer_patterns": [ + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)уперл.*лимит", + "(?i)лимит выборки" + ], + "criticality": "critical", + "semantic_tags": [ + "debt_due_date_aging_quality", + "limit_honesty", + "business_language" + ] + }, + { + "step_id": "step_02_short_followup_keeps_short_business_answer", + "title": "Short why follow-up stays short and avoids repeated technical limitation text", + "question": "То есть просрочку доказать нельзя, коротко почему?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)коротко|нельзя|не подтвержд|не доказ", + "(?i)срок оплат|дата оплат|услови", + "(?i)просроч" + ], + "forbidden_answer_patterns": [ + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)уперл.*лимит", + "(?i)лимит выборки" + ], + "criticality": "critical", + "semantic_tags": [ + "followup_directness", + "debt_due_date_aging_quality", + "business_language" + ] + }, + { + "step_id": "step_03_vat_direct_answer_without_debug_leak", + "title": "VAT answer remains direct and does not leak debug mechanics", + "question": "НДС за 2020 по ООО Альтернатива Плюс какой?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)ндс", + "(?i)2020", + "(?i)альтернатива", + "(?i)к уплат|нетто|книга продаж|книга покупок|вычет" + ], + "forbidden_answer_patterns": [ + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)snapshot_items", + "(?i)answer_object" + ], + "criticality": "critical", + "semantic_tags": [ + "vat", + "business_language", + "debug_leak_guard" + ] + }, + { + "step_id": "step_04_top_incoming_money_keeps_bank_boundary", + "title": "Top incoming money answer keeps bank boundary and avoids ordinary-customer overclaim", + "question": "А кто принес больше всего денег за 2020?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)деньг|поступлен|входящ", + "(?i)сбербанк|свк|контрагент", + "(?i)банк|не.*обычн|не.*клиент|финансов|провер" + ], + "forbidden_answer_patterns": [ + "(?i)сбербанк.*обычн.*клиент", + "(?i)сбербанк.*главн.*клиент", + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)уперл.*лимит", + "(?i)лимит выборки" + ], + "criticality": "critical", + "semantic_tags": [ + "financial_counterparty_flow_hint", + "customer_revenue_and_payments", + "limit_honesty" + ] + }, + { + "step_id": "step_05_inventory_reserve_boundary_business_language", + "title": "Inventory reserve boundary uses business terms, not proxy jargon", + "question": "По ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)резерв|неликвид|склад", + "(?i)не подтвержд|нельзя точно|точно.*нельзя|нет подтвержден", + "(?i)списан|резерв|ликвидац|залежал|стар" + ], + "forbidden_answer_patterns": [ + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)уперл.*лимит", + "(?i)лимит выборки" + ], + "criticality": "critical", + "semantic_tags": [ + "inventory_reserve_liquidation_quality", + "business_language", + "limit_honesty" + ] + }, + { + "step_id": "step_06_supplier_dependency_bank_boundary", + "title": "Supplier dependency answer separates bank-like recipient from supplier dependency", + "question": "А зависимость от одного поставщика за 2020 можно точно оценить?", + "allowed_reply_types": [ + "factual", + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)поставщик|зависим|закуп|исходящ", + "(?i)2020", + "(?i)крупнейш|получател|концентрац", + "(?i)точн|не подтвержд|не доказ|не полностью", + "(?i)надежност|качество|договор|полная структура" + ], + "forbidden_answer_patterns": [ + "(?i)сбербанк.*обычн.*поставщик", + "(?i)сбербанк.*главн.*поставщик", + "(?i)\\bMCP\\b", + "(?i)proxy", + "(?i)route_candidate", + "(?i)primitive", + "(?i)planner_", + "(?i)catalog_", + "(?i)уперл.*лимит", + "(?i)лимит выборки" + ], + "criticality": "critical", + "semantic_tags": [ + "vendor_risk_procurement_quality", + "financial_counterparty_flow_hint", + "business_language" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js index 149ac19..e1e8f04 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js @@ -86,7 +86,7 @@ function userFacingLines(values) { return uniqueStrings(values).filter((line) => !hasInternalMechanics(line)); } function sanitizeUserFacingMechanics(value) { - return String(value ?? "").replace(/MCP-срез(?:ом|у|е|а)?/giu, (match) => { + let text = String(value ?? "").replace(/MCP-срез(?:ом|у|е|а)?/giu, (match) => { const normalized = match.toLowerCase(); if (normalized.endsWith("ом")) { return "срезом 1С"; @@ -102,6 +102,28 @@ function sanitizeUserFacingMechanics(value) { } return "срез 1С"; }); + const replacements = [ + [/\bprocurement-concentration route\b/giu, "проверка концентрации закупок/исходящих платежей"], + [/\breviewed vendor-risk route\b/giu, "отдельная проверка поставщицкого риска"], + [/\bvendor-risk route\b/giu, "проверка поставщицкого риска"], + [/\bdue-date route\b/giu, "проверка просрочки по срокам оплаты"], + [/\bdebt-quality proxy\b/giu, "ограниченный долговой сигнал"], + [/\bstaleness-risk proxy\b/giu, "косвенный признак залежалости"], + [/\bstaleness risk proxy\b/giu, "косвенный признак залежалости"], + [/\boperating-flow proxy\b/giu, "денежный операционный показатель"], + [/\btrading-margin proxy\b/giu, "товарная маржинальность по проверенным документам"], + [/\bprocurement concentration proxy\b/giu, "сигнал концентрации закупок/исходящих платежей"], + [/\boutgoing cash concentration proxy\b/giu, "сигнал концентрации исходящих денег"], + [/\bproxy-сигналы\b/giu, "косвенные признаки"], + [/\bproxy\b/giu, "косвенный показатель"], + [/\bsales-to-stock\b/giu, "отношение продаж к остатку"], + [/\boverdue\/due-date aging\b/giu, "просрочку по договорным срокам"], + [/\bP&L\b/gu, "полный отчет о прибылях и убытках"] + ]; + for (const [pattern, replacement] of replacements) { + text = text.replace(pattern, replacement); + } + return text; } function localizeLine(value) { const sanitizedValue = sanitizeUserFacingMechanics(value); @@ -385,6 +407,10 @@ function businessOverviewCoverageLimitLine(overview) { ? `Важно: по направлению ${limited.join(" и ")} проверка достигла лимита строк; это расширенный проверенный срез найденных строк, но не гарантия полного бухгалтерского оборота без отдельной полной выгрузки.` : null; } +function joinBusinessReplyLines(lines) { + const reply = userFacingLines(lines.map(localizeLine)).join("\n").trim(); + return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; +} function businessOverviewYearRowsLine(overview) { const years = Array.isArray(overview.yearly_breakdown) ? overview.yearly_breakdown : []; const values = years @@ -577,8 +603,7 @@ function buildCompactBidirectionalValueFlowReply(entryPoint, draft) { if (fallbackNextStep) { lines.push(`Следующий шаг: ${localizeLine(fallbackNextStep)}`); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } function compactComparable(value) { return String(value ?? "") @@ -748,8 +773,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { : "сумма не распознана"; lines.push(`Коротко: по бухгалтерскому маршруту 90/91/99 за ${periodScope} подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к выручке 90.01 ${marginPct}` : "; маржа к выручке 90.01 не рассчитана"}.`); lines.push("Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const headline = toNonEmptyString(draft.headline); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); @@ -770,8 +794,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { if (limitLine) { lines.push(limitLine); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (debtDueDateBoundary) { const dueDateAging = toRecordObject(overview.debt_due_date_aging); @@ -786,36 +809,35 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { const rowsWithAmount = typeof dueDateAging.rows_with_amount === "number" && Number.isFinite(dueDateAging.rows_with_amount) ? dueDateAging.rows_with_amount : null; + const dueDateScopePrefix = organizationScope ? `по компании ${organizationScope} ` : ""; if (status === "confirmed_overdue") { - lines.push(`Коротко: на ${asOfDate} подтвержденная просрочка есть: ${overdueAmount ?? "сумма не распознана"} по ${dueDateAging.overdue_rows ?? "найденным"} строкам.`); - lines.push("Основа ответа: открытые расчеты 60/62/76, договорный срок оплаты и дата расчетного документа; это уже due-date route, не старение договора как proxy."); + lines.push(`Коротко: ${dueDateScopePrefix}на ${asOfDate} подтвержденная просрочка есть: ${overdueAmount ?? "сумма не распознана"} по ${dueDateAging.overdue_rows ?? "найденным"} строкам.`); + lines.push("Основа ответа: открытые расчеты 60/62/76, договорный срок оплаты и дата расчетного документа; это проверка просрочки по срокам оплаты, а не просто возраст договора."); } else if (status === "no_payment_terms_configured") { - lines.push(`Коротко: на ${asOfDate} подтвержденной просрочки нет: открытые расчеты проверены${grossAmount ? ` на ${grossAmount}` : ""}, но в найденных договорах срок оплаты не установлен.`); + lines.push(`Коротко: ${dueDateScopePrefix}на ${asOfDate} подтвержденной просрочки нет: открытые расчеты проверены${grossAmount ? ` на ${grossAmount}` : ""}, но в найденных договорах срок оплаты не установлен.`); lines.push(rowsWithAmount !== null ? `Проверено строк с суммой: ${rowsWithAmount}. Без установленного срока оплаты нельзя честно назвать эти остатки просрочкой.` : "Без установленного срока оплаты нельзя честно назвать эти остатки просрочкой."); } else if (status === "insufficient_due_date_basis") { - lines.push(`Коротко: due-date route запущен на ${asOfDate}, но просрочка не подтверждена: по строкам с установленным сроком оплаты не хватило даты расчетного документа.`); + lines.push(`Коротко: ${dueDateScopePrefix}на ${asOfDate} просрочка не подтверждена: по строкам с установленным сроком оплаты не хватило даты расчетного документа.`); if (rowsWithPaymentTerms !== null) { - lines.push(`Строк с установленным сроком оплаты: ${rowsWithPaymentTerms}; нужен документ-основание с датой для расчета due date.`); + lines.push(`Строк с установленным сроком оплаты: ${rowsWithPaymentTerms}; нужен документ-основание с датой, чтобы посчитать договорный срок оплаты.`); } } else { - lines.push(`Коротко: due-date route на ${asOfDate} проверен, подтвержденной просрочки не найдено${rowsWithPaymentTerms !== null ? `; строк с установленным сроком оплаты ${rowsWithPaymentTerms}` : ""}.`); + lines.push(`Коротко: ${dueDateScopePrefix}на ${asOfDate} проверка просрочки по срокам оплаты выполнена, подтвержденной просрочки не найдено${rowsWithPaymentTerms !== null ? `; строк с установленным сроком оплаты ${rowsWithPaymentTerms}` : ""}.`); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const headline = toNonEmptyString(draft.headline); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); lines.push(cleanHeadline ? `Коротко: ${localizeLine(cleanHeadline)}` - : "Коротко: нельзя точно определить, какая дебиторка просрочена, по текущему срезу 1С; есть только debt-quality proxy, но нет проверенного due-date маршрута."); - lines.push("Проверить нужно отдельно: договоры, сроки оплаты, погашение и закрытие задолженности; без этого нельзя доказать overdue/due-date aging."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + : "Коротко: нельзя точно определить, какая дебиторка просрочена, по текущему срезу 1С; есть только ограниченный долговой сигнал, но нет проверки договорных сроков оплаты."); + lines.push("Проверить нужно отдельно: договоры, сроки оплаты, погашение и закрытие задолженности; без этого нельзя доказать просрочку по договорным срокам."); + return joinBusinessReplyLines(lines); } if (vendorRiskBoundary) { const vendorProcurementQuality = toRecordObject(overview.vendor_procurement_quality); @@ -839,7 +861,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { const periodScope = toNonEmptyString(vendorProcurementQuality.period_scope) ?? period; const totalText = totalOutgoing ? `; всего исходящих платежей в проверенном срезе ${totalOutgoing}` : ""; if (status === "financial_institution_leads_outgoing_cash") { - lines.push(`Коротко: проверенный procurement-concentration route за ${periodScope} не подтверждает зависимость от обычного поставщика: крупнейший получатель исходящих денег ${topOutgoingName ?? "не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : ""}, но по названию это банк/финансовая организация${totalText}.`); + lines.push(`Коротко: проверка концентрации закупок/исходящих платежей за ${periodScope} не подтверждает зависимость от обычного поставщика: крупнейший получатель исходящих денег ${topOutgoingName ?? "не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : ""}, но по названию это банк/финансовая организация${totalText}.`); const financialHintText = financialFlowHintTextRuFromRecord(topOutgoingRecord); if (financialHintText) { lines.push(financialHintText); @@ -849,19 +871,18 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { } } else if (status === "reviewed_procurement_concentration") { - lines.push(`Коротко: проверенный procurement-concentration route за ${periodScope} нашел основную зависимость исходящего потока: ${topOutgoingName ?? nonFinancialName ?? "получатель не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : nonFinancialShare ? ` держит около ${nonFinancialShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : nonFinancialAmount ? ` (${nonFinancialAmount})` : ""}${totalText}.`); + lines.push(`Коротко: точный риск зависимости от одного поставщика не подтвержден полностью; проверка концентрации закупок/исходящих платежей за ${periodScope} нашла крупнейшего получателя исходящего потока: ${topOutgoingName ?? nonFinancialName ?? "получатель не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : nonFinancialShare ? ` держит около ${nonFinancialShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : nonFinancialAmount ? ` (${nonFinancialAmount})` : ""}${totalText}.`); } else { - lines.push(`Коротко: procurement-concentration route за ${periodScope} отработал, но надежной небанковской концентрации поставщика по найденным исходящим платежам не хватает${totalText}.`); + lines.push(`Коротко: проверка концентрации закупок/исходящих платежей за ${periodScope} выполнена, но надежной небанковской концентрации поставщика по найденным исходящим платежам не хватает${totalText}.`); } const contractText = typeof vendorProcurementQuality.used_contracts === "number" && Number.isFinite(vendorProcurementQuality.used_contracts) ? typeof vendorProcurementQuality.total_contracts === "number" && Number.isFinite(vendorProcurementQuality.total_contracts) ? ` Договорный профиль: используется ${vendorProcurementQuality.used_contracts}/${vendorProcurementQuality.total_contracts} договоров${typeof vendorProcurementQuality.used_contract_share_pct === "number" && Number.isFinite(vendorProcurementQuality.used_contract_share_pct) ? ` (${vendorProcurementQuality.used_contract_share_pct}%)` : ""}.` : ` Договорный профиль: используется ${vendorProcurementQuality.used_contracts} договоров.` : ""; - lines.push(`Что не доказано этим маршрутом: надежность поставщика, качество поставок, договорные условия, назначение каждого платежа и полная структура всех расходов.${contractText}`); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + lines.push(`Что не доказано этим срезом: надежность поставщика, качество поставок, договорные условия, назначение каждого платежа и полная структура всех расходов.${contractText}`); + return joinBusinessReplyLines(lines); } const supplierBasis = topSupplier ? topSupplierLooksFinancial @@ -870,26 +891,24 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { : outgoingAmount ? `исходящие платежи/закупочный поток в проверенном срезе: ${outgoingAmount}` : "есть только ограниченный срез исходящих платежей без полного vendor-risk профиля"; - const proxyLabel = topSupplierLooksFinancial ? "outgoing cash concentration proxy" : "procurement concentration proxy"; + const proxyLabel = topSupplierLooksFinancial + ? "сигнал концентрации исходящих денег" + : "сигнал концентрации закупок/исходящих платежей"; lines.push(`Коротко: точный риск зависимости от одного поставщика по текущим данным не подтвержден; есть только ${proxyLabel}: ${supplierBasis}.`); lines.push("Это сигнал концентрации закупок/исходящих платежей, а не полный аудит надежности поставщиков, условий, качества и структуры всех расходов."); - lines.push("Для точного вывода нужен отдельный reviewed vendor-risk route: поставщики, договорные условия, качество поставок, сроки, доля в закупках и полная структура расходов."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + lines.push("Для точного вывода нужна отдельная проверка поставщицкого риска: поставщики, договорные условия, качество поставок, сроки, доля в закупках и полная структура расходов."); + return joinBusinessReplyLines(lines); } if (inventoryReserveBoundary) { const headline = toNonEmptyString(draft.headline); const inventoryQualityEvents = toRecordObject(overview.inventory_quality_events); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); - lines.push(cleanHeadline - ? `Коротко: ${localizeLine(cleanHeadline)}` + const reserveBasis = cleanHeadline ? localizeLine(cleanHeadline).replace(/^проверил/iu, "Проверены") : null; + lines.push(reserveBasis + ? `Коротко: точно подтвердить резерв под неликвиды нельзя. ${reserveBasis}` : "Коротко: точно подтвердить резерв под неликвиды по текущим данным нельзя."); if (inventoryQualityEvents) { - if (limitLine) { - lines.push(limitLine); - } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const boundaryLines = userFacingLines([ ...toStringList(draft.unknown_lines), @@ -900,9 +919,8 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { if (boundaryLines.length > 0) { lines.push(...boundaryLines.map(localizeLine)); } - lines.push("Проверить нужно отдельно: складской срез на дату, учетную политику резервов, списания и ликвидационную стоимость; proxy-сигналы нельзя выдавать за доказанный факт резерва."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + lines.push("Проверить нужно отдельно: складской срез на дату, учетную политику резервов, списания и ликвидационную стоимость; косвенные признаки нельзя выдавать за доказанный факт резерва."); + return joinBusinessReplyLines(lines); } if (crossScopeExecutiveSummary && separateSubject && previousCounterpartySummary && (incomingAmount || outgoingAmount || netAmount)) { lines.push(`Коротко: по компании ${organizationScope ?? "в выбранном контуре"} ${period} подтвержден денежный срез: получили ${incomingAmount ?? "0 руб."}, исходящие платежи/списания ${outgoingAmount ?? "0 руб."}, ${netDirection} ${sentenceAmount(netAmount) ?? netAmount ?? "0 руб."}${previousCounterpartySummary.lead}; можно утверждать только эти подтвержденные срезы, нельзя называть это чистой прибылью, полным оборотом или доказанной ролью главного клиента/поставщика.`); @@ -912,8 +930,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { if (limitLine) { lines.push(limitLine); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (rankingNeed) { const incomingLeader = strongestIncomingYear(overview); @@ -1005,8 +1022,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { lines.push(limitLine); } lines.push("Для ответа именно про чистую прибыль нужно отдельно считать себестоимость, расходы и закрытие периода."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } function statusFrom(entryPoint) { if (!entryPoint || entryPoint.entry_status === "skipped_not_applicable") { diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts index 7480f8a..540781c 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts @@ -120,7 +120,7 @@ function userFacingLines(values: string[]): string[] { } function sanitizeUserFacingMechanics(value: string): string { - return String(value ?? "").replace(/MCP-срез(?:ом|у|е|а)?/giu, (match) => { + let text = String(value ?? "").replace(/MCP-срез(?:ом|у|е|а)?/giu, (match) => { const normalized = match.toLowerCase(); if (normalized.endsWith("ом")) { return "срезом 1С"; @@ -136,6 +136,28 @@ function sanitizeUserFacingMechanics(value: string): string { } return "срез 1С"; }); + const replacements: Array<[RegExp, string]> = [ + [/\bprocurement-concentration route\b/giu, "проверка концентрации закупок/исходящих платежей"], + [/\breviewed vendor-risk route\b/giu, "отдельная проверка поставщицкого риска"], + [/\bvendor-risk route\b/giu, "проверка поставщицкого риска"], + [/\bdue-date route\b/giu, "проверка просрочки по срокам оплаты"], + [/\bdebt-quality proxy\b/giu, "ограниченный долговой сигнал"], + [/\bstaleness-risk proxy\b/giu, "косвенный признак залежалости"], + [/\bstaleness risk proxy\b/giu, "косвенный признак залежалости"], + [/\boperating-flow proxy\b/giu, "денежный операционный показатель"], + [/\btrading-margin proxy\b/giu, "товарная маржинальность по проверенным документам"], + [/\bprocurement concentration proxy\b/giu, "сигнал концентрации закупок/исходящих платежей"], + [/\boutgoing cash concentration proxy\b/giu, "сигнал концентрации исходящих денег"], + [/\bproxy-сигналы\b/giu, "косвенные признаки"], + [/\bproxy\b/giu, "косвенный показатель"], + [/\bsales-to-stock\b/giu, "отношение продаж к остатку"], + [/\boverdue\/due-date aging\b/giu, "просрочку по договорным срокам"], + [/\bP&L\b/gu, "полный отчет о прибылях и убытках"] + ]; + for (const [pattern, replacement] of replacements) { + text = text.replace(pattern, replacement); + } + return text; } function localizeLine(value: string): string { @@ -454,6 +476,11 @@ function businessOverviewCoverageLimitLine(overview: Record): s : null; } +function joinBusinessReplyLines(lines: string[]): string | null { + const reply = userFacingLines(lines.map(localizeLine)).join("\n").trim(); + return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; +} + function businessOverviewYearRowsLine(overview: Record): string | null { const years = Array.isArray(overview.yearly_breakdown) ? overview.yearly_breakdown : []; const values = years @@ -670,8 +697,7 @@ function buildCompactBidirectionalValueFlowReply( lines.push(`Следующий шаг: ${localizeLine(fallbackNextStep)}`); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } function compactComparable(value: string | null): string { @@ -883,8 +909,7 @@ function buildCompactBusinessOverviewReply( lines.push( "Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность." ); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const headline = toNonEmptyString(draft.headline); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); @@ -909,8 +934,7 @@ function buildCompactBusinessOverviewReply( if (limitLine) { lines.push(limitLine); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (debtDueDateBoundary) { @@ -928,14 +952,15 @@ function buildCompactBusinessOverviewReply( typeof dueDateAging.rows_with_amount === "number" && Number.isFinite(dueDateAging.rows_with_amount) ? dueDateAging.rows_with_amount : null; + const dueDateScopePrefix = organizationScope ? `по компании ${organizationScope} ` : ""; if (status === "confirmed_overdue") { lines.push( - `Коротко: на ${asOfDate} подтвержденная просрочка есть: ${overdueAmount ?? "сумма не распознана"} по ${dueDateAging.overdue_rows ?? "найденным"} строкам.` + `Коротко: ${dueDateScopePrefix}на ${asOfDate} подтвержденная просрочка есть: ${overdueAmount ?? "сумма не распознана"} по ${dueDateAging.overdue_rows ?? "найденным"} строкам.` ); - lines.push("Основа ответа: открытые расчеты 60/62/76, договорный срок оплаты и дата расчетного документа; это уже due-date route, не старение договора как proxy."); + lines.push("Основа ответа: открытые расчеты 60/62/76, договорный срок оплаты и дата расчетного документа; это проверка просрочки по срокам оплаты, а не просто возраст договора."); } else if (status === "no_payment_terms_configured") { lines.push( - `Коротко: на ${asOfDate} подтвержденной просрочки нет: открытые расчеты проверены${grossAmount ? ` на ${grossAmount}` : ""}, но в найденных договорах срок оплаты не установлен.` + `Коротко: ${dueDateScopePrefix}на ${asOfDate} подтвержденной просрочки нет: открытые расчеты проверены${grossAmount ? ` на ${grossAmount}` : ""}, но в найденных договорах срок оплаты не установлен.` ); lines.push( rowsWithAmount !== null @@ -944,31 +969,29 @@ function buildCompactBusinessOverviewReply( ); } else if (status === "insufficient_due_date_basis") { lines.push( - `Коротко: due-date route запущен на ${asOfDate}, но просрочка не подтверждена: по строкам с установленным сроком оплаты не хватило даты расчетного документа.` + `Коротко: ${dueDateScopePrefix}на ${asOfDate} просрочка не подтверждена: по строкам с установленным сроком оплаты не хватило даты расчетного документа.` ); if (rowsWithPaymentTerms !== null) { - lines.push(`Строк с установленным сроком оплаты: ${rowsWithPaymentTerms}; нужен документ-основание с датой для расчета due date.`); + lines.push(`Строк с установленным сроком оплаты: ${rowsWithPaymentTerms}; нужен документ-основание с датой, чтобы посчитать договорный срок оплаты.`); } } else { lines.push( - `Коротко: due-date route на ${asOfDate} проверен, подтвержденной просрочки не найдено${rowsWithPaymentTerms !== null ? `; строк с установленным сроком оплаты ${rowsWithPaymentTerms}` : ""}.` + `Коротко: ${dueDateScopePrefix}на ${asOfDate} проверка просрочки по срокам оплаты выполнена, подтвержденной просрочки не найдено${rowsWithPaymentTerms !== null ? `; строк с установленным сроком оплаты ${rowsWithPaymentTerms}` : ""}.` ); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const headline = toNonEmptyString(draft.headline); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); lines.push( cleanHeadline ? `Коротко: ${localizeLine(cleanHeadline)}` - : "Коротко: нельзя точно определить, какая дебиторка просрочена, по текущему срезу 1С; есть только debt-quality proxy, но нет проверенного due-date маршрута." + : "Коротко: нельзя точно определить, какая дебиторка просрочена, по текущему срезу 1С; есть только ограниченный долговой сигнал, но нет проверки договорных сроков оплаты." ); lines.push( - "Проверить нужно отдельно: договоры, сроки оплаты, погашение и закрытие задолженности; без этого нельзя доказать overdue/due-date aging." + "Проверить нужно отдельно: договоры, сроки оплаты, погашение и закрытие задолженности; без этого нельзя доказать просрочку по договорным срокам." ); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (vendorRiskBoundary) { @@ -996,7 +1019,7 @@ function buildCompactBusinessOverviewReply( const totalText = totalOutgoing ? `; всего исходящих платежей в проверенном срезе ${totalOutgoing}` : ""; if (status === "financial_institution_leads_outgoing_cash") { lines.push( - `Коротко: проверенный procurement-concentration route за ${periodScope} не подтверждает зависимость от обычного поставщика: крупнейший получатель исходящих денег ${topOutgoingName ?? "не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : ""}, но по названию это банк/финансовая организация${totalText}.` + `Коротко: проверка концентрации закупок/исходящих платежей за ${periodScope} не подтверждает зависимость от обычного поставщика: крупнейший получатель исходящих денег ${topOutgoingName ?? "не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : ""}, но по названию это банк/финансовая организация${totalText}.` ); const financialHintText = financialFlowHintTextRuFromRecord(topOutgoingRecord); if (financialHintText) { @@ -1009,11 +1032,11 @@ function buildCompactBusinessOverviewReply( } } else if (status === "reviewed_procurement_concentration") { lines.push( - `Коротко: проверенный procurement-concentration route за ${periodScope} нашел основную зависимость исходящего потока: ${topOutgoingName ?? nonFinancialName ?? "получатель не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : nonFinancialShare ? ` держит около ${nonFinancialShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : nonFinancialAmount ? ` (${nonFinancialAmount})` : ""}${totalText}.` + `Коротко: точный риск зависимости от одного поставщика не подтвержден полностью; проверка концентрации закупок/исходящих платежей за ${periodScope} нашла крупнейшего получателя исходящего потока: ${topOutgoingName ?? nonFinancialName ?? "получатель не распознан"}${topOutgoingShare ? ` держит около ${topOutgoingShare}` : nonFinancialShare ? ` держит около ${nonFinancialShare}` : ""}${topOutgoingAmount ? ` (${topOutgoingAmount})` : nonFinancialAmount ? ` (${nonFinancialAmount})` : ""}${totalText}.` ); } else { lines.push( - `Коротко: procurement-concentration route за ${periodScope} отработал, но надежной небанковской концентрации поставщика по найденным исходящим платежам не хватает${totalText}.` + `Коротко: проверка концентрации закупок/исходящих платежей за ${periodScope} выполнена, но надежной небанковской концентрации поставщика по найденным исходящим платежам не хватает${totalText}.` ); } const contractText = @@ -1023,10 +1046,9 @@ function buildCompactBusinessOverviewReply( : ` Договорный профиль: используется ${vendorProcurementQuality.used_contracts} договоров.` : ""; lines.push( - `Что не доказано этим маршрутом: надежность поставщика, качество поставок, договорные условия, назначение каждого платежа и полная структура всех расходов.${contractText}` + `Что не доказано этим срезом: надежность поставщика, качество поставок, договорные условия, назначение каждого платежа и полная структура всех расходов.${contractText}` ); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const supplierBasis = topSupplier ? topSupplierLooksFinancial @@ -1035,7 +1057,9 @@ function buildCompactBusinessOverviewReply( : outgoingAmount ? `исходящие платежи/закупочный поток в проверенном срезе: ${outgoingAmount}` : "есть только ограниченный срез исходящих платежей без полного vendor-risk профиля"; - const proxyLabel = topSupplierLooksFinancial ? "outgoing cash concentration proxy" : "procurement concentration proxy"; + const proxyLabel = topSupplierLooksFinancial + ? "сигнал концентрации исходящих денег" + : "сигнал концентрации закупок/исходящих платежей"; lines.push( `Коротко: точный риск зависимости от одного поставщика по текущим данным не подтвержден; есть только ${proxyLabel}: ${supplierBasis}.` ); @@ -1043,27 +1067,23 @@ function buildCompactBusinessOverviewReply( "Это сигнал концентрации закупок/исходящих платежей, а не полный аудит надежности поставщиков, условий, качества и структуры всех расходов." ); lines.push( - "Для точного вывода нужен отдельный reviewed vendor-risk route: поставщики, договорные условия, качество поставок, сроки, доля в закупках и полная структура расходов." + "Для точного вывода нужна отдельная проверка поставщицкого риска: поставщики, договорные условия, качество поставок, сроки, доля в закупках и полная структура расходов." ); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (inventoryReserveBoundary) { const headline = toNonEmptyString(draft.headline); const inventoryQualityEvents = toRecordObject(overview.inventory_quality_events); const cleanHeadline = headline?.replace(/^Коротко:\s*/iu, "").trim(); + const reserveBasis = cleanHeadline ? localizeLine(cleanHeadline).replace(/^проверил/iu, "Проверены") : null; lines.push( - cleanHeadline - ? `Коротко: ${localizeLine(cleanHeadline)}` + reserveBasis + ? `Коротко: точно подтвердить резерв под неликвиды нельзя. ${reserveBasis}` : "Коротко: точно подтвердить резерв под неликвиды по текущим данным нельзя." ); if (inventoryQualityEvents) { - if (limitLine) { - lines.push(limitLine); - } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } const boundaryLines = userFacingLines([ ...toStringList(draft.unknown_lines), @@ -1075,10 +1095,9 @@ function buildCompactBusinessOverviewReply( lines.push(...boundaryLines.map(localizeLine)); } lines.push( - "Проверить нужно отдельно: складской срез на дату, учетную политику резервов, списания и ликвидационную стоимость; proxy-сигналы нельзя выдавать за доказанный факт резерва." + "Проверить нужно отдельно: складской срез на дату, учетную политику резервов, списания и ликвидационную стоимость; косвенные признаки нельзя выдавать за доказанный факт резерва." ); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (crossScopeExecutiveSummary && separateSubject && previousCounterpartySummary && (incomingAmount || outgoingAmount || netAmount)) { @@ -1095,8 +1114,7 @@ function buildCompactBusinessOverviewReply( if (limitLine) { lines.push(limitLine); } - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } if (rankingNeed) { @@ -1205,8 +1223,7 @@ function buildCompactBusinessOverviewReply( lines.push(limitLine); } lines.push("Для ответа именно про чистую прибыль нужно отдельно считать себестоимость, расходы и закрытие периода."); - const reply = lines.join("\n").trim(); - return reply.length > 0 && !hasInternalMechanics(reply) ? reply : null; + return joinBusinessReplyLines(lines); } function statusFrom(entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null): AssistantMcpDiscoveryResponseCandidateStatus { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts index 7e5fa20..200dd11 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts @@ -154,7 +154,7 @@ describe("assistant MCP discovery response candidate", () => { ); expect(candidate.reply_text).toContain("нельзя точно подтвердить чистую прибыль"); - expect(candidate.reply_text).toContain("P&L"); + expect(candidate.reply_text).toContain("полный отчет о прибылях и убытках"); expect(candidate.reply_text).toContain("себестоимости"); expect(candidate.reply_text).not.toContain("47 628 853"); }); @@ -208,7 +208,7 @@ describe("assistant MCP discovery response candidate", () => { ); expect(candidate.reply_text).toContain("риск зависимости"); - expect(candidate.reply_text).toContain("outgoing cash concentration proxy"); + expect(candidate.reply_text).toContain("сигнал концентрации исходящих денег"); expect(candidate.reply_text).toContain("банк/финансовая организация"); expect(candidate.reply_text).toContain("не доказанная зависимость от обычного поставщика"); expect(candidate.reply_text).not.toContain("крупнейший подтвержденный поставщик/получатель исходящих платежей: СБЕРБАНК"); @@ -296,11 +296,12 @@ describe("assistant MCP discovery response candidate", () => { }) ); - expect(candidate.reply_text).toContain("procurement-concentration route"); + expect(candidate.reply_text).toContain("проверка концентрации закупок/исходящих платежей"); expect(candidate.reply_text).toContain("банк/финансовая организация"); expect(candidate.reply_text).toContain("Поставщик А"); expect(candidate.reply_text).toContain("надежность поставщика"); expect(candidate.reply_text).not.toContain("outgoing cash concentration proxy"); + expect(candidate.reply_text).not.toContain("procurement-concentration route"); expect(candidate.reply_text).not.toContain("business_overview_route_template_v1"); }); @@ -464,7 +465,7 @@ describe("assistant MCP discovery response candidate", () => { expect(candidate.reply_text).toContain("12 474 036,91 руб"); expect(candidate.reply_text?.split("\n")[0]).toContain("крупнейший источник входящих денег: ГКУ УКРиС"); expect(candidate.reply_text?.split("\n")[0]).toContain("крупнейший получатель исходящих денег: ООО Поставщик"); - expect(candidate.reply_text).toContain("денежный operating-flow proxy"); + expect(candidate.reply_text).toContain("денежный операционный показатель"); expect(candidate.reply_text).not.toContain("Что можно сказать только как вывод:"); expect(candidate.reply_text).not.toContain("Складской срез"); }); diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index 1519b27..d1fec52 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,54 @@ [ + { + "generation_id": "gen-ag05122315-f1e27c", + "created_at": "2026-05-12T23:15:48+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 98 limit honesty and business-language replay", + "count": 6, + "domain": "address_phase98_limit_honesty_business_language", + "questions": [ + "По ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?", + "То есть просрочку доказать нельзя, коротко почему?", + "НДС за 2020 по ООО Альтернатива Плюс какой?", + "А кто принес больше всего денег за 2020?", + "По ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?", + "А зависимость от одного поставщика за 2020 можно точно оценить?" + ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260512231548_gen-ag05122315-f1e27c.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260512231548_gen-ag05122315-f1e27c.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Focused semantic replay from assistant-stage1-v2qsm_R0fF: answers may be bounded, but they must stay business-readable, direct-first, and must not leak MCP/proxy/route/debug wording when explaining row limits, incomplete coverage, debt due-date proof, inventory reserve proof, supplier dependency, VAT, or bank-like counterparties.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase98_limit_honesty_business_language.json", + "scenario_id": "address_truth_harness_phase98_limit_honesty_business_language", + "semantic_tags": [ + "business_language", + "customer_revenue_and_payments", + "debt_due_date_aging_quality", + "debug_leak_guard", + "financial_counterparty_flow_hint", + "followup_directness", + "inventory_reserve_liquidation_quality", + "limit_honesty", + "vat", + "vendor_risk_procurement_quality" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\phase98_limit_honesty_business_language_live3", + "saved_after_validated_replay": true + } + }, { "generation_id": "gen-ag05122250-4451a8", "created_at": "2026-05-12T22:50:23+00:00", diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260512231548_gen-ag05122315-f1e27c.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260512231548_gen-ag05122315-f1e27c.json new file mode 100644 index 0000000..457b2af --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260512231548_gen-ag05122315-f1e27c.json @@ -0,0 +1,155 @@ +{ + "saved_at": "2026-05-12T23:15:48+00:00", + "generation_id": "gen-ag05122315-f1e27c", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 98 limit honesty and business-language replay", + "agent_run": true, + "questions": [ + "По ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?", + "То есть просрочку доказать нельзя, коротко почему?", + "НДС за 2020 по ООО Альтернатива Плюс какой?", + "А кто принес больше всего денег за 2020?", + "По ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?", + "А зависимость от одного поставщика за 2020 можно точно оценить?" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Focused semantic replay from assistant-stage1-v2qsm_R0fF: answers may be bounded, but they must stay business-readable, direct-first, and must not leak MCP/proxy/route/debug wording when explaining row limits, incomplete coverage, debt due-date proof, inventory reserve proof, supplier dependency, VAT, or bank-like counterparties.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase98_limit_honesty_business_language.json", + "scenario_id": "address_truth_harness_phase98_limit_honesty_business_language", + "semantic_tags": [ + "business_language", + "customer_revenue_and_payments", + "debt_due_date_aging_quality", + "debug_leak_guard", + "financial_counterparty_flow_hint", + "followup_directness", + "inventory_reserve_liquidation_quality", + "limit_honesty", + "vat", + "vendor_risk_procurement_quality" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\phase98_limit_honesty_business_language_live3", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\phase98_limit_honesty_business_language_live3", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 6, + "steps_passed": 6, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "По ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "То есть просрочку доказать нельзя, коротко почему?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "НДС за 2020 по ООО Альтернатива Плюс какой?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "А кто принес больше всего денег за 2020?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "По ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-006", + "role": "user", + "text": "А зависимость от одного поставщика за 2020 можно точно оценить?", + "created_at": "2026-05-12T23:15:48+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Focused semantic replay from assistant-stage1-v2qsm_R0fF: answers may be bounded, but they must stay business-readable, direct-first, and must not leak MCP/proxy/route/debug wording when explaining row limits, incomplete coverage, debt due-date proof, inventory reserve proof, supplier dependency, VAT, or bank-like counterparties.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase98_limit_honesty_business_language.json", + "scenario_id": "address_truth_harness_phase98_limit_honesty_business_language", + "semantic_tags": [ + "business_language", + "customer_revenue_and_payments", + "debt_due_date_aging_quality", + "debug_leak_guard", + "financial_counterparty_flow_hint", + "followup_directness", + "inventory_reserve_liquidation_quality", + "limit_honesty", + "vat", + "vendor_risk_procurement_quality" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\phase98_limit_honesty_business_language_live3", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\phase98_limit_honesty_business_language_live3", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 6, + "steps_passed": 6, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260512231548_gen-ag05122315-f1e27c.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260512231548_gen-ag05122315-f1e27c.json new file mode 100644 index 0000000..adbc100 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260512231548_gen-ag05122315-f1e27c.json @@ -0,0 +1,43 @@ +{ + "suite_id": "assistant_saved_session_gen-ag05122315-f1e27c", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-05-12T23:15:48+00:00", + "generation_id": "gen-ag05122315-f1e27c", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 98 limit honesty and business-language replay", + "domain": "address_phase98_limit_honesty_business_language", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Phase 98 limit honesty and business-language replay", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "По ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?" + }, + { + "user_message": "То есть просрочку доказать нельзя, коротко почему?" + }, + { + "user_message": "НДС за 2020 по ООО Альтернатива Плюс какой?" + }, + { + "user_message": "А кто принес больше всего денег за 2020?" + }, + { + "user_message": "По ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?" + }, + { + "user_message": "А зависимость от одного поставщика за 2020 можно точно оценить?" + } + ] + } + ] +}