From 9048632d3e4322e634634d0036c3d757777f6603 Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 14 Apr 2026 08:12:12 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=A0=D0=A0=D0=9A=D0=95=D0=A1=D0=A2?= =?UTF-8?q?=D0=A0=D0=90=D0=A6=D0=98=D0=AF=20-=D0=A3=D1=82=D0=BE=D1=87?= =?UTF-8?q?=D0=BD=D0=B8=D1=82=D1=8C=20safety-policy=20=D0=BE=D1=80=D0=BA?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D1=80=D0=B0=D1=82=D0=BE=D1=80=D0=B0:=20?= =?UTF-8?q?=D0=BE=D1=81=D1=82=D0=B0=D0=BD=D0=B0=D0=B2=D0=BB=D0=B8=D0=B2?= =?UTF-8?q?=D0=B0=D1=82=D1=8C=20loop=20=D0=BD=D0=B0=20hard=20blockers=20?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B8=D1=81=D0=BA=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD?= =?UTF-8?q?=D1=8B=D1=85=20=D1=80=D0=B5=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=D1=85?= =?UTF-8?q?,=20=D0=B0=20=D0=BD=D0=B5=20=D1=82=D0=BE=D0=BB=D1=8C=D0=BA?= =?UTF-8?q?=D0=BE=20=D0=BF=D0=BE=20=D0=BF=D0=BE=D1=80=D0=BE=D0=B3=D1=83=20?= =?UTF-8?q?80%?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .codex/agents/orchestrator.toml | 20 +- .codex/skills/domain-case-loop/SKILL.md | 57 +- .../references/artifact_layout.md | 50 + .../references/repo_runtime_map.md | 18 +- .../scenario_manifest_template.json | 32 + AGENTS.md | 3 + artifacts/runtime_logs/backend_stderr.log | 0 artifacts/runtime_logs/backend_stdout.log | 1 + .../domain_inventory_stock_supplier_trace.md | 95 + ...n_inventory_stock_supplier_trace_pack.json | 195 ++ ...ain_inventory_stock_supplier_trace_pool.md | 73 + .../domain_scenario_loop_repo_adapter.md | 80 + .../domain_loop_analyst_verdict.schema.json | 92 + .../domain_loop_coder_result.schema.json | 37 + .../dist/services/addressCapabilityPolicy.js | 33 + .../dist/services/addressFilterExtractor.js | 3 +- .../dist/services/addressIntentResolver.js | 91 + .../src/services/addressCapabilityPolicy.ts | 39 + .../src/services/addressFilterExtractor.ts | 5 +- .../src/services/addressIntentResolver.ts | 122 ++ .../backend/src/types/addressQuery.ts | 12 +- .../tests/addressCapabilityPolicy.test.ts | 24 + 
.../tests/addressQueryRuntimeM23.test.ts | 48 +- scripts/domain_case_loop.py | 1696 ++++++++++++++++- 24 files changed, 2790 insertions(+), 36 deletions(-) create mode 100644 .codex/skills/domain-case-loop/references/scenario_manifest_template.json create mode 100644 artifacts/runtime_logs/backend_stderr.log create mode 100644 artifacts/runtime_logs/backend_stdout.log create mode 100644 docs/orchestration/domain_inventory_stock_supplier_trace.md create mode 100644 docs/orchestration/domain_inventory_stock_supplier_trace_pack.json create mode 100644 docs/orchestration/domain_inventory_stock_supplier_trace_pool.md create mode 100644 docs/orchestration/domain_scenario_loop_repo_adapter.md create mode 100644 docs/orchestration/schemas/domain_loop_analyst_verdict.schema.json create mode 100644 docs/orchestration/schemas/domain_loop_coder_result.schema.json diff --git a/.codex/agents/orchestrator.toml b/.codex/agents/orchestrator.toml index b0378e5..a15f171 100644 --- a/.codex/agents/orchestrator.toml +++ b/.codex/agents/orchestrator.toml @@ -1,5 +1,5 @@ name = "orchestrator" -description = "Coordinates a repo-native domain-case loop for NDC_1C: baseline capture, analyst verdict, minimal domain patch, rerun, and 80-point acceptance gate." +description = "Coordinates a repo-native domain-case or scenario loop for NDC_1C: baseline or scenario capture, analyst verdict, minimal domain patch, rerun, and 80-point acceptance gate." model = "gpt-5.4" model_reasoning_effort = "high" sandbox_mode = "workspace-write" @@ -13,14 +13,18 @@ Primary repo facts: - The helper runner is python scripts/domain_case_loop.py. Your job: -1. Accept one concrete domain case from the user. -2. Create or reuse an artifact folder under artifacts/domain_runs//. +1. Accept one concrete domain case or one linked multi-step domain scenario from the user. +2. Create or reuse an artifact folder under artifacts/domain_runs// or artifacts/domain_runs//. 3. 
Capture baseline via one of: - python scripts/domain_case_loop.py run-case ... - python scripts/domain_case_loop.py import-export ... -4. Ask domain_analyst for a strict verdict in Russian using baseline_turn.json first, then baseline_output.md / baseline_debug.json. + - python scripts/domain_case_loop.py run-scenario --manifest ... + - python scripts/domain_case_loop.py run-pack --manifest ... +4. Ask domain_analyst for a strict verdict in Russian using machine-readable artifacts first: + - case mode: baseline_turn.json, then baseline_output.md / baseline_debug.json + - scenario mode: scenario_state.json and per-step turn.json, then scenario_summary.md / per-step debug.json 5. Feed the verdict to domain_coder for the smallest defensible domain-only patch. -6. Capture rerun artifacts. +6. Capture rerun artifacts or scenario rerun artifacts. 7. Ask domain_analyst for before/after comparison and a quality score. 8. End with one status: accepted | partial | blocked | needs_exact_capability. @@ -31,6 +35,9 @@ Hard rules: - Keep the loop artifact-driven. - Reuse the existing backend/session/export flow; do not invent a parallel runtime. - When the repo structure differs from a template, adapt the skill/scripts/paths, not the product architecture. +- In autonomous loop mode, do not stop only because the analyst says `needs_exact_capability` or `partial` if there is still autonomous implementation work to do. +- Stop early when the analyst sets `requires_user_decision = true` because the next step would otherwise require guessing a missing required observation, accepting a risky architecture fork, choosing a business-critical tradeoff, or pushing through a hacky / brittle / disproportionally complex fix. +- Treat true runtime or 1C availability failures as `blocked`, not as a normal low-score iteration. 
Acceptance gate: - accepted requires analyst quality_score >= 80 @@ -42,6 +49,9 @@ Required artifacts per cycle: - baseline_output.md - baseline_debug.json - baseline_turn.json +- scenario_manifest.json +- scenario_state.json +- scenario_summary.md - analyst_verdict.md - coder_plan.md - patch_summary.md diff --git a/.codex/skills/domain-case-loop/SKILL.md b/.codex/skills/domain-case-loop/SKILL.md index afd1523..da25a06 100644 --- a/.codex/skills/domain-case-loop/SKILL.md +++ b/.codex/skills/domain-case-loop/SKILL.md @@ -1,11 +1,11 @@ --- name: domain-case-loop -description: Use this skill when a user wants to iteratively refine one NDC_1C domain case through a multi-agent loop: automated baseline capture, JSON analysis, minimal domain patch, rerun, and before/after verdict. +description: "Use this skill when a user wants to iteratively refine one NDC_1C domain case or one linked multi-step domain scenario through a multi-agent loop: automated capture, JSON analysis, minimal domain patch, rerun, and before/after verdict." --- # Domain case loop -This skill packages the standard workflow for iterating on one concrete domain case in NDC_1C. +This skill packages the standard workflow for iterating on one concrete domain case or one linked multi-step domain scenario in NDC_1C. ## Use this skill when @@ -14,6 +14,7 @@ This skill packages the standard workflow for iterating on one concrete domain c - the route is wrong even if the wording looks better; - there is a gap between exact compute intent and actual fallback output; - there are follow-up / continuation bugs that corrupt business context. +- the user has a cascade of linked questions that should reuse one assistant session and semantic state. ## Do not use this skill when @@ -28,12 +29,55 @@ Read `references/repo_runtime_map.md` before the first real cycle. 
Use these repo-native capture paths: - automated capture: `python scripts/domain_case_loop.py run-case ...` +- linked multi-step capture: `python scripts/domain_case_loop.py run-scenario --manifest path/to/manifest.json` +- full domain question pool capture: `python scripts/domain_case_loop.py run-pack --manifest path/to/pack.json` +- autonomous full-pack loop: `python scripts/domain_case_loop.py run-pack-loop --manifest path/to/pack.json` - import existing technical export: `python scripts/domain_case_loop.py import-export ...` - `run-case` defaults to the repo's live local profile: `local / qwen2.5-14b-instruct-1m / http://127.0.0.1:1234/v1` - override with `--llm-provider`, `--llm-model`, `--llm-base-url`, `--llm-api-key` when needed +- `run-pack-loop` defaults to `gpt-5.4` for analyst and `gpt-5.4-mini` for coder; tune with `--analyst-codex-model`, `--coder-codex-model`, `--analyst-reasoning-effort`, `--coder-reasoning-effort` ## Workflow +### Scenario mode + +Use scenario mode when the user brings a linked chain such as: +- "what is on stock now" +- "who supplied this item" +- "which documents bought it" +- "was it later sold" + +In scenario mode: +- create `scenario_manifest.json` first; +- keep one shared `session_id`; +- capture each step under `artifacts/domain_runs//steps//`; +- preserve semantic carryover via explicit `scenario_state.json`, not vague model memory. + +Use `references/scenario_manifest_template.json`. + +### Pack mode + +Use pack mode when the user brings a whole domain pool and wants grouped orchestration rather than one isolated chain. + +In pack mode: +- group the question pool into several coherent scenarios; +- capture each scenario under `artifacts/domain_runs//scenarios//`; +- write aggregate `pack_state.json` and `pack_summary.md`; +- treat unresolved scenarios as enablement backlog, not as a reason to drop the domain. 
+ +### Autonomous pack-loop mode + +Use autonomous pack-loop mode when the user wants the system to continue with analyst/coder iterations until the analyst gate is reached or the loop hits a real blocker. + +In autonomous pack-loop mode: +- run `python scripts/domain_case_loop.py run-pack-loop --manifest ...`; +- keep each iteration under `artifacts/domain_runs//iterations//`; +- read `analyst_verdict.json` before any coder patch; +- let coder patch only the highest-value domain targets from the current analyst verdict; +- stop only on `accepted`, `blocked`, explicit `requires_user_decision = true`, or `max_iterations`; +- do not stop just because the analyst returns `needs_exact_capability` or `partial` if autonomous domain enablement work still remains. +- treat `quality score >= 80` as the target gate, not as permission to keep pushing through hard blockers, missing essential observations, or unsafe fixes. + ### Step 1 - Normalize the case Create `artifacts/domain_runs//case_brief.md` with: @@ -114,6 +158,14 @@ Write `final_status.md` with one of: `needs_exact_capability` is the default status when the business/domain request is valid for the project, but the current contour is missing the route, intent, capability, or domain bootstrap needed to answer it. +`needs_exact_capability` does not automatically stop autonomous pack-loop mode. Treat it as "continue domain enablement work" unless the analyst explicitly marks `requires_user_decision = true`, the runtime is truly blocked, or the loop hits `max_iterations`. 
+ +Autonomous pack-loop mode should stop early and ask the user when at least one of these is true: +- a required observation anchor is missing and cannot be recovered safely from artifacts, 1C, or the current scenario state; +- the next patch would introduce a hack, brittle workaround, hidden heuristic masking, or another low-trust shortcut; +- the next patch would cause risky architecture drift, disproportionate complexity, or a contour expansion with unclear blast radius; +- a business-critical ambiguity or scope tradeoff cannot be resolved from repo context and artifacts alone. + Accepted requires: - quality score >= 80 - no unresolved P0 defects @@ -125,6 +177,7 @@ Accepted requires: - If exact data should exist in 1C/MCP, prefer exact route work over prompt cosmetics. - If exact data does not exist yet in the reachable contour, return a technical insufficiency with a crisp blocker. - If the user case belongs to a project-relevant domain but is outside the current contour, do not treat that as a terminal rejection. Treat it as domain enablement work and record the missing route/intent/capability explicitly. +- Raise `requires_user_decision = true` when the loop would otherwise have to guess a missing anchor, choose between materially different risky implementations, or push through a hacky/suspicious fix path. - Never fabricate 1C data. - Keep domain fixes minimal and localized. - Preserve successful baseline scenarios. 
diff --git a/.codex/skills/domain-case-loop/references/artifact_layout.md b/.codex/skills/domain-case-loop/references/artifact_layout.md index 8ca394e..0f17f9e 100644 --- a/.codex/skills/domain-case-loop/references/artifact_layout.md +++ b/.codex/skills/domain-case-loop/references/artifact_layout.md @@ -21,3 +21,53 @@ artifacts/domain_runs// - rerun_report_case.json - before_after_diff.md - final_status.md + +For each linked domain scenario use: + +artifacts/domain_runs// +- scenario_brief.md +- scenario_manifest.json +- manifest_source.txt +- scenario_state.json +- scenario_output.md +- scenario_summary.md +- final_status.md +- session_id.txt +- steps//output.md +- steps//debug.json +- steps//turn.json +- steps//session.json +- steps//assistant_response.json +- steps//step_state.json +- steps//resolved_question.txt + +For each full domain question pack use: + +artifacts/domain_runs// +- pack_manifest.json +- manifest_source.txt +- pack_state.json +- pack_summary.md +- final_status.md +- scenarios//... + +For each autonomous pack loop use: + +artifacts/domain_runs// +- manifest_source.txt +- loop_state.json +- loop_summary.md +- final_status.md +- iterations//analyst_prompt.md +- iterations//analyst_verdict.json +- iterations//analyst_exec.stdout.log +- iterations//analyst_exec.stderr.log +- iterations//coder_prompt.md +- iterations//coder_result.json +- iterations//coder_plan.md +- iterations//patch_summary.md +- iterations//coder_exec.stdout.log +- iterations//coder_exec.stderr.log +- iterations//pack_run.stdout.log +- iterations//pack_run.stderr.log +- iterations//pack_output/pack_run/... diff --git a/.codex/skills/domain-case-loop/references/repo_runtime_map.md b/.codex/skills/domain-case-loop/references/repo_runtime_map.md index 6805e9b..fbf6f99 100644 --- a/.codex/skills/domain-case-loop/references/repo_runtime_map.md +++ b/.codex/skills/domain-case-loop/references/repo_runtime_map.md @@ -13,10 +13,19 @@ 1. 
Prefer automated capture with: - `python scripts/domain_case_loop.py run-case ...` -2. If baseline already exists as copied markdown export, import it with: +2. For linked multi-step scenarios, capture with: + - `python scripts/domain_case_loop.py run-scenario --manifest ...` +3. For full domain pools grouped into several scenarios, capture with: + - `python scripts/domain_case_loop.py run-pack --manifest ...` +4. For autonomous analyst/coder improvement over a full pack, run: + - `python scripts/domain_case_loop.py run-pack-loop --manifest ...` +5. If baseline already exists as copied markdown export, import it with: - `python scripts/domain_case_loop.py import-export ...` -3. Use `baseline_turn.json` / `rerun_turn.json` as canonical analyst input. -4. Use `baseline_output.md` / `rerun_output.md` as human-readable paired artifacts. +6. Use `baseline_turn.json` / `rerun_turn.json` as canonical analyst input for case mode. +7. Use `scenario_state.json` plus per-step `turn.json` as canonical analyst input for scenario mode. +8. Use `pack_state.json` plus per-scenario `scenario_state.json` as canonical analyst input for pack mode. +9. Use `loop_state.json` plus per-iteration `analyst_verdict.json` / `coder_result.json` as canonical analyst input for autonomous pack-loop mode. +10. Use `baseline_output.md` / `rerun_output.md` or per-step `output.md` as human-readable paired artifacts. 
## Default run assumptions @@ -24,6 +33,9 @@ - eval target: `assistant_stage1` - single-case async run uses generated case id `AUTO-001` - artifact root: `artifacts/domain_runs//` +- scenario capture uses `POST /api/assistant/message` and `GET /api/assistant/session/:session_id` +- live runners perform backend preflight via `GET /api/health` +- `run-pack-loop` defaults to `gpt-5.4` for analyst and `gpt-5.4-mini` for coder ## Important constraints diff --git a/.codex/skills/domain-case-loop/references/scenario_manifest_template.json b/.codex/skills/domain-case-loop/references/scenario_manifest_template.json new file mode 100644 index 0000000..d340fd8 --- /dev/null +++ b/.codex/skills/domain-case-loop/references/scenario_manifest_template.json @@ -0,0 +1,32 @@ +{ + "schema_version": "domain_scenario_manifest_v1", + "scenario_id": "inventory_supplier_trace_demo", + "domain": "inventory_stock", + "title": "Inventory stock -> supplier provenance chain", + "description": "Shared-session scenario for warehouse stock, supplier provenance, and downstream document tracing.", + "analysis_context": { + "as_of_date": "2026-04-13", + "source": "scenario_manifest" + }, + "steps": [ + { + "step_id": "step_01_inventory", + "title": "Current stock snapshot", + "question": "Какие товары сейчас лежат на складе", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_02_supplier", + "title": "Supplier provenance", + "question": "У какого поставщика купили {{step_01_inventory.entries[0].item}}", + "depends_on": ["step_01_inventory"] + }, + { + "step_id": "step_03_documents", + "title": "Purchase documents", + "question": "По каким документам был куплен {{step_01_inventory.entries[0].item}}", + "depends_on": ["step_01_inventory", "step_02_supplier"] + } + ] +} diff --git a/AGENTS.md b/AGENTS.md index 12969f7..b30f290 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,8 +16,11 @@ Rules: ## codex_domain_loop - 
Project-scoped Codex orchestration lives under `.codex/`. - Use `.codex/skills/domain-case-loop` for repeatable domain hardening loops on one concrete case. +- The same skill/launcher also supports multi-step domain scenarios with shared assistant session state under `artifacts/domain_runs/<scenario_id>/steps/`. +- For full domain question pools, use pack mode and aggregate artifacts under `artifacts/domain_runs/<pack_id>/scenarios/`. - Preserve current architecture: domain loop may automate capture, review, rerun, and artifact storage, but must not rewrite runtime foundations. - Prefer machine-readable case artifacts in `artifacts/domain_runs/<case_id>/`, especially `baseline_turn.json` / `rerun_turn.json`, over ad hoc prose-only summaries. +- For cascading user questions in one domain, prefer scenario artifacts (`scenario_manifest.json`, `scenario_state.json`, per-step `turn.json`) over separate unlinked case folders. - If a case falls outside the current routed contour because the route/intent/capability is not wired yet, treat it as domain enablement work for this project, not as automatic out-of-scope rejection. - For new unmarked domains, `needs_exact_capability` means "bootstrap or extend the contour" rather than "close the case as unsupported". - A case can be marked `accepted` only when analyst verdict is at least `80/100`, no unresolved `P0` remains, and the rerun does not mask heuristic output as confirmed. 
diff --git a/artifacts/runtime_logs/backend_stderr.log b/artifacts/runtime_logs/backend_stderr.log new file mode 100644 index 0000000..e69de29 diff --git a/artifacts/runtime_logs/backend_stdout.log b/artifacts/runtime_logs/backend_stdout.log new file mode 100644 index 0000000..68a03b2 --- /dev/null +++ b/artifacts/runtime_logs/backend_stdout.log @@ -0,0 +1 @@ +{"timestamp":"2026-04-14T04:26:37.615Z","level":"info","service":"llm_normalizer_backend","message":"Backend started on http://localhost:8787"} diff --git a/docs/orchestration/domain_inventory_stock_supplier_trace.md b/docs/orchestration/domain_inventory_stock_supplier_trace.md new file mode 100644 index 0000000..831253f --- /dev/null +++ b/docs/orchestration/domain_inventory_stock_supplier_trace.md @@ -0,0 +1,95 @@ +# Domain D - Inventory Stock, Warehouse, Supplier Provenance + +## Meaning + +This domain covers one of the key business goals of the project: +- determine which items are currently on stock; +- understand which items form the balance on account `41.01`; +- trace the current stock back to supplier-side purchase provenance; +- continue the chain into purchase documents and later sale documents. + +It should be treated as project scope, even when the current contour is not yet wired for every question. + +## Question families + +### Stock snapshot +- Какие товары сейчас лежат на складе +- Из каких товаров состоит остаток по 41 счету +- Какие товары числятся на 41 счете на дату ... +- Какие конкретно номенклатуры формируют остаток по складу на дату ... + +### Purchase provenance +- От какого поставщика куплен товар ... +- У какого поставщика были куплены товары, которые сейчас лежат на складе +- По какому поставщику проходит текущий товарный остаток +- Когда был куплен товар ... +- По каким документам был куплен товар ... +- Какие товары от поставщика ... сейчас еще лежат на складе +- Какие товары по состоянию на дату ... были куплены у поставщика ... 
+ +### Aging and unresolved residue +- Какие остатки по товарам относятся к старым закупкам +- Какие товары сейчас висят в остатке без понятной привязки к поставщику +- Есть ли остатки товара, которые закупались очень давно + +### Downstream sales trace +- Кому был продан товар ... +- Через какие документы прошел путь товара: закупка -> склад -> продажа +- Какие товары были куплены у поставщика ... и позже проданы покупателю ... + +## Observed anchors + +The domain pack now carries explicit observed anchors from real stock snapshots so that linked scenarios stay attached to business objects already seen in the system: + +- warehouse: `Основной склад` +- organization: `ООО \Альтернатива Плюс\` +- current stock anchor: `Диван трехместный` +- historical stock anchor on `2020-03-31`: `Шкаф картотечный 1000*400*2100` + +For supplier / buyer endpoints we only have candidate observed counterparties at the moment: + +- supplier candidate: `Гамма-мебель, ООО` +- buyer candidate: `Департамент капитального ремонта города Москвы` + +These candidate counterparties are used to keep the scenario realistic, but they must not be treated as confirmed provenance until an exact trace capability exists. + +## Domain rules + +- `stock snapshot` belongs to exact compute and should prefer confirmed balance routes. +- `supplier provenance` must not silently collapse multiple historical suppliers into one confirmed answer. +- If provenance is not uniquely provable, the system should surface that as unresolved or candidate provenance, not as a fabricated exact answer. +- Missing route, intent, capability, or domain bootstrap is enablement work, not out-of-scope rejection. 
+ +## Capability backlog + +### Ready or near-ready +- `inventory_on_hand_as_of_date` +- `inventory_on_hand_on_account_41_as_of_date` + +### Needs enablement +- `inventory_purchase_provenance_for_item` +- `inventory_purchase_documents_for_item` +- `inventory_supplier_stock_overlap_as_of_date` +- `inventory_sale_trace_for_item` +- `inventory_purchase_to_sale_chain` + +## Scenario orchestration guidance + +For linked user chains in this domain, prefer `run-scenario` over isolated one-off runs. + +Recommended scenario state fields: +- `as_of_date` +- `organization_scope` +- `warehouse` +- `item` +- `supplier` +- `contract` +- `purchase_documents` +- `sale_documents` +- `active_result_set_id` + +This domain is the reference use case for shared-session scenario capture. + +The full question pool is fixed in: +- [domain_inventory_stock_supplier_trace_pack.json](/x:/1C/NDC_1C/docs/orchestration/domain_inventory_stock_supplier_trace_pack.json:1) +- [domain_inventory_stock_supplier_trace_pool.md](/x:/1C/NDC_1C/docs/orchestration/domain_inventory_stock_supplier_trace_pool.md:1) diff --git a/docs/orchestration/domain_inventory_stock_supplier_trace_pack.json b/docs/orchestration/domain_inventory_stock_supplier_trace_pack.json new file mode 100644 index 0000000..0af189b --- /dev/null +++ b/docs/orchestration/domain_inventory_stock_supplier_trace_pack.json @@ -0,0 +1,195 @@ +{ + "schema_version": "domain_scenario_pack_v1", + "pack_id": "inventory_stock_supplier_trace_pool", + "domain": "inventory_stock", + "title": "Inventory stock and supplier provenance question pool", + "description": "Full orchestration pack for the warehouse stock / supplier provenance domain question pool.", + "analysis_context": { + "as_of_date": "2026-04-13", + "source": "scenario_pack" + }, + "bindings": { + "target_date": "2020-03-31", + "observed_warehouse": "Основной склад", + "observed_organization": "ООО \\Альтернатива Плюс\\", + "focus_item_current": "Диван трехместный", + "focus_item_historical": 
"Шкаф картотечный 1000*400*2100", + "observed_supplier_candidate": "Гамма-мебель, ООО", + "observed_customer_candidate": "Департамент капитального ремонта города Москвы" + }, + "scenarios": [ + { + "scenario_id": "inventory_snapshot_core", + "title": "Stock snapshot core", + "description": "Questions about current stock, account 41.01, and date-based snapshots.", + "steps": [ + { + "step_id": "step_01_stock_now", + "title": "Current stock", + "question": "Какие товары сейчас лежат на складе", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_02_account_41_now", + "title": "Account 41 composition", + "question": "Из каких товаров состоит остаток по 41 счету", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_03_account_41_on_date", + "title": "Account 41 on chosen date", + "question": "Какие товары числятся на 41 счете на дату {{bindings.target_date}}", + "analysis_context": { + "as_of_date": "2020-03-31", + "source": "pack_binding_target_date" + }, + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_04_stock_nomenclature_on_date", + "title": "Stock nomenclature on chosen date", + "question": "Какие конкретно номенклатуры формируют остаток по складу на дату {{bindings.target_date}}", + "analysis_context": { + "as_of_date": "2020-03-31", + "source": "pack_binding_target_date" + }, + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + } + ] + }, + { + "scenario_id": "inventory_purchase_provenance", + "title": "Purchase provenance and supplier linkage", + "description": "Questions about suppliers, purchase dates, purchase documents, and supplier-scoped stock.", + "steps": [ + { + "step_id": "step_01_stock_now", + "title": "Current stock", + 
"question": "Какие товары сейчас лежат на складе", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_02_item_supplier", + "title": "Supplier for chosen item", + "question": "От какого поставщика куплен товар {{bindings.focus_item_current}} из текущего остатка на складе {{bindings.observed_warehouse}}", + "depends_on": ["step_01_stock_now"] + }, + { + "step_id": "step_03_suppliers_for_current_stock", + "title": "Suppliers behind current stock", + "question": "У какого поставщика были куплены товары, которые сейчас лежат на складе {{bindings.observed_warehouse}} организации {{bindings.observed_organization}}", + "depends_on": ["step_01_stock_now"] + }, + { + "step_id": "step_04_supplier_current_residue", + "title": "Supplier attribution of current residue", + "question": "По какому поставщику проходит текущий товарный остаток на складе {{bindings.observed_warehouse}}", + "depends_on": ["step_01_stock_now"] + }, + { + "step_id": "step_05_item_purchase_date", + "title": "Purchase date for chosen item", + "question": "Когда был куплен товар {{bindings.focus_item_current}} из текущего остатка на складе {{bindings.observed_warehouse}}", + "depends_on": ["step_01_stock_now", "step_02_item_supplier"] + }, + { + "step_id": "step_06_item_purchase_documents", + "title": "Purchase documents for chosen item", + "question": "По каким документам был куплен товар {{bindings.focus_item_current}} для остатка на складе {{bindings.observed_warehouse}}", + "depends_on": ["step_01_stock_now", "step_02_item_supplier"] + }, + { + "step_id": "step_07_supplier_items_now", + "title": "Current stock by supplier", + "question": "Какие товары от поставщика {{bindings.observed_supplier_candidate}} сейчас еще лежат на складе {{bindings.observed_warehouse}}", + "depends_on": ["step_01_stock_now"] + }, + { + "step_id": "step_08_supplier_items_on_date", + "title": "Supplier items on chosen date", + "question": 
"Какие товары по состоянию на дату {{bindings.target_date}} на складе {{bindings.observed_warehouse}} были куплены у поставщика {{bindings.observed_supplier_candidate}}", + "analysis_context": { + "as_of_date": "2020-03-31", + "source": "pack_binding_target_date" + } + } + ] + }, + { + "scenario_id": "inventory_aging_and_unresolved", + "title": "Aging and unresolved stock residue", + "description": "Questions about old purchases and stock with missing supplier linkage.", + "steps": [ + { + "step_id": "step_01_stock_now", + "title": "Current stock", + "question": "Какие товары сейчас лежат на складе", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_02_old_purchase_residue", + "title": "Old purchase residue", + "question": "Относится ли товар {{bindings.focus_item_historical}} в остатке на дату {{bindings.target_date}} к старым закупкам", + "depends_on": ["step_01_stock_now"], + "analysis_context": { + "as_of_date": "2020-03-31", + "source": "pack_binding_target_date" + } + }, + { + "step_id": "step_03_unresolved_supplier_link", + "title": "Unresolved supplier linkage", + "question": "Есть ли на складе {{bindings.observed_warehouse}} товары с остатком без понятной привязки к поставщику", + "depends_on": ["step_01_stock_now"] + }, + { + "step_id": "step_04_very_old_stock", + "title": "Very old stock", + "question": "Есть ли среди текущих остатков на складе {{bindings.observed_warehouse}} позиции, закупленные задолго до {{bindings.target_date}}", + "depends_on": ["step_01_stock_now", "step_02_old_purchase_residue"] + } + ] + }, + { + "scenario_id": "inventory_sale_trace", + "title": "Sale trace and purchase-to-sale chain", + "description": "Questions about buyers, sale documents, and full purchase -> stock -> sale chains.", + "steps": [ + { + "step_id": "step_01_stock_on_date", + "title": "Historical stock anchor", + "question": "Какие товары числятся на 41 счете на дату 
{{bindings.target_date}} на складе {{bindings.observed_warehouse}}", + "analysis_context": { + "as_of_date": "2020-03-31", + "source": "pack_binding_target_date" + }, + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance" + }, + { + "step_id": "step_02_item_buyer", + "title": "Buyer for chosen item", + "question": "Кому был продан товар {{bindings.focus_item_historical}}", + "depends_on": ["step_01_stock_on_date"] + }, + { + "step_id": "step_03_item_purchase_stock_sale_chain", + "title": "Full document chain for chosen item", + "question": "Через какие документы прошел путь товара {{bindings.focus_item_historical}}: закупка -> склад -> продажа", + "depends_on": ["step_01_stock_on_date", "step_02_item_buyer"] + }, + { + "step_id": "step_04_supplier_to_buyer_chain", + "title": "Supplier to buyer overlap", + "question": "Есть ли документально подтвержденная цепочка: поставщик {{bindings.observed_supplier_candidate}} -> товар {{bindings.focus_item_historical}} -> покупатель {{bindings.observed_customer_candidate}}" + } + ] + } + ] +} diff --git a/docs/orchestration/domain_inventory_stock_supplier_trace_pool.md b/docs/orchestration/domain_inventory_stock_supplier_trace_pool.md new file mode 100644 index 0000000..42024bf --- /dev/null +++ b/docs/orchestration/domain_inventory_stock_supplier_trace_pool.md @@ -0,0 +1,73 @@ +# Inventory Question Pool - Orchestration Map + +## Purpose + +This file fixes the full user question pool for the warehouse stock / supplier provenance domain in orchestration format. 
+ +The runnable pack is: +- [domain_inventory_stock_supplier_trace_pack.json](/x:/1C/NDC_1C/docs/orchestration/domain_inventory_stock_supplier_trace_pack.json:1) + +## Scenario map + +## Observed anchors + +These anchors are taken from the live stock snapshots already captured by the orchestration run and are used to keep provenance / trace scenarios grounded in real observed objects: + +- warehouse: `Основной склад` +- organization: `ООО "Альтернатива Плюс"` +- current focus item: `Диван трехместный` +- historical focus item on `2020-03-31`: `Шкаф картотечный 1000*400*2100` +- supplier candidate from observed counterparty artifacts: `Гамма-мебель, ООО` +- customer candidate from observed counterparty artifacts: `Департамент капитального ремонта города Москвы` + +The supplier/customer anchors above are still `candidate` observations, not confirmed purchase-to-sale truth. + +### Scenario 1 - `inventory_snapshot_core` +- Какие товары сейчас лежат на складе +- Из каких товаров состоит остаток по 41 счету +- Какие товары числятся на 41 счете на дату ... +- Какие конкретно номенклатуры формируют остаток по складу на дату ... + +### Scenario 2 - `inventory_purchase_provenance` +- От какого поставщика куплен товар ... +- У какого поставщика были куплены товары, которые сейчас лежат на складе +- По какому поставщику проходит текущий товарный остаток +- Когда был куплен товар ... +- По каким документам был куплен товар ... +- Какие товары от поставщика ... сейчас еще лежат на складе +- Какие товары по состоянию на дату ... были куплены у поставщика ... + +### Scenario 3 - `inventory_aging_and_unresolved` +- Какие остатки по товарам относятся к старым закупкам +- Какие товары сейчас висят в остатке без понятной привязки к поставщику +- Есть ли остатки товара, которые закупались очень давно + +### Scenario 4 - `inventory_sale_trace` +- Кому был продан товар ... 
+- Через какие документы прошел путь товара: закупка -> склад -> продажа +- Какие товары были куплены у поставщика ... и позже проданы покупателю ... + +This scenario is now anchored to the observed historical item `Шкаф картотечный 1000*400*2100` rather than to an arbitrary item from the current stock list. + +## Current readiness + +### Exact now +- stock snapshot questions on current date or chosen date +- account `41.01` stock composition and nomenclature snapshot + +### Partial now +- some document-oriented questions can route into existing `documents_drilldown`, but this is not yet a true item purchase provenance chain + +### Needs exact capability +- supplier attribution for current residue +- supplier-scoped current stock overlap +- purchase provenance date / supplier trace for one item +- old purchase aging over current residue +- unresolved supplier linkage over current stock +- downstream sale trace and purchase -> stock -> sale chain + +## Operational rule + +This pool is not treated as out-of-scope. + +Every unresolved question in this pack is domain enablement work inside the project contour. diff --git a/docs/orchestration/domain_scenario_loop_repo_adapter.md b/docs/orchestration/domain_scenario_loop_repo_adapter.md new file mode 100644 index 0000000..28964c7 --- /dev/null +++ b/docs/orchestration/domain_scenario_loop_repo_adapter.md @@ -0,0 +1,80 @@ +# Domain Scenario Loop - Repo Adapter + +## Purpose + +This repository now supports four outer-loop capture modes: +- `run-case` for one concrete domain question; +- `run-scenario` for a linked multi-step domain chain that should reuse one assistant session; +- `run-pack` for a whole domain question pool grouped into several scenarios; +- `run-pack-loop` for an autonomous analyst/coder loop over a whole domain pack. + +`run-scenario` is the preferred capture mode for domains where the user's next question depends on the previous result set. 
+`run-pack` is the preferred capture mode when the user brings a full domain pool that should be kept in one aggregate backlog. + +## Runtime contract + +The scenario runner does not introduce a new product runtime. + +It reuses: +- `POST /api/assistant/message` +- `GET /api/assistant/session/:session_id` +- current backend LLM/profile configuration +- current address/deep routing inside the product + +## Artifact contract + +Scenario artifacts live under: + +`artifacts/domain_runs/{scenario_run_id}/` + +Top-level artifacts: +- `scenario_brief.md` +- `scenario_manifest.json` +- `scenario_state.json` +- `scenario_summary.md` +- `scenario_output.md` +- `final_status.md` + +Per-step artifacts: +- `steps/{step_id}/output.md` +- `steps/{step_id}/debug.json` +- `steps/{step_id}/turn.json` +- `steps/{step_id}/session.json` +- `steps/{step_id}/assistant_response.json` +- `steps/{step_id}/step_state.json` + +Pack artifacts live under: + +`artifacts/domain_runs/{pack_run_id}/` + +- `pack_manifest.json` +- `pack_state.json` +- `pack_summary.md` +- `final_status.md` +- `scenarios/{scenario_id}/...` + +## Placeholder contract + +Scenario questions can reference earlier step outputs with placeholders such as: + +- `{{step_01_inventory.entries[0].item}}` +- `{{semantic_memory.active_result_set_id}}` + +This keeps carryover explicit and machine-readable. + +## Status contract + +Scenario capture uses four operational statuses: +- `accepted` +- `partial` +- `blocked` +- `needs_exact_capability` + +`partial` means the scenario executed, but one or more steps still need route hardening, evidence hardening, or presentation hardening. +`needs_exact_capability` means the scenario is valid for the project, but the current contour still lacks the exact route or capability needed to answer it. + +In autonomous pack-loop mode, `partial` and `needs_exact_capability` are non-terminal by default. 
The loop should continue domain enablement work until one of these happens: +- analyst quality reaches the configured acceptance gate, normally `>= 80`; +- the analyst marks `requires_user_decision = true` because the next step would otherwise require guessing a missing required observation, making an architecture-risky change, accepting a hacky/brittle workaround, or choosing a business-critical tradeoff without enough evidence; +- the runtime is truly blocked; +- the loop reaches `max_iterations`. diff --git a/docs/orchestration/schemas/domain_loop_analyst_verdict.schema.json b/docs/orchestration/schemas/domain_loop_analyst_verdict.schema.json new file mode 100644 index 0000000..d49373e --- /dev/null +++ b/docs/orchestration/schemas/domain_loop_analyst_verdict.schema.json @@ -0,0 +1,92 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Domain Loop Analyst Verdict", + "type": "object", + "additionalProperties": false, + "required": [ + "summary", + "quality_score", + "loop_decision", + "requires_user_decision", + "user_decision_type", + "user_decision_prompt", + "unresolved_p0_count", + "regression_detected", + "priority_targets", + "acceptance_criteria", + "notes" + ], + "properties": { + "summary": { + "type": "string" + }, + "quality_score": { + "type": "integer", + "minimum": 0, + "maximum": 100 + }, + "loop_decision": { + "type": "string", + "enum": ["accepted", "continue", "partial", "blocked", "needs_exact_capability"] + }, + "requires_user_decision": { + "type": "boolean", + "description": "Set true only when the autonomous loop must stop and ask the user because the next step is an architecture fork, important business question, scope tradeoff, or another non-autonomous decision." 
+ }, + "user_decision_type": { + "type": "string", + "enum": ["none", "architecture_fork", "important_business_question", "scope_tradeoff", "data_truth_gap", "missing_required_observation", "risky_workaround", "risky_complexity", "other"], + "description": "Explain why the loop needs user input. Use none when requires_user_decision is false." + }, + "user_decision_prompt": { + "type": ["string", "null"], + "description": "Short user-facing question to unblock the loop when requires_user_decision is true, otherwise null." + }, + "unresolved_p0_count": { + "type": "integer", + "minimum": 0 + }, + "regression_detected": { + "type": "boolean" + }, + "priority_targets": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["scenario_id", "step_id", "severity", "problem_type", "fix_goal"], + "properties": { + "scenario_id": { + "type": "string" + }, + "step_id": { + "type": ["string", "null"] + }, + "severity": { + "type": "string", + "enum": ["P0", "P1", "P2"] + }, + "problem_type": { + "type": "string", + "enum": ["route_gap", "capability_gap", "evidence_gap", "presentation_gap", "regression", "other"] + }, + "fix_goal": { + "type": "string" + } + } + } + }, + "acceptance_criteria": { + "type": "array", + "items": { + "type": "string" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } +} diff --git a/docs/orchestration/schemas/domain_loop_coder_result.schema.json b/docs/orchestration/schemas/domain_loop_coder_result.schema.json new file mode 100644 index 0000000..8dfc296 --- /dev/null +++ b/docs/orchestration/schemas/domain_loop_coder_result.schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Domain Loop Coder Result", + "type": "object", + "additionalProperties": false, + "required": [ + "status", + "summary", + "changed_files", + "notes", + "patch_summary_path" + ], + "properties": { + "status": { + "type": "string", + "enum": ["patched", 
"no_changes", "blocked"] + }, + "summary": { + "type": "string" + }, + "changed_files": { + "type": "array", + "items": { + "type": "string" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + }, + "patch_summary_path": { + "type": ["string", "null"] + } + } +} diff --git a/llm_normalizer/backend/dist/services/addressCapabilityPolicy.js b/llm_normalizer/backend/dist/services/addressCapabilityPolicy.js index ea820b9..ceb615c 100644 --- a/llm_normalizer/backend/dist/services/addressCapabilityPolicy.js +++ b/llm_normalizer/backend/dist/services/addressCapabilityPolicy.js @@ -8,6 +8,12 @@ const COMPUTE_EXACT_INTENTS = new Set([ "account_balance_snapshot", "documents_forming_balance", "inventory_on_hand_as_of_date", + "inventory_purchase_provenance_for_item", + "inventory_purchase_documents_for_item", + "inventory_supplier_stock_overlap_as_of_date", + "inventory_sale_trace_for_item", + "inventory_purchase_to_sale_chain", + "inventory_aging_by_purchase_date", "open_contracts_confirmed_as_of_date", "payables_confirmed_as_of_date", "receivables_confirmed_as_of_date", @@ -55,6 +61,14 @@ function defaultCapabilityId(intent) { if (intent === "inventory_on_hand_as_of_date") { return "confirmed_inventory_on_hand_as_of_date"; } + if (intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date") { + return `inventory_${intent}`; + } if (intent === "list_payables_counterparties") { return "payables_candidates_list"; } @@ -122,6 +136,17 @@ function resolveCapabilityEnabled(intent) { : "inventory_on_hand_route_disabled_by_flag" }; } + if (intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === 
"inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date") { + return { + enabled: false, + reason: "inventory_provenance_route_not_implemented" + }; + } if (intent === "list_payables_counterparties") { return { enabled: config_1.FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1, @@ -181,5 +206,13 @@ function resolveShadowRouteIntent(intent, requestedResultMode) { if (intent === "list_payables_counterparties" && requestedResultMode === "confirmed_balance") { return "payables_confirmed_as_of_date"; } + if (intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date") { + return null; + } return null; } diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index 0e15f68..ce8195c 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.extractAddressFilters = extractAddressFilters; const iconv_lite_1 = __importDefault(require("iconv-lite")); const ACCOUNT_PATTERN = /(?:сч[её]т|счет|account)[^0-9]{0,12}(\d{2}(?:[.,]\d{1,2})?)/i; +const ACCOUNT_REVERSE_PATTERN = /(?:^|[\s,.;:!?()\-])(\d{2}(?:[.,]\d{1,2})?)(?=\s*(?:сч[её]т|счет|account|acct))/iu; const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu; const COUNTERPARTY_PATTERN = 
/(?:по\s+контрагенту|контрагент(?:у|а)?|по\s+контре|контра|по\s+компан(?:ии|ию|ия)|компан(?:ия|ии|ию)|по\s+организац(?:ии|ию|ия)|организац(?:ия|ии|ию)|по\s+поставщик(?:у|а)?|поставщик(?:у|а)?|по\s+клиент(?:у|а)?|клиент(?:у|а)?|по\s+покупател(?:ю|я)|покупател(?:ю|я)|по\s+партнер(?:у|а)?|партнер(?:у|а)?|by\s+counterparty|counterparty|by\s+company|company|by\s+supplier|supplier|by\s+vendor|vendor|by\s+customer|customer|by\s+client|client|by\s+partner|partner)\s+([^\r\n,.;:]+)/iu; const CONTRACT_PATTERN = /(?:по\s+(?:договору|контракту)|(?:договор|контракт)(?:у|а)?\s*(?:№|#|n)?|by\s+contract|contract(?:\s*(?:no|number|#|n))?)\s+([^\r\n,.;:]+)/i; @@ -876,7 +877,7 @@ function extractAddressFilters(userMessage, intent) { const warnings = []; const explicitAsOfDate = extractAsOfDate(text); const explicitAsOfDateWithCue = extractAsOfDateWithCue(text); - const accountMatch = text.match(ACCOUNT_PATTERN); + const accountMatch = text.match(ACCOUNT_REVERSE_PATTERN) ?? text.match(ACCOUNT_PATTERN); if (accountMatch) { filters.account = String(accountMatch[1]).replace(",", "."); } diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index bc0fc85..b655407 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1307,11 +1307,56 @@ function hasInventoryOnHandSignal(text) { if (!hasStockLexeme) { return false; } + if (hasInventoryProvenanceSignalV2(text) || + hasInventoryPurchaseDocumentsSignalV2(text) || + hasInventorySaleTraceSignalV2(text)) { + return false; + } const hasGoodsLexeme = /(?:товар(?:ы|ов|ом|а|ные)?|номенклатур|материал(?:ы|ов|а|ам)?|item(?:s)?|sku|product(?:s)?)/iu.test(text); const hasBalanceLexeme = /(?:леж(?:ит|ат)|есть|числ(?:ит(?:ся|сь)|ятся)|остат(?:ок|ки)|срез|на\s+дат|по\s+состоянию|на\s+конец|today|now|current|as\s+of)/iu.test(text); const hasRequestCue = 
/(?:покажи|показать|выведи|дай|какие|что|какой|сколько|show|list|which|what)/iu.test(text); return (hasGoodsLexeme || hasBalanceLexeme) && (hasRequestCue || hasBalanceLexeme); } +function hasInventoryProvenanceSignal(text) { + return /(?:поставщик|закупк|РїСЂРѕРёСЃС…РѕР¶Рґ|откуда|РєРѕРіРґР° был куплен|активная закупк|purchase provenance|purchase date|supplier provenance|stock overlap)/iu.test(text); +} +function hasInventoryPurchaseDocumentsSignal(text) { + return /(?:РїРѕ каким документам|документы закупки|purchase documents|documents of purchase|through which documents|chain of documents)/iu.test(text); +} +function hasInventorySaleTraceSignal(text) { + return /(?:продаж|покупател|buyer|sale trace|purchase[\s-]?to[\s-]?sale|purchase -> warehouse -> sale|закупка.*продаж)/iu.test(text); +} +function hasInventoryProvenanceSignalV2(text) { + const hasItemCue = /(?:товар|номенклатур|sku|item|product|остат(?:ок|ки)|склад)/iu.test(text); + const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|поставщик|supplier|vendor)/iu.test(text); + const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|purchase\s+provenance|purchase\s+date)/iu.test(text); + return hasItemCue && hasSupplierCue && hasPurchaseCue; +} +function hasInventoryPurchaseDocumentsSignalV2(text) { + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasPurchaseDocCue = /(?:по\s+каким\s+документам\s+был\s+куплен|по\s+каким\s+документам\s+куплен|какими\s+документами\s+был\s+куплен|документ(?:ам|ы)\s+закупк|purchase\s+documents|documents\s+of\s+purchase|through\s+which\s+documents)/iu.test(text); + return hasItemCue && hasPurchaseDocCue; +} +function hasInventorySaleTraceSignalV2(text) { + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasTraceCue = 
/(?:кому\s+был\s+продан|кто\s+купил|buyer|sale\s+trace|trace\s+of\s+sale|через\s+какие\s+документы\s+прош[её]л\s+путь\s+товара|закупк.*склад.*продаж|purchase[\s-]?to[\s-]?sale|purchase\s*->\s*warehouse\s*->\s*sale|purchase\s*->\s*stock\s*->\s*sale)/iu.test(text); + return hasItemCue && hasTraceCue; +} +function hasInventorySupplierStockOverlapSignal(text) { + const hasSupplierCue = /(?:поставщик|supplier|vendor|от\s+поставщика|у\s+поставщика)/iu.test(text); + const hasStockCue = /(?:товар|номенклатур|склад|остат(?:ок|ки)|лежат|на\s+дату|по\s+состоянию\s+на\s+дату|current\s+stock|stock\s+overlap|что\s+сейчас\s+лежит)/iu.test(text); + return hasSupplierCue && hasStockCue; +} +function hasInventoryAgingSignal(text) { + return /(?:стар(?:ые|ым|ых)\s+закупк|закупал(?:ись|ся)\s+очень\s+давно|очень\s+давно|давно\s+куплен|когда\s+куплен|возраст\s+остатк|aged?\s+stock|old\s+purchase|aging\s+by\s+purchase\s+date|very\s+old\s+stock)/iu.test(text); +} +function hasInventoryPurchaseToSaleChainSignal(text) { + const hasSupplierCue = /(?:поставщик|supplier|vendor|от\s+кого\s+куплен)/iu.test(text); + const hasBuyerCue = /(?:покупател|buyer|customer|client|кому\s+был\s+продан)/iu.test(text); + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasPurchaseSaleCue = /(?:куплен(?:ы)?|закупк|позже\s+продан(?:ы)?|продан(?:ы)?|purchase|sale|цепочк[аи]\s+движен)/iu.test(text); + return (hasSupplierCue && hasBuyerCue && hasItemCue && hasPurchaseSaleCue) || /(?:purchase[\s-]?to[\s-]?sale\s+chain|закупка\s*->\s*склад\s*->\s*продажа)/iu.test(text); +} function resolveAddressIntent(userMessage) { const text = String(userMessage ?? 
"").trim().toLowerCase(); if (hasVatLiabilityConfirmedTaxPeriodSignal(text)) { @@ -1414,6 +1459,48 @@ function resolveAddressIntent(userMessage) { reasons: ["documents_by_account_drilldown_signal_detected"] }; } + if (hasInventoryProvenanceSignalV2(text)) { + return { + intent: "inventory_purchase_provenance_for_item", + confidence: "medium", + reasons: ["inventory_provenance_signal_detected"] + }; + } + if (hasInventoryPurchaseDocumentsSignalV2(text)) { + return { + intent: "inventory_purchase_documents_for_item", + confidence: "medium", + reasons: ["inventory_purchase_documents_signal_detected"] + }; + } + if (hasInventoryPurchaseToSaleChainSignal(text)) { + return { + intent: "inventory_purchase_to_sale_chain", + confidence: "medium", + reasons: ["inventory_purchase_to_sale_chain_signal_detected"] + }; + } + if (hasInventorySupplierStockOverlapSignal(text)) { + return { + intent: "inventory_supplier_stock_overlap_as_of_date", + confidence: "medium", + reasons: ["inventory_supplier_stock_overlap_signal_detected"] + }; + } + if (hasInventoryAgingSignal(text)) { + return { + intent: "inventory_aging_by_purchase_date", + confidence: "medium", + reasons: ["inventory_aging_signal_detected"] + }; + } + if (hasInventorySaleTraceSignalV2(text)) { + return { + intent: "inventory_sale_trace_for_item", + confidence: "medium", + reasons: ["inventory_sale_trace_signal_detected"] + }; + } if (hasInventoryOnHandSignal(text)) { return { intent: "inventory_on_hand_as_of_date", @@ -1430,6 +1517,10 @@ function resolveAddressIntent(userMessage) { } if (hasAny(text, OPEN_ITEMS_HINTS) && !hasCounterpartyDebtLongevitySignal(text) && + !hasInventoryAgingSignal(text) && + !hasInventoryProvenanceSignalV2(text) && + !hasInventoryPurchaseDocumentsSignalV2(text) && + !hasInventorySaleTraceSignalV2(text) && /(?:контраг|договор|контракт|counterparty|contract|покупател|клиент|заказчик|customer|client|buyer|supplier|поставщик)/iu.test(text)) { return { intent: 
"open_items_by_counterparty_or_contract", diff --git a/llm_normalizer/backend/src/services/addressCapabilityPolicy.ts b/llm_normalizer/backend/src/services/addressCapabilityPolicy.ts index 56a7308..e539713 100644 --- a/llm_normalizer/backend/src/services/addressCapabilityPolicy.ts +++ b/llm_normalizer/backend/src/services/addressCapabilityPolicy.ts @@ -27,6 +27,12 @@ const COMPUTE_EXACT_INTENTS = new Set([ "account_balance_snapshot", "documents_forming_balance", "inventory_on_hand_as_of_date", + "inventory_purchase_provenance_for_item", + "inventory_purchase_documents_for_item", + "inventory_supplier_stock_overlap_as_of_date", + "inventory_sale_trace_for_item", + "inventory_purchase_to_sale_chain", + "inventory_aging_by_purchase_date", "open_contracts_confirmed_as_of_date", "payables_confirmed_as_of_date", "receivables_confirmed_as_of_date", @@ -78,6 +84,16 @@ function defaultCapabilityId(intent: AddressIntent): string { if (intent === "inventory_on_hand_as_of_date") { return "confirmed_inventory_on_hand_as_of_date"; } + if ( + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date" + ) { + return `inventory_${intent}` as const; + } if (intent === "list_payables_counterparties") { return "payables_candidates_list"; } @@ -146,6 +162,19 @@ function resolveCapabilityEnabled(intent: AddressIntent): { enabled: boolean; re : "inventory_on_hand_route_disabled_by_flag" }; } + if ( + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date" + ) { + return { 
+ enabled: false, + reason: "inventory_provenance_route_not_implemented" + }; + } if (intent === "list_payables_counterparties") { return { enabled: FEATURE_ASSISTANT_ROUTE_PAYABLES_HEURISTIC_V1, @@ -211,5 +240,15 @@ export function resolveShadowRouteIntent( if (intent === "list_payables_counterparties" && requestedResultMode === "confirmed_balance") { return "payables_confirmed_as_of_date"; } + if ( + intent === "inventory_purchase_provenance_for_item" || + intent === "inventory_purchase_documents_for_item" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "inventory_sale_trace_for_item" || + intent === "inventory_purchase_to_sale_chain" || + intent === "inventory_aging_by_purchase_date" + ) { + return null; + } return null; } diff --git a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index b1bdebd..595140f 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -2,6 +2,8 @@ import iconv from "iconv-lite"; const ACCOUNT_PATTERN = /(?:сч[её]т|счет|account)[^0-9]{0,12}(\d{2}(?:[.,]\d{1,2})?)/i; +const ACCOUNT_REVERSE_PATTERN = + /(?:^|[\s,.;:!?()\-])(\d{2}(?:[.,]\d{1,2})?)(?=\s*(?:сч[её]т|счет|account|acct))/iu; const LIMIT_PATTERN = /(?:\btop\b|\blimit\b|первые|топ)[\s\-–—_:№#]*?(\d{1,3})/iu; const COUNTERPARTY_PATTERN = /(?:по\s+контрагенту|контрагент(?:у|а)?|по\s+контре|контра|по\s+компан(?:ии|ию|ия)|компан(?:ия|ии|ию)|по\s+организац(?:ии|ию|ия)|организац(?:ия|ии|ию)|по\s+поставщик(?:у|а)?|поставщик(?:у|а)?|по\s+клиент(?:у|а)?|клиент(?:у|а)?|по\s+покупател(?:ю|я)|покупател(?:ю|я)|по\s+партнер(?:у|а)?|партнер(?:у|а)?|by\s+counterparty|counterparty|by\s+company|company|by\s+supplier|supplier|by\s+vendor|vendor|by\s+customer|customer|by\s+client|client|by\s+partner|partner)\s+([^\r\n,.;:]+)/iu; @@ -995,7 +997,7 @@ export function extractAddressFilters(userMessage: string, 
intent: AddressIntent const explicitAsOfDate = extractAsOfDate(text); const explicitAsOfDateWithCue = extractAsOfDateWithCue(text); - const accountMatch = text.match(ACCOUNT_PATTERN); + const accountMatch = text.match(ACCOUNT_REVERSE_PATTERN) ?? text.match(ACCOUNT_PATTERN); if (accountMatch) { filters.account = String(accountMatch[1]).replace(",", "."); } @@ -1221,4 +1223,3 @@ export function extractAddressFilters(userMessage: string, intent: AddressIntent }; } - diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index 5812652..dc08c74 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -1548,6 +1548,13 @@ function hasInventoryOnHandSignal(text: string): boolean { if (!hasStockLexeme) { return false; } + if ( + hasInventoryProvenanceSignalV2(text) || + hasInventoryPurchaseDocumentsSignalV2(text) || + hasInventorySaleTraceSignalV2(text) + ) { + return false; + } const hasGoodsLexeme = /(?:товар(?:ы|ов|ом|а|ные)?|номенклатур|материал(?:ы|ов|а|ам)?|item(?:s)?|sku|product(?:s)?)/iu.test(text); const hasBalanceLexeme = @@ -1558,6 +1565,69 @@ function hasInventoryOnHandSignal(text: string): boolean { return (hasGoodsLexeme || hasBalanceLexeme) && (hasRequestCue || hasBalanceLexeme); } +function hasInventoryProvenanceSignal(text: string): boolean { + return /(?:поставщик|закупк|РїСЂРѕРёСЃС…РѕР¶Рґ|откуда|РєРѕРіРґР° был куплен|активная закупк|purchase provenance|purchase date|supplier provenance|stock overlap)/iu.test( + text + ); +} + +function hasInventoryPurchaseDocumentsSignal(text: string): boolean { + return /(?:РїРѕ каким документам|документы закупки|purchase documents|documents of purchase|through which documents|chain of documents)/iu.test( + text + ); +} + +function hasInventorySaleTraceSignal(text: string): boolean { + return /(?:продаж|покупател|buyer|sale 
trace|purchase[\s-]?to[\s-]?sale|purchase -> warehouse -> sale|закупка.*продаж)/iu.test( + text + ); +} + +function hasInventoryProvenanceSignalV2(text: string): boolean { + const hasItemCue = /(?:товар|номенклатур|sku|item|product|остат(?:ок|ки)|склад)/iu.test(text); + const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|поставщик|supplier|vendor)/iu.test(text); + const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|purchase\s+provenance|purchase\s+date)/iu.test(text); + return hasItemCue && hasSupplierCue && hasPurchaseCue; +} + +function hasInventoryPurchaseDocumentsSignalV2(text: string): boolean { + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasPurchaseDocCue = /(?:по\s+каким\s+документам\s+был\s+куплен|по\s+каким\s+документам\s+куплен|какими\s+документами\s+был\s+куплен|документ(?:ам|ы)\s+закупк|purchase\s+documents|documents\s+of\s+purchase|through\s+which\s+documents)/iu.test( + text + ); + return hasItemCue && hasPurchaseDocCue; +} + +function hasInventorySaleTraceSignalV2(text: string): boolean { + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasTraceCue = /(?:кому\s+был\s+продан|кто\s+купил|buyer|sale\s+trace|trace\s+of\s+sale|через\s+какие\s+документы\s+прош[её]л\s+путь\s+товара|закупк.*склад.*продаж|purchase[\s-]?to[\s-]?sale|purchase\s*->\s*warehouse\s*->\s*sale|purchase\s*->\s*stock\s*->\s*sale)/iu.test( + text + ); + return hasItemCue && hasTraceCue; +} + +function hasInventorySupplierStockOverlapSignal(text: string): boolean { + const hasSupplierCue = /(?:поставщик|supplier|vendor|от\s+поставщика|у\s+поставщика)/iu.test(text); + const hasStockCue = /(?:товар|номенклатур|склад|остат(?:ок|ки)|лежат|на\s+дату|по\s+состоянию\s+на\s+дату|current\s+stock|stock\s+overlap|что\s+сейчас\s+лежит)/iu.test( + text + ); + return hasSupplierCue && hasStockCue; +} + +function 
hasInventoryAgingSignal(text: string): boolean { + return /(?:стар(?:ые|ым|ых)\s+закупк|закупал(?:ись|ся)\s+очень\s+давно|очень\s+давно|давно\s+куплен|когда\s+куплен|возраст\s+остатк|aged?\s+stock|old\s+purchase|aging\s+by\s+purchase\s+date|very\s+old\s+stock)/iu.test( + text + ); +} + +function hasInventoryPurchaseToSaleChainSignal(text: string): boolean { + const hasSupplierCue = /(?:поставщик|supplier|vendor|от\s+кого\s+куплен)/iu.test(text); + const hasBuyerCue = /(?:покупател|buyer|customer|client|кому\s+был\s+продан)/iu.test(text); + const hasItemCue = /(?:товар|номенклатур|sku|item|product)/iu.test(text); + const hasPurchaseSaleCue = /(?:куплен(?:ы)?|закупк|позже\s+продан(?:ы)?|продан(?:ы)?|purchase|sale|цепочк[аи]\s+движен)/iu.test(text); + return (hasSupplierCue && hasBuyerCue && hasItemCue && hasPurchaseSaleCue) || /(?:purchase[\s-]?to[\s-]?sale\s+chain|закупка\s*->\s*склад\s*->\s*продажа)/iu.test(text); +} + export function resolveAddressIntent(userMessage: string): AddressIntentResolution { const text = String(userMessage ?? 
"").trim().toLowerCase(); @@ -1675,6 +1745,54 @@ export function resolveAddressIntent(userMessage: string): AddressIntentResoluti }; } + if (hasInventoryProvenanceSignalV2(text)) { + return { + intent: "inventory_purchase_provenance_for_item", + confidence: "medium", + reasons: ["inventory_provenance_signal_detected"] + }; + } + + if (hasInventoryPurchaseDocumentsSignalV2(text)) { + return { + intent: "inventory_purchase_documents_for_item", + confidence: "medium", + reasons: ["inventory_purchase_documents_signal_detected"] + }; + } + + if (hasInventoryPurchaseToSaleChainSignal(text)) { + return { + intent: "inventory_purchase_to_sale_chain", + confidence: "medium", + reasons: ["inventory_purchase_to_sale_chain_signal_detected"] + }; + } + + if (hasInventorySupplierStockOverlapSignal(text)) { + return { + intent: "inventory_supplier_stock_overlap_as_of_date", + confidence: "medium", + reasons: ["inventory_supplier_stock_overlap_signal_detected"] + }; + } + + if (hasInventoryAgingSignal(text)) { + return { + intent: "inventory_aging_by_purchase_date", + confidence: "medium", + reasons: ["inventory_aging_signal_detected"] + }; + } + + if (hasInventorySaleTraceSignalV2(text)) { + return { + intent: "inventory_sale_trace_for_item", + confidence: "medium", + reasons: ["inventory_sale_trace_signal_detected"] + }; + } + if (hasInventoryOnHandSignal(text)) { return { intent: "inventory_on_hand_as_of_date", @@ -1694,6 +1812,10 @@ export function resolveAddressIntent(userMessage: string): AddressIntentResoluti if ( hasAny(text, OPEN_ITEMS_HINTS) && !hasCounterpartyDebtLongevitySignal(text) && + !hasInventoryAgingSignal(text) && + !hasInventoryProvenanceSignalV2(text) && + !hasInventoryPurchaseDocumentsSignalV2(text) && + !hasInventorySaleTraceSignalV2(text) && /(?:контраг|договор|контракт|counterparty|contract|покупател|клиент|заказчик|customer|client|buyer|supplier|поставщик)/iu.test( text ) diff --git a/llm_normalizer/backend/src/types/addressQuery.ts 
b/llm_normalizer/backend/src/types/addressQuery.ts index 2264338..56ca635 100644 --- a/llm_normalizer/backend/src/types/addressQuery.ts +++ b/llm_normalizer/backend/src/types/addressQuery.ts @@ -20,6 +20,12 @@ export type AddressIntent = | "receivables_confirmed_as_of_date" | "list_receivables_counterparties" | "inventory_on_hand_as_of_date" + | "inventory_purchase_provenance_for_item" + | "inventory_purchase_documents_for_item" + | "inventory_supplier_stock_overlap_as_of_date" + | "inventory_sale_trace_for_item" + | "inventory_purchase_to_sale_chain" + | "inventory_aging_by_purchase_date" | "account_balance_snapshot" | "open_items_by_counterparty_or_contract" | "list_documents_by_counterparty" @@ -140,7 +146,11 @@ export interface AddressRecipeDefinition { | "open_contracts_confirmed_as_of_balance_profile" | "payables_confirmed_as_of_balance_profile" | "receivables_confirmed_as_of_balance_profile" - | "inventory_on_hand_as_of_balance_profile"; + | "inventory_on_hand_as_of_balance_profile" + | "inventory_purchase_provenance_profile" + | "inventory_purchase_documents_profile" + | "inventory_supplier_stock_overlap_profile" + | "inventory_sale_trace_profile"; required_filters: Array; optional_filters: Array; default_limit: number; diff --git a/llm_normalizer/backend/tests/addressCapabilityPolicy.test.ts b/llm_normalizer/backend/tests/addressCapabilityPolicy.test.ts index e50f340..9e2acaf 100644 --- a/llm_normalizer/backend/tests/addressCapabilityPolicy.test.ts +++ b/llm_normalizer/backend/tests/addressCapabilityPolicy.test.ts @@ -42,6 +42,30 @@ describe("address capability policy", () => { expect(isCapabilityRouteBlocked(decision)).toBe(false); }); + it("marks inventory provenance intents as blocked exact-capability gaps", () => { + const decision = resolveAddressCapabilityRouteDecision("inventory_purchase_provenance_for_item"); + expect(decision.capability_id).toBe("inventory_inventory_purchase_provenance_for_item"); + 
expect(decision.capability_layer).toBe("compute"); + expect(decision.capability_route_mode).toBe("exact"); + expect(decision.capability_route_enabled).toBe(false); + expect(decision.capability_route_reason).toBe("inventory_provenance_route_not_implemented"); + expect(isCapabilityRouteBlocked(decision)).toBe(true); + }); + + it("marks purchase-to-sale trace and aging intents as blocked exact-capability gaps", () => { + const chainDecision = resolveAddressCapabilityRouteDecision("inventory_purchase_to_sale_chain"); + expect(chainDecision.capability_id).toBe("inventory_inventory_purchase_to_sale_chain"); + expect(chainDecision.capability_route_mode).toBe("exact"); + expect(chainDecision.capability_route_enabled).toBe(false); + expect(isCapabilityRouteBlocked(chainDecision)).toBe(true); + + const agingDecision = resolveAddressCapabilityRouteDecision("inventory_aging_by_purchase_date"); + expect(agingDecision.capability_id).toBe("inventory_inventory_aging_by_purchase_date"); + expect(agingDecision.capability_route_mode).toBe("exact"); + expect(agingDecision.capability_route_enabled).toBe(false); + expect(isCapabilityRouteBlocked(agingDecision)).toBe(true); + }); + it("maps document drilldown intent to navigation capability", () => { const decision = resolveAddressCapabilityRouteDecision("list_documents_by_contract"); expect(decision.capability_id).toBe("documents_drilldown"); diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index 38475d2..d206806 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -4394,5 +4394,51 @@ describe("address recipe catalog counterparty filtering", () => { expect(reply.semantics?.result_mode).toBe("confirmed_balance"); expect(reply.semantics?.balance_confirmed).toBe(true); }); -}); + it("routes inventory provenance questions to a dedicated intent", () => { + const result 
= resolveAddressIntent("От какого поставщика куплен товар Шкаф картоотечный?"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + + it("keeps inventory supplier overlap questions out of on-hand routing", () => { + const result = resolveAddressIntent("Какие товары от поставщика Альфа сейчас лежат на складе?"); + expect(result.intent).toBe("inventory_supplier_stock_overlap_as_of_date"); + }); + + it("routes inventory purchase document questions to a dedicated intent", () => { + const result = resolveAddressIntent("По каким документам был куплен товар Шкаф картоотечный?"); + expect(result.intent).toBe("inventory_purchase_documents_for_item"); + }); + + it("routes inventory sale chain questions to a dedicated intent", () => { + const result = resolveAddressIntent( + "Через какие документы прошел путь товара Шкаф картоотечный: закупка -> склад -> продажа?" + ); + expect(result.intent).toBe("inventory_sale_trace_for_item"); + }); + + it("keeps inventory provenance wording out of inventory-on-hand routing", () => { + const result = resolveAddressIntent("От кого куплен товар Шкаф картоотечный и когда был куплен?"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + + it("keeps aging wording out of open-items and bank routing", () => { + const result = resolveAddressIntent("Какие товары были куплены очень давно и до сих пор лежат на складе?"); + expect(result.intent).toBe("inventory_aging_by_purchase_date"); + }); + + it("routes purchase-document trace wording to dedicated inventory intent", () => { + const result = resolveAddressIntent("Какими документами был куплен товар Шкаф картоотечный?"); + expect(result.intent).toBe("inventory_purchase_documents_for_item"); + }); + it("keeps very old stock wording in dedicated aging intent", () => { + const result = resolveAddressIntent("Есть ли остатки товара, которые закупались очень давно?"); + expect(result.intent).toBe("inventory_aging_by_purchase_date"); + }); + + 
it("keeps unresolved stock provenance wording out of open-items routing", () => { + const result = resolveAddressIntent("Какие товары сейчас висят в остатке без понятной привязки к поставщику?"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + +}); diff --git a/scripts/domain_case_loop.py b/scripts/domain_case_loop.py index bf0c102..b870fa2 100644 --- a/scripts/domain_case_loop.py +++ b/scripts/domain_case_loop.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse import json import re +import subprocess import sys import textwrap import time @@ -17,6 +18,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs" DEFAULT_SESSIONS_DIR = REPO_ROOT / "llm_normalizer" / "data" / "assistant_sessions" DEFAULT_REPORTS_DIR = REPO_ROOT / "llm_normalizer" / "reports" +DEFAULT_LOOP_SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas" DEFAULT_BACKEND_URL = "http://127.0.0.1:8787" DEFAULT_PROMPT_VERSION = "address_query_runtime_v1" DEFAULT_LLM_PROVIDER = "local" @@ -26,6 +28,11 @@ DEFAULT_LLM_API_KEY = "" DEFAULT_TEMPERATURE = 0.0 DEFAULT_MAX_OUTPUT_TOKENS = 900 TECH_SECTION_HEADER = "### technical_debug_payload_json" +SCENARIO_MANIFEST_SCHEMA_VERSION = "domain_scenario_manifest_v1" +SCENARIO_STATE_SCHEMA_VERSION = "domain_scenario_state_v1" +SCENARIO_STEP_STATE_SCHEMA_VERSION = "domain_scenario_step_state_v1" +SCENARIO_PACK_SCHEMA_VERSION = "domain_scenario_pack_v1" +AUTONOMOUS_LOOP_SCHEMA_VERSION = "domain_autonomous_loop_v1" def dump_json(payload: Any) -> str: @@ -41,6 +48,10 @@ def write_json(file_path: Path, payload: Any) -> None: write_text(file_path, dump_json(payload) + "\n") +def read_text_file(file_path: Path) -> str: + return file_path.read_text(encoding="utf-8-sig") + + def sanitize_export_text(value: str) -> str: raw = str(value or "") debug_heading = re.search( @@ -102,14 +113,91 @@ def build_conversation_export(session_id: str, conversation: 
list[dict[str, Any] return "\n".join(lines) +def slugify_token(value: str | None, fallback: str) -> str: + cleaned = re.sub(r"[^\w-]+", "_", str(value or "").strip(), flags=re.UNICODE).strip("_") + return cleaned or fallback + + def slugify_case_id(domain: str, explicit_case_id: str | None) -> str: if explicit_case_id: normalized = explicit_case_id.strip() if normalized: return normalized timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") - cleaned_domain = re.sub(r"[^0-9A-Za-zА-Яа-я_-]+", "_", domain.strip(), flags=re.UNICODE).strip("_") - return f"{cleaned_domain or 'domain_case'}_{timestamp}" + return f"{slugify_token(domain, 'domain_case')}_{timestamp}" + + +def normalize_iso_date(value: Any) -> str | None: + if not isinstance(value, str): + return None + candidate = value.strip() + if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", candidate): + return None + try: + datetime.strptime(candidate, "%Y-%m-%d") + except ValueError: + return None + return candidate + + +def normalize_analysis_context(raw_context: Any) -> dict[str, Any]: + source = raw_context if isinstance(raw_context, dict) else {} + output: dict[str, Any] = {} + as_of_date = normalize_iso_date(source.get("as_of_date")) + period_from = normalize_iso_date(source.get("period_from")) + period_to = normalize_iso_date(source.get("period_to")) + snapshot_mode_raw = str(source.get("snapshot_mode") or "").strip() + source_raw = str(source.get("source") or "").strip() + if as_of_date: + output["as_of_date"] = as_of_date + if period_from: + output["period_from"] = period_from + if period_to: + output["period_to"] = period_to + if snapshot_mode_raw in {"auto", "force_snapshot", "force_live"}: + output["snapshot_mode"] = snapshot_mode_raw + if source_raw: + output["source"] = source_raw + return output + + +def merge_analysis_context(base_context: Any, override_context: Any) -> dict[str, Any]: + merged = normalize_analysis_context(base_context) + merged.update(normalize_analysis_context(override_context)) 
+ return merged + + +def repair_text_mojibake(value: str) -> str: + if not value: + return value + for encoding in ("utf-8", "cp1251", "cp866"): + try: + repaired = value.encode("latin1").decode(encoding) + except (UnicodeEncodeError, UnicodeDecodeError): + continue + if repaired != value: + return repaired + return value + + +def normalize_binding_value(value: Any) -> Any: + if isinstance(value, str): + return repair_text_mojibake(value) + if isinstance(value, list): + return [normalize_binding_value(item) for item in value] + if isinstance(value, dict): + return {normalize_binding_value(key) if isinstance(key, str) else key: normalize_binding_value(item) for key, item in value.items()} + return value + + +def normalize_bindings(raw_bindings: Any) -> dict[str, Any]: + if not isinstance(raw_bindings, dict): + return {} + return {str(key): normalize_binding_value(value) for key, value in raw_bindings.items()} + + +def drop_none_values(payload: dict[str, Any]) -> dict[str, Any]: + return {key: value for key, value in payload.items() if value is not None} def http_json(url: str, *, method: str = "GET", payload: dict[str, Any] | None = None, timeout: int = 30) -> dict[str, Any]: @@ -133,6 +221,15 @@ def http_json(url: str, *, method: str = "GET", payload: dict[str, Any] | None = raise RuntimeError(f"Backend returned non-JSON payload for {url}") from error +def ensure_backend_health(backend_url: str, timeout_seconds: int) -> dict[str, Any]: + health = http_json(f"{backend_url}/api/health", timeout=max(5, min(timeout_seconds, 30))) + if not isinstance(health, dict): + raise RuntimeError(f"Backend health endpoint returned invalid payload at {backend_url}/api/health") + if health.get("ok") is False: + raise RuntimeError(f"Backend health check failed for {backend_url}: {dump_json(health)}") + return health + + def wait_for_job(backend_url: str, job_id: str, timeout_seconds: int, poll_interval_seconds: float) -> dict[str, Any]: deadline = time.time() + timeout_seconds 
last_status = None @@ -161,13 +258,33 @@ def wait_for_file(file_path: Path, timeout_seconds: int = 30) -> None: def read_json_file(file_path: Path) -> dict[str, Any]: - return json.loads(file_path.read_text(encoding="utf-8-sig")) + return json.loads(read_text_file(file_path)) def extract_conversation_from_session(session_record: dict[str, Any]) -> list[dict[str, Any]]: + items = session_record.get("items") + if isinstance(items, list) and items: + output: list[dict[str, Any]] = [] + for item in items: + if not isinstance(item, dict): + continue + output.append( + { + "message_id": item.get("message_id"), + "role": item.get("role"), + "text": item.get("text") or "", + "reply_type": item.get("reply_type"), + "created_at": item.get("created_at"), + "trace_id": item.get("trace_id"), + "debug": item.get("debug"), + } + ) + if output: + return output + conversation = session_record.get("conversation") if isinstance(conversation, list) and conversation: - output: list[dict[str, Any]] = [] + output = [] for item in conversation: if not isinstance(item, dict): continue @@ -189,7 +306,7 @@ def extract_conversation_from_session(session_record: dict[str, Any]) -> list[di if not isinstance(turns, list): return [] - output: list[dict[str, Any]] = [] + output = [] for turn in turns: if not isinstance(turn, dict): continue @@ -267,6 +384,7 @@ def build_turn_artifact( last_user = find_last_user_before(conversation, last_assistant.get("message_id")) final_question = question or (last_user.get("text") if isinstance(last_user, dict) else None) last_turn = extract_last_turn(session_record or {}) + session_record_safe = session_record or {} return { "schema_version": "domain_case_turn_artifact_v1", "artifact_slot": slot, @@ -284,12 +402,13 @@ def build_turn_artifact( "assistant_message": last_assistant, "technical_debug_payload": last_assistant.get("debug"), "session_summary": { - "schema_version": session_record.get("schema_version") if isinstance(session_record, dict) else None, - 
"updated_at": session_record.get("updated_at") if isinstance(session_record, dict) else None, - "trace_ids": session_record.get("trace_ids") if isinstance(session_record, dict) else None, - "reply_types": session_record.get("reply_types") if isinstance(session_record, dict) else None, - "investigation_state": session_record.get("investigation_state") if isinstance(session_record, dict) else None, - "address_navigation_state": session_record.get("address_navigation_state") if isinstance(session_record, dict) else None, + "schema_version": session_record_safe.get("schema_version"), + "updated_at": session_record_safe.get("updated_at"), + "trace_ids": session_record_safe.get("trace_ids"), + "reply_types": session_record_safe.get("reply_types"), + "investigation_state": session_record_safe.get("investigation_state"), + "address_navigation_state": session_record_safe.get("address_navigation_state"), + "items_count": len(session_record_safe.get("items", [])) if isinstance(session_record_safe.get("items"), list) else None, "last_turn": last_turn, }, "report_case": report_case, @@ -438,7 +557,760 @@ def parse_export_markdown(text: str) -> tuple[str, list[dict[str, Any]]]: return session_id, conversation +def build_normalize_config(args: argparse.Namespace) -> dict[str, Any]: + return { + "llmProvider": args.llm_provider, + "apiKey": args.llm_api_key, + "model": args.llm_model, + "baseUrl": args.llm_base_url, + "temperature": args.temperature, + "maxOutputTokens": args.max_output_tokens, + "promptVersion": args.prompt_version, + } + + +def fetch_session_snapshot(backend_url: str, session_id: str, timeout_seconds: int) -> dict[str, Any]: + deadline = time.time() + timeout_seconds + last_error: Exception | None = None + timeout_per_call = max(10, min(30, timeout_seconds)) + while time.time() < deadline: + try: + response = http_json(f"{backend_url}/api/assistant/session/{session_id}", timeout=timeout_per_call) + session = response.get("session") + if isinstance(session, 
dict): + return session + except Exception as error: # noqa: BLE001 + last_error = error + time.sleep(0.5) + if last_error is not None: + raise RuntimeError(f"Failed to fetch assistant session `{session_id}`: {last_error}") from last_error + raise RuntimeError(f"Assistant session `{session_id}` was not available within {timeout_seconds} seconds") + + +def build_assistant_message_payload( + args: argparse.Namespace, + *, + question: str, + session_id: str | None, + analysis_context: dict[str, Any], +) -> dict[str, Any]: + context_payload: dict[str, Any] = {} + normalized_analysis_context = normalize_analysis_context(analysis_context) + if normalized_analysis_context: + context_payload["analysis_context"] = normalized_analysis_context + as_of_date = normalized_analysis_context.get("as_of_date") + if isinstance(as_of_date, str): + context_payload["period_hint"] = as_of_date + payload = drop_none_values( + { + "session_id": session_id, + "user_message": question, + "message": question, + "mode": "assistant", + "llmProvider": args.llm_provider, + "apiKey": args.llm_api_key, + "model": args.llm_model, + "baseUrl": args.llm_base_url, + "temperature": args.temperature, + "maxOutputTokens": args.max_output_tokens, + "promptVersion": args.prompt_version, + "context": context_payload or None, + "useMock": bool(args.use_mock), + } + ) + return payload + + +def parse_path_tokens(path_expression: str) -> list[str | int]: + tokens: list[str | int] = [] + cursor = 0 + path = path_expression.strip() + while cursor < len(path): + current = path[cursor] + if current == ".": + cursor += 1 + continue + if current == "[": + closing = path.find("]", cursor) + if closing < 0: + raise RuntimeError(f"Invalid placeholder path: {path_expression}") + raw_index = path[cursor + 1 : closing].strip() + if not raw_index.isdigit(): + raise RuntimeError(f"Only numeric list indexes are supported in placeholders: {path_expression}") + tokens.append(int(raw_index)) + cursor = closing + 1 + continue + 
end = cursor + while end < len(path) and path[end] not in ".[": + end += 1 + tokens.append(path[cursor:end]) + cursor = end + return tokens + + +def lookup_placeholder_value(path_expression: str, scenario_state: dict[str, Any]) -> Any: + root: dict[str, Any] = { + "scenario_state": scenario_state, + "step_outputs": scenario_state.get("step_outputs", {}), + "semantic_memory": scenario_state.get("semantic_memory", {}), + "bindings": scenario_state.get("bindings", {}), + } + if isinstance(scenario_state.get("step_outputs"), dict): + root.update(scenario_state["step_outputs"]) + current: Any = root + for token in parse_path_tokens(path_expression): + if isinstance(token, int): + if not isinstance(current, list): + raise RuntimeError(f"Placeholder `{path_expression}` does not point to a list before index access") + if token >= len(current): + raise RuntimeError(f"Placeholder `{path_expression}` index {token} is out of range") + current = current[token] + continue + if not isinstance(current, dict) or token not in current: + raise RuntimeError(f"Placeholder `{path_expression}` could not be resolved at `{token}`") + current = current[token] + return current + + +def resolve_question_template(question_template: str, scenario_state: dict[str, Any]) -> str: + pattern = re.compile(r"{{\s*([^{}]+?)\s*}}") + + def replace(match: re.Match[str]) -> str: + value = lookup_placeholder_value(match.group(1), scenario_state) + if isinstance(value, (dict, list)): + return dump_json(value) + return str(value) + + return pattern.sub(replace, question_template).strip() + + +def build_failed_step_state( + *, + scenario_id: str, + domain: str, + step: dict[str, Any], + step_index: int, + question_resolved: str, + status: str, + failure_type: str, + error_message: str, +) -> dict[str, Any]: + return { + "schema_version": SCENARIO_STEP_STATE_SCHEMA_VERSION, + "scenario_id": scenario_id, + "domain": domain, + "step_id": step["step_id"], + "step_index": step_index, + "title": step["title"], + 
"depends_on": step["depends_on"], + "question_template": step["question_template"], + "question_resolved": question_resolved, + "expected_capability": step.get("expected_capability"), + "expected_result_mode": step.get("expected_result_mode"), + "reply_type": "backend_error" if status == "blocked" else "unresolved_followup", + "assistant_message_id": None, + "trace_id": None, + "detected_mode": None, + "detected_intent": None, + "selected_recipe": None, + "capability_id": None, + "capability_route_mode": None, + "route_expectation_status": None, + "result_mode": None, + "response_type": None, + "fallback_type": failure_type, + "mcp_call_status": None, + "balance_confirmed": None, + "active_result_set_id": None, + "last_confirmed_route": None, + "date_scope": None, + "organization_scope": None, + "entries": [], + "status": status, + "failure_type": failure_type, + "error_message": error_message, + } + + +def normalize_step_definition(index: int, raw_step: Any) -> dict[str, Any]: + if isinstance(raw_step, str): + step_id = f"step_{index:02d}" + question_template = raw_step.strip() + if not question_template: + raise RuntimeError(f"Scenario step {index} must not be empty") + return { + "step_id": step_id, + "title": step_id, + "question_template": question_template, + "depends_on": [], + "analysis_context": {}, + "expected_capability": None, + "expected_result_mode": None, + } + if not isinstance(raw_step, dict): + raise RuntimeError(f"Scenario step {index} must be a string or object") + + raw_step_id = str(raw_step.get("step_id") or "").strip() + step_id = slugify_token(raw_step_id, f"step_{index:02d}") + question_template = str(raw_step.get("question") or raw_step.get("question_template") or "").strip() + if not question_template: + raise RuntimeError(f"Scenario step `{step_id}` must define `question` or `question_template`") + depends_on_raw = raw_step.get("depends_on") + depends_on: list[str] = [] + if isinstance(depends_on_raw, str) and depends_on_raw.strip(): + 
depends_on = [depends_on_raw.strip()] + elif isinstance(depends_on_raw, list): + depends_on = [str(item).strip() for item in depends_on_raw if str(item).strip()] + return { + "step_id": step_id, + "title": str(raw_step.get("title") or step_id).strip() or step_id, + "question_template": question_template, + "depends_on": depends_on, + "analysis_context": normalize_analysis_context(raw_step.get("analysis_context")), + "expected_capability": str(raw_step.get("expected_capability") or "").strip() or None, + "expected_result_mode": str(raw_step.get("expected_result_mode") or "").strip() or None, + } + + +def normalize_scenario_manifest( + raw_manifest: dict[str, Any], + *, + fallback_domain: str | None = None, + fallback_analysis_context: Any = None, + fallback_bindings: Any = None, + default_scenario_id: str | None = None, +) -> dict[str, Any]: + domain = str(raw_manifest.get("domain") or fallback_domain or "").strip() + if not domain: + raise RuntimeError("Scenario manifest must define `domain`") + raw_steps = raw_manifest.get("steps") + if not isinstance(raw_steps, list) or not raw_steps: + raise RuntimeError("Scenario manifest must define non-empty `steps`") + + steps = [normalize_step_definition(index + 1, raw_step) for index, raw_step in enumerate(raw_steps)] + scenario_id = str(raw_manifest.get("scenario_id") or "").strip() or default_scenario_id or slugify_case_id(domain, None) + title = str(raw_manifest.get("title") or domain).strip() or domain + description = str(raw_manifest.get("description") or "").strip() or None + analysis_context = merge_analysis_context(fallback_analysis_context, raw_manifest.get("analysis_context")) + if analysis_context and "source" not in analysis_context: + analysis_context["source"] = "scenario_manifest" + bindings = normalize_bindings(fallback_bindings) + bindings.update(normalize_bindings(raw_manifest.get("bindings"))) + return { + "schema_version": str(raw_manifest.get("schema_version") or SCENARIO_MANIFEST_SCHEMA_VERSION), + 
"scenario_id": scenario_id, + "domain": domain, + "title": title, + "description": description, + "analysis_context": analysis_context, + "bindings": bindings, + "steps": steps, + } + + +def load_scenario_manifest(file_path: Path) -> dict[str, Any]: + raw_manifest = read_json_file(file_path) + return normalize_scenario_manifest(raw_manifest) + + +def load_scenario_pack(file_path: Path) -> dict[str, Any]: + raw_pack = read_json_file(file_path) + domain = str(raw_pack.get("domain") or "").strip() + if not domain: + raise RuntimeError("Scenario pack must define `domain`") + raw_scenarios = raw_pack.get("scenarios") + if not isinstance(raw_scenarios, list) or not raw_scenarios: + raise RuntimeError("Scenario pack must define non-empty `scenarios`") + + pack_id = str(raw_pack.get("pack_id") or "").strip() or slugify_case_id(domain, None) + title = str(raw_pack.get("title") or domain).strip() or domain + description = str(raw_pack.get("description") or "").strip() or None + analysis_context = normalize_analysis_context(raw_pack.get("analysis_context")) + if analysis_context and "source" not in analysis_context: + analysis_context["source"] = "scenario_pack" + bindings = normalize_bindings(raw_pack.get("bindings")) + scenarios = [ + normalize_scenario_manifest( + raw_scenario, + fallback_domain=domain, + fallback_analysis_context=analysis_context, + fallback_bindings=bindings, + default_scenario_id=f"{pack_id}_scenario_{index + 1:02d}", + ) + for index, raw_scenario in enumerate(raw_scenarios) + ] + return { + "schema_version": str(raw_pack.get("schema_version") or SCENARIO_PACK_SCHEMA_VERSION), + "pack_id": pack_id, + "domain": domain, + "title": title, + "description": description, + "analysis_context": analysis_context, + "bindings": bindings, + "scenarios": scenarios, + } + + +def ensure_scenario_brief(scenario_dir: Path, manifest: dict[str, Any]) -> None: + file_path = scenario_dir / "scenario_brief.md" + if file_path.exists(): + return + steps_lines = "\n".join( + 
f"{index}. `{step['step_id']}` - {step['question_template']}" for index, step in enumerate(manifest["steps"], start=1) + ) + content = textwrap.dedent( + f"""\ + # Scenario brief + + ## Domain + `{manifest["domain"]}` + + ## Scenario id + `{manifest["scenario_id"]}` + + ## Title + {manifest["title"]} + + ## Description + {manifest.get("description") or ""} + + ## Shared analysis context + ```json + {dump_json(manifest.get("analysis_context") or {})} + ``` + + ## Bindings + ```json + {dump_json(manifest.get("bindings") or {})} + ``` + + ## Steps + {steps_lines} + + ## Constraints + - no architecture changes + - reuse current assistant runtime and session state + - 1C/MCP first + - no fabricated values + - missing route or capability is domain enablement work, not silent rejection + """ + ) + write_text(file_path, content) + + +def normalize_field_key(raw_key: str) -> str: + normalized = re.sub(r"\s+", " ", raw_key.strip().lower().replace("ё", "е")).strip(" :") + aliases = { + "склад": "warehouse", + "количество": "quantity", + "стоимость": "amount", + "сумма": "amount", + "организация": "organization", + "дата строки": "row_date", + "дата": "date", + "поставщик": "supplier", + "договор": "contract", + "документ": "document", + "документы": "documents", + "покупатель": "customer", + } + if normalized in aliases: + return aliases[normalized] + return slugify_token(normalized, "field").lower() + + +def extract_structured_entries(answer_text: str) -> list[dict[str, Any]]: + entries: list[dict[str, Any]] = [] + for line in answer_text.splitlines(): + match = re.match(r"^\s*(\d+)\.\s+(.*\S)\s*$", line) + if not match: + continue + index = int(match.group(1)) + payload = match.group(2).strip() + segments = [segment.strip() for segment in payload.split(" | ") if segment.strip()] + title = segments[0] if segments else payload + fields: dict[str, str] = {} + raw_fields: dict[str, str] = {} + for segment in segments[1:]: + if ":" not in segment: + continue + raw_key, raw_value 
= segment.split(":", 1) + key = normalize_field_key(raw_key) + value = raw_value.strip() + fields[key] = value + raw_fields[raw_key.strip()] = value + entry: dict[str, Any] = { + "index": index, + "title": title, + "item": title, + "fields": fields, + "raw_fields": raw_fields, + "raw_line": line.strip(), + } + for key, value in fields.items(): + entry[key] = value + entries.append(entry) + return entries + + +def derive_step_status(reply_type: str | None, debug_payload: dict[str, Any]) -> str: + if reply_type == "backend_error": + return "blocked" + capability_route_mode = str(debug_payload.get("capability_route_mode") or "").strip() + fallback_type = str(debug_payload.get("fallback_type") or "").strip() + selected_recipe = str(debug_payload.get("selected_recipe") or "").strip() + detected_intent = str(debug_payload.get("detected_intent") or "").strip() + detected_mode = str(debug_payload.get("detected_mode") or "").strip() + if capability_route_mode == "exact" and fallback_type in {"", "none"} and reply_type in {"factual", "factual_with_explanation", "empty_but_valid"}: + return "exact" + if fallback_type in {"out_of_scope", "unknown"}: + return "needs_exact_capability" + if capability_route_mode == "exact" and detected_mode == "address_query": + return "partial" + if detected_mode == "address_query" and (selected_recipe or detected_intent): + return "partial" + if reply_type in {"partial_coverage", "clarification_required", "out_of_scope", "no_grounded_answer", "route_mismatch_blocked"}: + return "needs_exact_capability" + return "needs_exact_capability" + + +def build_scenario_step_state( + *, + scenario_id: str, + domain: str, + step: dict[str, Any], + step_index: int, + question_resolved: str, + turn_artifact: dict[str, Any], + entries: list[dict[str, Any]], +) -> dict[str, Any]: + debug_payload = turn_artifact.get("technical_debug_payload") + debug = debug_payload if isinstance(debug_payload, dict) else {} + session_summary = 
turn_artifact.get("session_summary") + summary = session_summary if isinstance(session_summary, dict) else {} + address_state = summary.get("address_navigation_state") + navigation_state = address_state if isinstance(address_state, dict) else {} + session_context = navigation_state.get("session_context") + context = session_context if isinstance(session_context, dict) else {} + assistant_message = turn_artifact.get("assistant_message") + assistant_item = assistant_message if isinstance(assistant_message, dict) else {} + reply_type = assistant_item.get("reply_type") + + step_state = { + "schema_version": SCENARIO_STEP_STATE_SCHEMA_VERSION, + "scenario_id": scenario_id, + "domain": domain, + "step_id": step["step_id"], + "step_index": step_index, + "title": step["title"], + "depends_on": step["depends_on"], + "question_template": step["question_template"], + "question_resolved": question_resolved, + "expected_capability": step.get("expected_capability"), + "expected_result_mode": step.get("expected_result_mode"), + "reply_type": reply_type, + "assistant_message_id": assistant_item.get("message_id"), + "trace_id": assistant_item.get("trace_id"), + "detected_mode": debug.get("detected_mode"), + "detected_intent": debug.get("detected_intent"), + "selected_recipe": debug.get("selected_recipe"), + "capability_id": debug.get("capability_id"), + "capability_route_mode": debug.get("capability_route_mode"), + "route_expectation_status": debug.get("route_expectation_status"), + "result_mode": debug.get("result_mode"), + "response_type": debug.get("response_type"), + "fallback_type": debug.get("fallback_type"), + "mcp_call_status": debug.get("mcp_call_status"), + "balance_confirmed": debug.get("balance_confirmed"), + "active_result_set_id": context.get("active_result_set_id"), + "last_confirmed_route": context.get("last_confirmed_route"), + "date_scope": context.get("date_scope"), + "organization_scope": context.get("organization_scope"), + "entries": entries, + } + 
step_state["status"] = derive_step_status(reply_type if isinstance(reply_type, str) else None, debug) + return step_state + + +def save_scenario_step_bundle( + *, + step_dir: Path, + export_markdown: str, + turn_artifact: dict[str, Any], + session_record: dict[str, Any], + response_payload: dict[str, Any], + step_state: dict[str, Any], +) -> None: + debug_payload = turn_artifact.get("technical_debug_payload") + write_text(step_dir / "output.md", export_markdown) + write_json(step_dir / "debug.json", debug_payload if debug_payload is not None else {}) + write_json(step_dir / "turn.json", turn_artifact) + write_json(step_dir / "session.json", session_record) + write_json(step_dir / "assistant_response.json", response_payload) + write_json(step_dir / "step_state.json", step_state) + write_text(step_dir / "resolved_question.txt", f"{step_state['question_resolved']}\n") + + +def derive_scenario_status(step_outputs: dict[str, dict[str, Any]]) -> str: + statuses = [str(item.get("status") or "") for item in step_outputs.values()] + if not statuses: + return "blocked" + if any(status == "blocked" for status in statuses): + return "blocked" + if any(status == "needs_exact_capability" for status in statuses): + return "needs_exact_capability" + if any(status == "partial" for status in statuses): + return "partial" + return "accepted" + + +def build_scenario_summary(manifest: dict[str, Any], scenario_state: dict[str, Any], final_status: str) -> str: + lines = [ + "# Scenario summary", + "", + f"- scenario_id: `{manifest['scenario_id']}`", + f"- domain: `{manifest['domain']}`", + f"- title: {manifest['title']}", + f"- session_id: `{scenario_state.get('session_id') or 'n/a'}`", + f"- final_status: `{final_status}`", + "", + "## Steps", + ] + for index, step in enumerate(manifest["steps"], start=1): + step_output = scenario_state.get("step_outputs", {}).get(step["step_id"], {}) + lines.extend( + [ + f"{index}. 
`{step['step_id']}` - {step['question_template']}", + f"status: `{step_output.get('status') or 'n/a'}`", + f"question_resolved: {step_output.get('question_resolved') or 'n/a'}", + f"intent: `{step_output.get('detected_intent') or 'n/a'}`", + f"recipe: `{step_output.get('selected_recipe') or 'n/a'}`", + f"capability: `{step_output.get('capability_id') or 'n/a'}`", + f"result_mode: `{step_output.get('result_mode') or 'n/a'}`", + f"result_set: `{step_output.get('active_result_set_id') or 'n/a'}`", + "", + ] + ) + return "\n".join(lines).strip() + "\n" + + +def build_scenario_final_status(manifest: dict[str, Any], scenario_state: dict[str, Any], final_status: str) -> str: + reason = { + "accepted": "all scenario steps executed in one assistant session with no unresolved route or capability gaps", + "partial": "scenario captured successfully, but at least one step still needs exact capability enablement or route hardening", + "needs_exact_capability": "scenario is valid for the project, but at least one step still requires exact capability or route enablement", + "blocked": "scenario run was interrupted by runtime or backend failure", + }.get(final_status, "scenario status unknown") + return textwrap.dedent( + f"""\ + # Final status + + - status: `{final_status}` + - scenario_id: `{manifest['scenario_id']}` + - session_id: `{scenario_state.get('session_id') or 'n/a'}` + - reason: {reason} + """ + ) + + +def run_assistant_step( + *, + args: argparse.Namespace, + domain: str, + scenario_id: str, + step: dict[str, Any], + step_index: int, + session_id: str | None, + question_resolved: str, + analysis_context: dict[str, Any], +) -> dict[str, Any]: + payload = build_assistant_message_payload( + args, + question=question_resolved, + session_id=session_id, + analysis_context=analysis_context, + ) + response_payload = http_json( + f"{args.backend_url}/api/assistant/message", + method="POST", + payload=payload, + timeout=max(30, int(args.timeout_seconds)), + ) + 
def execute_scenario_manifest(
    *,
    args: argparse.Namespace,
    manifest: dict[str, Any],
    scenario_dir: Path,
    manifest_source_label: str | None,
) -> tuple[dict[str, Any], str]:
    """Run every step of a scenario manifest and persist its artifacts.

    The manifest, the evolving ``scenario_state.json`` and one bundle per step
    (under ``scenario_dir / "steps" / <step_id>``) are written as the run
    progresses. After the loop the scenario output, summary and final status
    documents are written as well.

    A step that raises ``RuntimeError`` is recorded as a failed step:
    placeholder-resolution errors (message contains ``Placeholder `` followed
    by a backtick) become ``needs_exact_capability`` and the run continues;
    any other ``RuntimeError`` becomes ``blocked`` and stops the scenario.

    Args:
        args: Parsed CLI arguments, forwarded to ``run_assistant_step``.
        manifest: Scenario manifest; ``scenario_id``, ``domain``, ``title``
            and ``steps`` are read directly.
        scenario_dir: Directory that receives all scenario artifacts.
        manifest_source_label: Optional label written to
            ``manifest_source.txt``; skipped when empty.

    Returns:
        ``(scenario_state, final_status)`` where ``final_status`` comes from
        ``derive_scenario_status`` over the recorded step outputs.
    """
    steps_dir = scenario_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)
    write_json(scenario_dir / "scenario_manifest.json", manifest)
    if manifest_source_label:
        write_text(scenario_dir / "manifest_source.txt", f"{manifest_source_label}\n")
    ensure_scenario_brief(scenario_dir, manifest)

    scenario_state: dict[str, Any] = {
        "schema_version": SCENARIO_STATE_SCHEMA_VERSION,
        "scenario_id": manifest["scenario_id"],
        "domain": manifest["domain"],
        "title": manifest["title"],
        "session_id": None,
        "analysis_context": manifest.get("analysis_context") or {},
        "bindings": manifest.get("bindings") or {},
        "step_outputs": {},
        "semantic_memory": {},
        "updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
    }
    write_json(scenario_dir / "scenario_state.json", scenario_state)

    last_export_markdown = ""
    for step_index, step in enumerate(manifest["steps"], start=1):
        step_dir = steps_dir / step["step_id"]
        step_analysis_context = merge_analysis_context(manifest.get("analysis_context"), step.get("analysis_context"))
        # Best-known question text for the failure record: the raw template
        # until resolution succeeds, the resolved question afterwards. This
        # keeps `question_resolved` accurate when the assistant call itself
        # fails after the template was already resolved.
        question_for_failure = step["question_template"]
        try:
            resolved_question = resolve_question_template(step["question_template"], scenario_state)
            question_for_failure = resolved_question
            result = run_assistant_step(
                args=args,
                domain=manifest["domain"],
                scenario_id=manifest["scenario_id"],
                step=step,
                step_index=step_index,
                session_id=scenario_state.get("session_id"),
                question_resolved=resolved_question,
                analysis_context=step_analysis_context,
            )
        except RuntimeError as exc:
            failure_type = "runtime_error"
            failure_status = "blocked"
            if "Placeholder `" in str(exc):
                # Unresolvable placeholder: a capability gap, not a runtime
                # failure, so later steps are still attempted.
                failure_type = "placeholder_resolution_error"
                failure_status = "needs_exact_capability"
            step_state = build_failed_step_state(
                scenario_id=manifest["scenario_id"],
                domain=manifest["domain"],
                step=step,
                step_index=step_index,
                question_resolved=question_for_failure,
                status=failure_status,
                failure_type=failure_type,
                error_message=str(exc),
            )
            scenario_state["step_outputs"][step["step_id"]] = step_state
            # Keep the memory gathered by earlier successful steps and only
            # overlay the latest step markers.
            scenario_state["semantic_memory"] = {
                **(scenario_state.get("semantic_memory") or {}),
                "latest_step_id": step["step_id"],
                "latest_step_status": step_state["status"],
            }
            scenario_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
            save_scenario_step_bundle(
                step_dir=step_dir,
                export_markdown="",
                turn_artifact={},
                session_record={},
                response_payload={},
                step_state=step_state,
            )
            write_json(scenario_dir / "scenario_state.json", scenario_state)
            print(
                f"[domain-case-loop] scenario {manifest['scenario_id']} step {step_index}/{len(manifest['steps'])}: "
                f"{step['step_id']} -> {step_state['status']} ({failure_type})"
            )
            if failure_status == "blocked":
                break
            continue

        scenario_state["session_id"] = result["session_id"]
        scenario_state["step_outputs"][step["step_id"]] = result["step_state"]
        scenario_state["semantic_memory"] = {
            "latest_step_id": step["step_id"],
            "latest_step_status": result["step_state"].get("status"),
            "active_result_set_id": result["step_state"].get("active_result_set_id"),
            "last_confirmed_route": result["step_state"].get("last_confirmed_route"),
            "date_scope": result["step_state"].get("date_scope"),
            "organization_scope": result["step_state"].get("organization_scope"),
            "entries": result["step_state"].get("entries"),
        }
        scenario_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
        save_scenario_step_bundle(
            step_dir=step_dir,
            export_markdown=result["export_markdown"],
            turn_artifact=result["turn_artifact"],
            session_record=result["session_record"],
            response_payload=result["response_payload"],
            step_state=result["step_state"],
        )
        write_json(scenario_dir / "scenario_state.json", scenario_state)
        last_export_markdown = result["export_markdown"]
        print(
            f"[domain-case-loop] scenario {manifest['scenario_id']} step {step_index}/{len(manifest['steps'])}: "
            f"{step['step_id']} -> {result['step_state']['status']}"
        )

    final_status = derive_scenario_status(scenario_state["step_outputs"])
    write_text(scenario_dir / "scenario_output.md", last_export_markdown or "")
    write_text(scenario_dir / "scenario_summary.md", build_scenario_summary(manifest, scenario_state, final_status))
    write_text(scenario_dir / "final_status.md", build_scenario_final_status(manifest, scenario_state, final_status))
    if scenario_state.get("session_id"):
        write_text(scenario_dir / "session_id.txt", f"{scenario_state['session_id']}\n")
    print(f"[domain-case-loop] saved scenario artifacts to {scenario_dir}")
    print(f"[domain-case-loop] final_status={final_status}")
    return scenario_state, final_status
def build_pack_summary(pack: dict[str, Any], scenario_results: list[dict[str, Any]], final_status: str) -> str:
    """Render the markdown body of ``pack_summary.md``.

    The header carries the pack id, domain, title and final status; it is
    followed by one numbered entry per executed scenario with its status,
    session id (``n/a`` when empty) and artifact directory.
    """
    header_text = "\n".join(
        (
            "# Pack summary",
            "",
            f"- pack_id: `{pack['pack_id']}`",
            f"- domain: `{pack['domain']}`",
            f"- title: {pack['title']}",
            f"- final_status: `{final_status}`",
            "",
            "## Scenarios",
        )
    )

    def render_entry(position: int, outcome: dict[str, Any]) -> str:
        # The trailing empty element keeps a blank line between entries.
        return "\n".join(
            (
                f"{position}. `{outcome['scenario_id']}` - {outcome['title']}",
                f"status: `{outcome['final_status']}`",
                f"session_id: `{outcome.get('session_id') or 'n/a'}`",
                f"artifact_dir: `{outcome['artifact_dir']}`",
                "",
            )
        )

    sections = [header_text]
    sections.extend(
        render_entry(position, outcome)
        for position, outcome in enumerate(scenario_results, start=1)
    )
    return "\n".join(sections).strip() + "\n"
status + + - status: `{final_status}` + - pack_id: `{pack['pack_id']}` + - domain: `{pack['domain']}` + - reason: {reason} + """ + ) + + +def run_subprocess_command( + command: list[str], + *, + cwd: Path, + timeout_seconds: int, + input_text: str | None = None, + stdout_path: Path | None = None, + stderr_path: Path | None = None, +) -> subprocess.CompletedProcess[str]: + result = subprocess.run( + command, + cwd=str(cwd), + input=input_text, + text=True, + encoding="utf-8", + errors="replace", + capture_output=True, + timeout=timeout_seconds, + check=False, + ) + if stdout_path is not None: + write_text(stdout_path, result.stdout) + if stderr_path is not None: + write_text(stderr_path, result.stderr) + if result.returncode != 0: + raise RuntimeError( + f"Command failed with exit code {result.returncode}: {' '.join(command)}\n{result.stderr.strip()}" + ) + return result + + +def build_run_pack_command( + args: argparse.Namespace, + *, + manifest_path: Path, + pack_id: str, + output_root: Path, +) -> list[str]: + command = [ + sys.executable, + str(Path(__file__).resolve()), + "run-pack", + "--manifest", + str(manifest_path), + "--pack-id", + pack_id, + "--output-root", + str(output_root), + "--backend-url", + str(args.backend_url), + "--prompt-version", + str(args.prompt_version), + "--llm-provider", + str(args.llm_provider), + "--llm-model", + str(args.llm_model), + "--llm-base-url", + str(args.llm_base_url), + "--llm-api-key", + str(args.llm_api_key), + "--temperature", + str(args.temperature), + "--max-output-tokens", + str(args.max_output_tokens), + "--timeout-seconds", + str(args.timeout_seconds), + ] + if getattr(args, "analysis_date", None): + command.extend(["--analysis-date", str(args.analysis_date)]) + max_scenarios = getattr(args, "max_scenarios", None) + if max_scenarios is not None: + command.extend(["--max-scenarios", str(int(max_scenarios))]) + if bool(getattr(args, "use_mock", False)): + command.append("--use-mock") + return command + + +def 
def read_json_output(file_path: Path) -> dict[str, Any]:
    """Load ``file_path`` as JSON and require a top-level object.

    The file content is obtained through ``read_text_file`` and parsed with
    ``json.loads``.

    Raises:
        RuntimeError: The parsed document is not a JSON object (``dict``).
    """
    parsed = json.loads(read_text_file(file_path))
    if isinstance(parsed, dict):
        return parsed
    raise RuntimeError(f"Expected JSON object in {file_path}")
"entry_titles_sample": entry_titles_sample, + } + + +def build_pack_review_bundle(pack_dir: Path) -> str: + pack_state = read_json_file(pack_dir / "pack_state.json") if (pack_dir / "pack_state.json").exists() else {} + scenarios_root = pack_dir / "scenarios" + scenarios_bundle: list[dict[str, Any]] = [] + if scenarios_root.exists(): + for scenario_dir in sorted(path for path in scenarios_root.iterdir() if path.is_dir()): + scenario_state = read_json_file(scenario_dir / "scenario_state.json") if (scenario_dir / "scenario_state.json").exists() else {} + step_outputs_raw = scenario_state.get("step_outputs") + compact_steps: dict[str, Any] = {} + if isinstance(step_outputs_raw, dict): + for step_id, step_output in step_outputs_raw.items(): + compact_steps[str(step_id)] = compact_step_output_for_review(step_output) + scenarios_bundle.append( + { + "scenario_id": scenario_state.get("scenario_id") or scenario_dir.name, + "title": scenario_state.get("title"), + "session_id": scenario_state.get("session_id"), + "summary": read_text_file(scenario_dir / "scenario_summary.md") if (scenario_dir / "scenario_summary.md").exists() else "", + "step_outputs": compact_steps, + } + ) + bundle = { + "pack_state": { + "pack_id": pack_state.get("pack_id"), + "domain": pack_state.get("domain"), + "title": pack_state.get("title"), + "final_status": pack_state.get("final_status"), + "scenario_results": pack_state.get("scenario_results"), + }, + "pack_summary": read_text_file(pack_dir / "pack_summary.md") if (pack_dir / "pack_summary.md").exists() else "", + "scenarios": scenarios_bundle, + } + return dump_json(bundle) + + +def build_analyst_loop_prompt( + *, + loop_dir: Path, + iteration_dir: Path, + pack_dir: Path, + previous_pack_dir: Path | None, + previous_verdict_path: Path | None, + target_score: int, + review_bundle_json: str, + previous_verdict_json: str | None, +) -> str: + comparison_block = "" + if previous_pack_dir is not None: + comparison_block = textwrap.dedent( + f"""\ + 
Compare the current run with the previous run: + - previous_pack_dir: `{previous_pack_dir}` + """ + ) + if previous_verdict_path is not None and previous_verdict_path.exists(): + comparison_block += f"- previous_analyst_verdict: `{previous_verdict_path}`\n" + previous_verdict_block = "" + if previous_verdict_json: + previous_verdict_block = textwrap.dedent( + f"""\ + + Previous analyst verdict JSON: + ```json + {previous_verdict_json} + ``` + """ + ) + return textwrap.dedent( + f"""\ + You are the strict `domain_analyst` for NDC_1C. + + Use the repo rules from: + - `.codex/agents/domain_analyst.toml` + - `.codex/skills/domain-case-loop/SKILL.md` + - `.codex/skills/domain-case-loop/references/verdict_template.md` + + Current loop context: + - loop_dir: `{loop_dir}` + - iteration_dir: `{iteration_dir}` + - current_pack_dir: `{pack_dir}` + {comparison_block} + + Required artifacts to inspect: + - `{pack_dir / 'pack_summary.md'}` + - `{pack_dir / 'pack_state.json'}` + - all `scenario_summary.md`, `scenario_state.json`, and problematic `steps/*/step_state.json` files inside `{pack_dir / 'scenarios'}` + + Goal: + - evaluate current domain-pack correctness for business meaning, route/capability quality, evidence quality, and absence of silent heuristic masking; + - determine whether the gate `quality_score >= {target_score}` is reached; + - if not, provide the smallest high-value fix targets for the coder. 
+ + Rules: + - `accepted` is allowed only if quality_score >= {target_score}, unresolved_p0_count = 0, and regression_detected = false; + - `partial` means the pack is usable but exactness, routing, or coverage is still insufficient; + - `needs_exact_capability` means the primary blocker is a missing exact route or capability, but the loop should still continue autonomously unless a user decision is required; + - `continue` means there is a clear next patch cycle; + - `blocked` means the loop is stopped by a real hard blocker such as runtime/infrastructure failure, unavailable 1C data, or another condition that the repo cannot autonomously repair; + - set `requires_user_decision = true` when the next step cannot be chosen safely without user input, for example: + - an architecture fork or risky contour expansion; + - a scope tradeoff or important business ambiguity; + - a required observation anchor is missing and cannot be recovered safely from artifacts, 1C, or the current scenario state; + - the only remaining implementation path would rely on a hack, brittle workaround, heuristic masking, or disproportionate complexity/risk; + - if `requires_user_decision = true`, fill `user_decision_type` and `user_decision_prompt`; + - if the pack is below {target_score} but there is still safe autonomous implementation work, keep `requires_user_decision = false`; + - do not request user input merely because the score is still below {target_score}; request it only when the loop would otherwise guess, overfit, or risk architecture drift. + + Use this UTF-8 evidence bundle as the source of truth for artifact contents. Do not treat shell rendering artifacts as file corruption if the embedded bundle is readable. + + Current evidence bundle: + ```json + {review_bundle_json} + ``` + {previous_verdict_block} + + Return JSON only and follow the schema exactly. 
+ """ + ).strip() + + +def build_coder_loop_prompt( + *, + loop_dir: Path, + iteration_dir: Path, + pack_dir: Path, + analyst_verdict_path: Path, + analyst_verdict_json: str, +) -> str: + return textwrap.dedent( + f"""\ + You are the `domain_coder` for NDC_1C. + + Use the repo rules from: + - `.codex/agents/domain_coder.toml` + - `.codex/skills/domain-case-loop/SKILL.md` + + Current loop context: + - loop_dir: `{loop_dir}` + - iteration_dir: `{iteration_dir}` + - current_pack_dir: `{pack_dir}` + - analyst_verdict_json: `{analyst_verdict_path}` + + Make the smallest domain-only patch in the working tree that improves the failing or partial scenarios named in the analyst verdict. + + Hard rules: + - do not change the architecture; + - do not fabricate data; + - do not present heuristic answers as confirmed; + - do not touch unrelated files; + - preserve already successful baseline flows. + + Required outputs: + - create `{iteration_dir / 'coder_plan.md'}` with a short plan; + - create `{iteration_dir / 'patch_summary.md'}` with a short summary of the patch; + + Analyst verdict JSON: + ```json + {analyst_verdict_json} + ``` + + - then return JSON only and follow the schema exactly. 
+ """ + ).strip() + + +def evaluate_analyst_gate( + verdict: dict[str, Any], target_score: int +) -> tuple[bool, str, bool, str, str | None]: + quality_score = int(verdict.get("quality_score") or 0) + unresolved_p0_count = int(verdict.get("unresolved_p0_count") or 0) + regression_detected = bool(verdict.get("regression_detected")) + loop_decision = str(verdict.get("loop_decision") or "").strip() or "continue" + requires_user_decision = bool(verdict.get("requires_user_decision")) + user_decision_type = str(verdict.get("user_decision_type") or "").strip() or "none" + user_decision_prompt_raw = verdict.get("user_decision_prompt") + user_decision_prompt = str(user_decision_prompt_raw).strip() if user_decision_prompt_raw else None + accepted = quality_score >= target_score and unresolved_p0_count == 0 and not regression_detected and loop_decision == "accepted" + return accepted, loop_decision, requires_user_decision, user_decision_type, user_decision_prompt + + +def handle_run_pack(args: argparse.Namespace) -> int: + ensure_backend_health(args.backend_url, args.timeout_seconds) + pack_path = Path(args.manifest).resolve() + pack = load_scenario_pack(pack_path) + if args.pack_id: + pack["pack_id"] = args.pack_id.strip() + if args.analysis_date: + pack["analysis_context"] = merge_analysis_context( + pack.get("analysis_context"), + {"as_of_date": args.analysis_date, "source": "cli_override"}, + ) + + pack_dir = Path(args.output_root).resolve() / pack["pack_id"] + scenarios_dir = pack_dir / "scenarios" + scenarios_dir.mkdir(parents=True, exist_ok=True) + write_json(pack_dir / "pack_manifest.json", pack) + write_text(pack_dir / "manifest_source.txt", f"{pack_path}\n") + + scenario_results: list[dict[str, Any]] = [] + max_scenarios = max(0, int(args.max_scenarios)) if args.max_scenarios is not None else None + scenarios_to_run = pack["scenarios"][:max_scenarios] if max_scenarios else pack["scenarios"] + for scenario in scenarios_to_run: + scenario_manifest = 
normalize_scenario_manifest( + scenario, + fallback_domain=pack["domain"], + fallback_analysis_context=pack.get("analysis_context"), + fallback_bindings=pack.get("bindings"), + default_scenario_id=scenario["scenario_id"], + ) + scenario_dir = scenarios_dir / scenario_manifest["scenario_id"] + scenario_state, scenario_final_status = execute_scenario_manifest( + args=args, + manifest=scenario_manifest, + scenario_dir=scenario_dir, + manifest_source_label=f"{pack_path}#{scenario_manifest['scenario_id']}", + ) + scenario_results.append( + { + "scenario_id": scenario_manifest["scenario_id"], + "title": scenario_manifest["title"], + "final_status": scenario_final_status, + "session_id": scenario_state.get("session_id"), + "artifact_dir": str(scenario_dir), + } + ) + + aggregate_statuses = [item["final_status"] for item in scenario_results] + if not aggregate_statuses: + final_status = "blocked" + elif any(status == "blocked" for status in aggregate_statuses): + final_status = "blocked" + elif any(status == "needs_exact_capability" for status in aggregate_statuses): + final_status = "needs_exact_capability" + elif any(status == "partial" for status in aggregate_statuses): + final_status = "partial" + else: + final_status = "accepted" if len(scenario_results) == len(pack.get("scenarios") or []) else "partial" + + pack_state = { + "schema_version": SCENARIO_PACK_SCHEMA_VERSION, + "pack_id": pack["pack_id"], + "domain": pack["domain"], + "title": pack["title"], + "analysis_context": pack.get("analysis_context") or {}, + "bindings": pack.get("bindings") or {}, + "scenario_results": scenario_results, + "final_status": final_status, + "updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), + } + write_json(pack_dir / "pack_state.json", pack_state) + write_text(pack_dir / "pack_summary.md", build_pack_summary(pack, scenario_results, final_status)) + write_text(pack_dir / "final_status.md", build_pack_final_status(pack, scenario_results, final_status)) + 
def build_loop_final_status(loop_state: dict[str, Any]) -> str:
    """Render the markdown body of the loop's ``final_status.md``.

    Reads ``final_status``, ``loop_id`` and ``target_score`` (required keys)
    plus the optional ``iterations``, ``last_analyst_decision`` and
    ``stop_reason`` from ``loop_state``.

    The document is assembled line by line rather than by dedenting an
    interpolated template: ``stop_reason`` can embed a multi-line analyst
    prompt, and lines starting at column 0 would cancel the dedent and leave
    every template line indented.
    """
    iterations_ran = len(loop_state.get("iterations", []))
    lines = [
        "# Final status",
        "",
        f"- status: `{loop_state['final_status']}`",
        f"- loop_id: `{loop_state['loop_id']}`",
        f"- target_score: `{loop_state['target_score']}`",
        f"- iterations_ran: `{iterations_ran}`",
        f"- last_analyst_decision: `{loop_state.get('last_analyst_decision') or 'n/a'}`",
        f"- stop_reason: {loop_state.get('stop_reason') or 'n/a'}",
    ]
    return "\n".join(lines) + "\n"
write_text(loop_dir / "manifest_source.txt", f"{manifest_path}\n") + + target_score = int(args.target_score) + max_iterations = int(args.max_iterations) + if max_iterations < 1: + raise RuntimeError("--max-iterations must be >= 1") + + loop_state: dict[str, Any] = { + "schema_version": AUTONOMOUS_LOOP_SCHEMA_VERSION, + "loop_id": loop_id, + "manifest_path": str(manifest_path), + "target_score": target_score, + "max_iterations": max_iterations, + "iterations": [], + "final_status": "partial", + "stop_reason": None, + "last_analyst_decision": None, + "last_user_decision_type": "none", + "last_user_decision_prompt": None, + "updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), + } + write_json(loop_dir / "loop_state.json", loop_state) + + previous_pack_dir: Path | None = None + previous_verdict_path: Path | None = None + + for iteration_index in range(max_iterations): + iteration_id = f"iteration_{iteration_index:02d}" + iteration_dir = iterations_dir / iteration_id + iteration_dir.mkdir(parents=True, exist_ok=True) + + pack_output_root = iteration_dir / "pack_output" + pack_id = "pack_run" + pack_command = build_run_pack_command( + args, + manifest_path=manifest_path, + pack_id=pack_id, + output_root=pack_output_root, + ) + run_subprocess_command( + pack_command, + cwd=REPO_ROOT, + timeout_seconds=max(600, int(args.timeout_seconds) * 20), + stdout_path=iteration_dir / "pack_run.stdout.log", + stderr_path=iteration_dir / "pack_run.stderr.log", + ) + pack_dir = pack_output_root / pack_id + + analyst_verdict_path = iteration_dir / "analyst_verdict.json" + review_bundle_json = build_pack_review_bundle(pack_dir) + previous_verdict_json = read_text_file(previous_verdict_path) if previous_verdict_path is not None and previous_verdict_path.exists() else None + analyst_prompt = build_analyst_loop_prompt( + loop_dir=loop_dir, + iteration_dir=iteration_dir, + pack_dir=pack_dir, + previous_pack_dir=previous_pack_dir, + 
previous_verdict_path=previous_verdict_path, + target_score=target_score, + review_bundle_json=review_bundle_json, + previous_verdict_json=previous_verdict_json, + ) + write_text(iteration_dir / "analyst_prompt.md", analyst_prompt + "\n") + analyst_command = build_codex_exec_command( + args, + output_file=analyst_verdict_path, + schema_file=Path(args.analyst_schema).resolve(), + sandbox_mode="read-only", + model_override=getattr(args, "analyst_codex_model", None), + reasoning_effort=getattr(args, "analyst_reasoning_effort", None), + ) + run_subprocess_command( + analyst_command, + cwd=REPO_ROOT, + timeout_seconds=int(args.codex_timeout_seconds), + input_text=analyst_prompt, + stdout_path=iteration_dir / "analyst_exec.stdout.log", + stderr_path=iteration_dir / "analyst_exec.stderr.log", + ) + analyst_verdict = read_json_output(analyst_verdict_path) + accepted_gate, loop_decision, requires_user_decision, user_decision_type, user_decision_prompt = evaluate_analyst_gate( + analyst_verdict, target_score + ) + loop_state["last_analyst_decision"] = loop_decision + loop_state["last_user_decision_type"] = user_decision_type + loop_state["last_user_decision_prompt"] = user_decision_prompt + + iteration_record: dict[str, Any] = { + "iteration_id": iteration_id, + "pack_dir": str(pack_dir), + "quality_score": int(analyst_verdict.get("quality_score") or 0), + "loop_decision": loop_decision, + "accepted_gate": accepted_gate, + "requires_user_decision": requires_user_decision, + "user_decision_type": user_decision_type, + "user_decision_prompt": user_decision_prompt, + "analyst_verdict_path": str(analyst_verdict_path), + "coder_status": None, + } + + if accepted_gate: + loop_state["iterations"].append(iteration_record) + loop_state["final_status"] = "accepted" + loop_state["stop_reason"] = f"analyst accepted at {iteration_id}" + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + break + 
+ if requires_user_decision: + loop_state["iterations"].append(iteration_record) + if loop_decision in {"needs_exact_capability", "partial", "blocked"}: + loop_state["final_status"] = loop_decision + else: + loop_state["final_status"] = "partial" + prompt_suffix = f" | prompt: {user_decision_prompt}" if user_decision_prompt else "" + loop_state["stop_reason"] = f"user_decision_required at {iteration_id}: {user_decision_type}{prompt_suffix}" + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + break + + if loop_decision == "blocked": + loop_state["iterations"].append(iteration_record) + loop_state["final_status"] = "blocked" + loop_state["stop_reason"] = f"analyst blocked at {iteration_id}" + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + break + + coder_result_path = iteration_dir / "coder_result.json" + coder_prompt = build_coder_loop_prompt( + loop_dir=loop_dir, + iteration_dir=iteration_dir, + pack_dir=pack_dir, + analyst_verdict_path=analyst_verdict_path, + analyst_verdict_json=dump_json(analyst_verdict), + ) + write_text(iteration_dir / "coder_prompt.md", coder_prompt + "\n") + coder_command = build_codex_exec_command( + args, + output_file=coder_result_path, + schema_file=Path(args.coder_schema).resolve(), + sandbox_mode="workspace-write", + model_override=getattr(args, "coder_codex_model", None), + reasoning_effort=getattr(args, "coder_reasoning_effort", None), + ) + run_subprocess_command( + coder_command, + cwd=REPO_ROOT, + timeout_seconds=int(args.codex_timeout_seconds), + input_text=coder_prompt, + stdout_path=iteration_dir / "coder_exec.stdout.log", + stderr_path=iteration_dir / "coder_exec.stderr.log", + ) + coder_result = read_json_output(coder_result_path) + coder_status = str(coder_result.get("status") or "").strip() or "unknown" + 
iteration_record["coder_status"] = coder_status + iteration_record["coder_result_path"] = str(coder_result_path) + loop_state["iterations"].append(iteration_record) + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + + if coder_status == "blocked": + loop_state["final_status"] = "blocked" + loop_state["stop_reason"] = f"coder stopped progress at {iteration_id}: {coder_status}" + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + break + + previous_pack_dir = pack_dir + previous_verdict_path = analyst_verdict_path + else: + if loop_state.get("last_analyst_decision") == "needs_exact_capability": + loop_state["final_status"] = "needs_exact_capability" + else: + loop_state["final_status"] = "partial" + loop_state["stop_reason"] = f"max_iterations_reached ({max_iterations})" + loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_json(loop_dir / "loop_state.json", loop_state) + + write_text(loop_dir / "loop_summary.md", build_loop_summary(loop_state)) + write_text(loop_dir / "final_status.md", build_loop_final_status(loop_state)) + print(f"[domain-case-loop] saved loop artifacts to {loop_dir}") + print(f"[domain-case-loop] final_status={loop_state['final_status']}") + return 0 + + def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Repo-native helper for NDC_1C domain-case orchestration") + parser = argparse.ArgumentParser(description="Repo-native helper for NDC_1C domain-case and scenario orchestration") subparsers = parser.add_subparsers(dest="command", required=True) run_case = subparsers.add_parser("run-case", help="Run one assistant_stage1 case through the existing backend and save artifacts") @@ -605,6 +2179,86 @@ def build_parser() -> argparse.ArgumentParser: 
import_export.add_argument("--expected-result-mode") import_export.set_defaults(func=handle_import_export) + run_scenario = subparsers.add_parser( + "run-scenario", + help="Run one multi-step domain scenario in a shared assistant session and save per-step artifacts", + ) + run_scenario.add_argument("--manifest", required=True) + run_scenario.add_argument("--scenario-id") + run_scenario.add_argument("--analysis-date") + run_scenario.add_argument("--backend-url", default=DEFAULT_BACKEND_URL) + run_scenario.add_argument("--output-root", default=str(DEFAULT_ARTIFACTS_ROOT)) + run_scenario.add_argument("--prompt-version", default=DEFAULT_PROMPT_VERSION) + run_scenario.add_argument("--llm-provider", default=DEFAULT_LLM_PROVIDER, choices=["openai", "local"]) + run_scenario.add_argument("--llm-model", default=DEFAULT_LLM_MODEL) + run_scenario.add_argument("--llm-base-url", default=DEFAULT_LLM_BASE_URL) + run_scenario.add_argument("--llm-api-key", default=DEFAULT_LLM_API_KEY) + run_scenario.add_argument("--temperature", type=float, default=DEFAULT_TEMPERATURE) + run_scenario.add_argument("--max-output-tokens", type=int, default=DEFAULT_MAX_OUTPUT_TOKENS) + run_scenario.add_argument("--timeout-seconds", type=int, default=180) + run_scenario.add_argument("--use-mock", action="store_true") + run_scenario.set_defaults(func=handle_run_scenario) + + run_pack = subparsers.add_parser( + "run-pack", + help="Run a multi-scenario domain pack and save aggregate orchestration artifacts", + ) + run_pack.add_argument("--manifest", required=True) + run_pack.add_argument("--pack-id") + run_pack.add_argument("--analysis-date") + run_pack.add_argument("--max-scenarios", type=int) + run_pack.add_argument("--backend-url", default=DEFAULT_BACKEND_URL) + run_pack.add_argument("--output-root", default=str(DEFAULT_ARTIFACTS_ROOT)) + run_pack.add_argument("--prompt-version", default=DEFAULT_PROMPT_VERSION) + run_pack.add_argument("--llm-provider", default=DEFAULT_LLM_PROVIDER, choices=["openai", 
"local"]) + run_pack.add_argument("--llm-model", default=DEFAULT_LLM_MODEL) + run_pack.add_argument("--llm-base-url", default=DEFAULT_LLM_BASE_URL) + run_pack.add_argument("--llm-api-key", default=DEFAULT_LLM_API_KEY) + run_pack.add_argument("--temperature", type=float, default=DEFAULT_TEMPERATURE) + run_pack.add_argument("--max-output-tokens", type=int, default=DEFAULT_MAX_OUTPUT_TOKENS) + run_pack.add_argument("--timeout-seconds", type=int, default=180) + run_pack.add_argument("--use-mock", action="store_true") + run_pack.set_defaults(func=handle_run_pack) + + run_pack_loop = subparsers.add_parser( + "run-pack-loop", + help="Run autonomous analyst -> coder -> rerun iterations for a domain pack until the acceptance gate is reached", + ) + run_pack_loop.add_argument("--manifest", required=True) + run_pack_loop.add_argument("--loop-id") + run_pack_loop.add_argument("--analysis-date") + run_pack_loop.add_argument("--max-scenarios", type=int) + run_pack_loop.add_argument("--target-score", type=int, default=80) + run_pack_loop.add_argument("--max-iterations", type=int, default=8) + run_pack_loop.add_argument("--backend-url", default=DEFAULT_BACKEND_URL) + run_pack_loop.add_argument("--output-root", default=str(DEFAULT_ARTIFACTS_ROOT)) + run_pack_loop.add_argument("--prompt-version", default=DEFAULT_PROMPT_VERSION) + run_pack_loop.add_argument("--llm-provider", default=DEFAULT_LLM_PROVIDER, choices=["openai", "local"]) + run_pack_loop.add_argument("--llm-model", default=DEFAULT_LLM_MODEL) + run_pack_loop.add_argument("--llm-base-url", default=DEFAULT_LLM_BASE_URL) + run_pack_loop.add_argument("--llm-api-key", default=DEFAULT_LLM_API_KEY) + run_pack_loop.add_argument("--temperature", type=float, default=DEFAULT_TEMPERATURE) + run_pack_loop.add_argument("--max-output-tokens", type=int, default=DEFAULT_MAX_OUTPUT_TOKENS) + run_pack_loop.add_argument("--timeout-seconds", type=int, default=180) + run_pack_loop.add_argument("--use-mock", action="store_true") + 
run_pack_loop.add_argument("--codex-binary", default="codex") + run_pack_loop.add_argument("--codex-profile") + run_pack_loop.add_argument("--codex-model") + run_pack_loop.add_argument("--analyst-codex-model", default="gpt-5.4") + run_pack_loop.add_argument("--coder-codex-model", default="gpt-5.4-mini") + run_pack_loop.add_argument("--analyst-reasoning-effort", default="medium") + run_pack_loop.add_argument("--coder-reasoning-effort", default="low") + run_pack_loop.add_argument("--codex-timeout-seconds", type=int, default=1800) + run_pack_loop.add_argument( + "--analyst-schema", + default=str(DEFAULT_LOOP_SCHEMA_DIR / "domain_loop_analyst_verdict.schema.json"), + ) + run_pack_loop.add_argument( + "--coder-schema", + default=str(DEFAULT_LOOP_SCHEMA_DIR / "domain_loop_coder_result.schema.json"), + ) + run_pack_loop.set_defaults(func=handle_run_pack_loop) + return parser