ДОМЕНЫ - ВОПРОСЫ - ОРКЕСТРАЦИЯ - БАЗА - Поднять внешний domain-case loop для Codex с baseline/rerun артефактами

2026-04-13 19:37:57 +03:00 · 2026-04-13 19:37:57 +03:00 · 8e16bc1f01
parent f64980fa13
commit 8e16bc1f01
59 changed files with 2663 additions and 46 deletions
--- a/.codex/agents/domain_analyst.toml
+++ b/.codex/agents/domain_analyst.toml
@ -0,0 +1,46 @@
+name = "domain_analyst"
+description = "Read-only business and technical analyst for NDC_1C domain-case verdicts based on JSON turn artifacts, assistant outputs, debug payloads, and before/after diffs."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "read-only"
+developer_instructions = """
+You are the strict domain analyst for NDC_1C.
+
+You do not write product code.
+You read:
+- case_brief.md
+- baseline_turn.json and rerun_turn.json when available
+- baseline_output.md / rerun_output.md
+- baseline_debug.json / rerun_debug.json
+- optional diffs and patch summary
+
+Your job is to produce a detailed verdict in Russian with strong business focus.
+
+Always answer in a strict structure:
+1. Смысл вопроса
+2. Что реально посчитано
+3. Где расхождение по бизнес-смыслу
+4. Где route / capability mismatch
+5. Evidence quality
+6. P0 defects
+7. P1 defects
+8. P2 defects
+9. Minimal patch directions
+10. Acceptance criteria for rerun
+11. Quality score
+12. Loop decision
+
+Rules:
+- Call out non-business garbage explicitly.
+- Distinguish exact, partial, heuristic, and technical-insufficiency modes.
+- Do not accept a heuristic result as a final answer.
+- Do not praise superficial wording improvements if the compute layer is still wrong.
+- Highlight if an answer is unusable for a manager, accountant, or operator.
+- If the system answered a weaker question than the user asked, say so explicitly.
+
+Quality score:
+- Output one integer score from 0 to 100.
+- Score >= 80 allows loop_decision accepted only if there is no unresolved P0.
+- If score < 80 or any P0 remains unresolved, loop_decision must be continue, partial, blocked, or needs_exact_capability.
+"""
+nickname_candidates = ["Lens", "Vector", "Delta"]
--- a/.codex/agents/domain_coder.toml
+++ b/.codex/agents/domain_coder.toml
@ -0,0 +1,46 @@
+name = "domain_coder"
+description = "Implementation-focused agent for minimal domain fixes in NDC_1C capabilities, routes, schemas, validators, evidence, and presentation logic without architecture drift."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the domain implementation agent for NDC_1C.
+
+Primary repo facts:
+- Architecture is already stabilized.
+- Exact 1C/MCP-backed routes are preferred over heuristics.
+- Address runtime, deep runtime, capability policy, route expectations, and navigation state already exist.
+- Keep patches minimal and domain-scoped.
+
+Your mission:
+- Read the case brief, baseline_turn.json, baseline output/debug, and analyst verdict.
+- Find the smallest domain-only patch that moves the case toward a correct, useful, business-readable answer.
+- Use exact 1C/MCP-backed routes when they exist.
+- If exact data does not exist in the reachable contour, surface technical insufficiency instead of fabricating a result.
+
+Allowed change zones:
+- intents
+- domain-specific routing
+- recipes
+- capability mapping
+- exact/confirmed route handling
+- domain validators
+- evidence/source-ref modeling
+- role modeling
+- follow-up resolution for one domain case
+- business-readable presentation
+
+Forbidden:
+- broad architecture changes
+- fake data
+- silent heuristic masking
+- large refactors unrelated to the case
+- changing successful baseline flows without necessity
+
+Always produce:
+1. a short coder_plan
+2. the minimal patch
+3. a patch_summary
+4. rerun instructions or executed rerun artifacts
+"""
+nickname_candidates = ["Forge", "Quartz", "Helix"]
--- a/.codex/agents/orchestrator.toml
+++ b/.codex/agents/orchestrator.toml
@ -0,0 +1,54 @@
+name = "orchestrator"
+description = "Coordinates a repo-native domain-case loop for NDC_1C: baseline capture, analyst verdict, minimal domain patch, rerun, and 80-point acceptance gate."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the orchestrator for domain-case development in NDC_1C.
+
+Primary repo facts:
+- The architecture is already established and must not be rewritten for one case.
+- The project uses a 1C/MCP-first runtime with address lane + deep lane.
+- Technical case artifacts should live in artifacts/domain_runs/<case_id>/.
+- The helper runner is python scripts/domain_case_loop.py.
+
+Your job:
+1. Accept one concrete domain case from the user.
+2. Create or reuse an artifact folder under artifacts/domain_runs/<case_id>/.
+3. Capture baseline via one of:
+   - python scripts/domain_case_loop.py run-case ...
+   - python scripts/domain_case_loop.py import-export ...
+4. Ask domain_analyst for a strict verdict in Russian using baseline_turn.json first, then baseline_output.md / baseline_debug.json.
+5. Feed the verdict to domain_coder for the smallest defensible domain-only patch.
+6. Capture rerun artifacts.
+7. Ask domain_analyst for before/after comparison and a quality score.
+8. End with one status: accepted | partial | blocked | needs_exact_capability.
+
+Hard rules:
+- Do not change architecture.
+- Do not accept heuristic output as a confirmed business answer.
+- Do not allow silent fallback masking.
+- Keep the loop artifact-driven.
+- Reuse the existing backend/session/export flow; do not invent a parallel runtime.
+- When the repo structure differs from a template, adapt the skill/scripts/paths, not the product architecture.
+
+Acceptance gate:
+- accepted requires analyst quality_score >= 80
+- accepted requires zero unresolved P0 defects
+- accepted requires no business-critical regression in rerun
+
+Required artifacts per cycle:
+- case_brief.md
+- baseline_output.md
+- baseline_debug.json
+- baseline_turn.json
+- analyst_verdict.md
+- coder_plan.md
+- patch_summary.md
+- rerun_output.md
+- rerun_debug.json
+- rerun_turn.json
+- before_after_diff.md
+- final_status.md
+"""
+nickname_candidates = ["Atlas", "Radian", "North"]
--- a/.codex/config.toml
+++ b/.codex/config.toml
@ -0,0 +1,10 @@
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+approval_policy = "on-request"
+project_root_markers = [".git", "AGENTS.md"]
+
+[agents]
+max_threads = 3
+max_depth = 1
+job_max_runtime_seconds = 3600
--- a/.codex/skills/domain-case-loop/SKILL.md
+++ b/.codex/skills/domain-case-loop/SKILL.md
@ -0,0 +1,133 @@
+---
+name: domain-case-loop
+description: "Use this skill when a user wants to iteratively refine one NDC_1C domain case through a multi-agent loop: automated baseline capture, JSON analysis, minimal domain patch, rerun, and before/after verdict."
+---
+
+# Domain case loop
+
+This skill packages the standard workflow for iterating on one concrete domain case in NDC_1C.
+
+## Use this skill when
+
+- the user wants to improve one domain question end-to-end;
+- the answer exists but is noisy, heuristic, partial, or business-useless;
+- the route is wrong even if the wording looks better;
+- there is a gap between exact compute intent and actual fallback output;
+- there are follow-up / continuation bugs that corrupt business context.
+
+## Do not use this skill when
+
+- the user is asking for a broad architecture rewrite;
+- there is no concrete domain case or no reproducible input;
+- the task is only prose editing with no technical/domain component;
+- the task is a generic repo cleanup unrelated to domain capability behavior.
+
+## Repo-specific runtime map
+
+Read `references/repo_runtime_map.md` before the first real cycle.
+
+Use these repo-native capture paths:
+- automated capture: `python scripts/domain_case_loop.py run-case ...`
+- import existing technical export: `python scripts/domain_case_loop.py import-export ...`
+
+## Workflow
+
+### Step 1 - Normalize the case
+
+Create `artifacts/domain_runs/<case_id>/case_brief.md` with:
+- domain name
+- raw user question
+- expected business meaning
+- expected exact capability
+- expected result mode
+- known constraints
+- acceptance criteria draft
+
+Use `references/case_brief_template.md`.
+
+### Step 2 - Capture baseline
+
+Preferred path:
+- run `python scripts/domain_case_loop.py run-case ...`
+
+Fallback path:
+- if the user already has a copied technical export markdown, run `python scripts/domain_case_loop.py import-export ...`
+
+Required artifacts:
+- `baseline_output.md`
+- `baseline_debug.json`
+- `baseline_turn.json`
+
+### Step 3 - Analyst verdict
+
+Spawn `domain_analyst` and provide:
+- `case_brief.md`
+- `baseline_turn.json`
+- `baseline_output.md`
+- `baseline_debug.json`
+- optional relevant code excerpts or file paths
+
+Require a full verdict using `references/verdict_template.md`.
+
+### Step 4 - Domain patch
+
+Spawn `domain_coder` with:
+- the case brief
+- the analyst verdict
+- baseline artifacts
+
+Require:
+- a minimal patch
+- zero architecture drift
+- rerun after changes
+
+### Step 5 - Rerun
+
+Capture:
+- `rerun_output.md`
+- `rerun_debug.json`
+- `rerun_turn.json`
+- `patch_summary.md`
+
+### Step 6 - Before/after analysis
+
+Spawn `domain_analyst` again for:
+- before/after comparison
+- final status recommendation
+- quality score from 0 to 100
+
+### Step 7 - Final status
+
+Write `final_status.md` with one of:
+- accepted
+- partial
+- blocked
+- needs_exact_capability
+
+Accepted requires:
+- quality score >= 80
+- no unresolved P0 defects
+- no silent heuristic masking
+
+## Hard rules
+
+- Do not count heuristic candidates as confirmed business answers.
+- If exact data should exist in 1C/MCP, prefer exact route work over prompt cosmetics.
+- If exact data does not exist yet in the reachable contour, return a technical insufficiency with a crisp blocker.
+- Never fabricate 1C data.
+- Keep domain fixes minimal and localized.
+- Preserve successful baseline scenarios.
+- Treat follow-up continuity as a state-machine problem, not a wording problem.
+
+## Domain-specific framing
+
+For this repository:
+- architecture must remain unchanged;
+- 1C/MCP is the primary source of truth;
+- analyst output must be detailed and business-readable;
+- answers should be suitable for product hardening, not just debugging notes;
+- machine-readable turn artifacts are first-class inputs for analysis.
+
+## Recommended artifact set
+
+Use the artifact layout from `references/artifact_layout.md`.
--- a/.codex/skills/domain-case-loop/references/artifact_layout.md
+++ b/.codex/skills/domain-case-loop/references/artifact_layout.md
@ -0,0 +1,23 @@
+# Artifact layout
+
+For each domain case use:
+
+artifacts/domain_runs/<case_id>/
+- case_brief.md
+- baseline_output.md
+- baseline_debug.json
+- baseline_turn.json
+- baseline_session.json
+- baseline_job.json
+- baseline_report_case.json
+- analyst_verdict.md
+- coder_plan.md
+- patch_summary.md
+- rerun_output.md
+- rerun_debug.json
+- rerun_turn.json
+- rerun_session.json
+- rerun_job.json
+- rerun_report_case.json
+- before_after_diff.md
+- final_status.md
--- a/.codex/skills/domain-case-loop/references/case_brief_template.md
+++ b/.codex/skills/domain-case-loop/references/case_brief_template.md
@ -0,0 +1,29 @@
+# Case brief template
+
+## Domain
+`<domain_name>`
+
+## Raw user question
+`<raw_question>`
+
+## Expected business meaning
+- ...
+
+## Expected capability
+- ...
+
+## Expected result mode
+- confirmed_balance / confirmed_tax_liability / partial / technical_insufficiency / other
+
+## Constraints
+- no architecture changes
+- 1C/MCP first
+- no fabricated values
+- heuristic is not product success
+- accepted requires analyst quality score >= 80 and zero unresolved P0
+
+## Known current behavior
+- ...
+
+## Draft acceptance criteria
+- ...
--- a/.codex/skills/domain-case-loop/references/domain_constraints.md
+++ b/.codex/skills/domain-case-loop/references/domain_constraints.md
@ -0,0 +1,9 @@
+# Domain constraints
+
+- Архитектуру проекта не менять.
+- Максимально использовать 1С/MCP.
+- Не придумывать значения.
+- Не считать heuristic ответ продуктовым успехом.
+- Математика вне 1С допустима только как детерминированный постпроцесс над уже подтвержденными фактами.
+- Analyst read-only, Coder implementation-focused.
+- Accepted требует score >= 80, zero unresolved P0 и отсутствия silent fallback masking.
--- a/.codex/skills/domain-case-loop/references/repo_runtime_map.md
+++ b/.codex/skills/domain-case-loop/references/repo_runtime_map.md
@ -0,0 +1,32 @@
+# Repo runtime map
+
+## Existing runtime pieces
+
+- Assistant backend: `llm_normalizer/backend`
+- Technical export formatter: `llm_normalizer/frontend/src/utils/conversationExport.ts`
+- Async single-case runner: `POST /api/eval/run-async/start`
+- Async polling: `GET /api/eval/run-async/:job_id`
+- Session store path: `llm_normalizer/data/assistant_sessions/<session_id>.json`
+- Session API: `GET /api/assistant/session/:session_id`
+
+## Capture strategy for this repo
+
+1. Prefer automated capture with:
+   - `python scripts/domain_case_loop.py run-case ...`
+2. If baseline already exists as copied markdown export, import it with:
+   - `python scripts/domain_case_loop.py import-export ...`
+3. Use `baseline_turn.json` / `rerun_turn.json` as canonical analyst input.
+4. Use `baseline_output.md` / `rerun_output.md` as human-readable paired artifacts.
+
+## Default run assumptions
+
+- backend URL: `http://127.0.0.1:8787`
+- eval target: `assistant_stage1`
+- single-case async run uses generated case id `AUTO-001`
+- artifact root: `artifacts/domain_runs/<case_id>/`
+
+## Important constraints
+
+- Reuse current assistant runtime; do not build a parallel execution lane.
+- Preserve UTF-8 without BOM for every generated artifact.
+- Do not overwrite existing AGENTS rules; extend them.
--- a/.codex/skills/domain-case-loop/references/verdict_template.md
+++ b/.codex/skills/domain-case-loop/references/verdict_template.md
@ -0,0 +1,38 @@
+# Verdict
+
+## 1. Смысл вопроса
+...
+
+## 2. Что реально посчитано
+...
+
+## 3. Где расхождение по бизнес-смыслу
+...
+
+## 4. Где route / capability mismatch
+...
+
+## 5. Evidence quality
+- exact / partial / heuristic / technical insufficiency
+- why
+
+## 6. P0 defects
+- ...
+
+## 7. P1 defects
+- ...
+
+## 8. P2 defects
+- ...
+
+## 9. Minimal patch directions
+- ...
+
+## 10. Acceptance criteria for rerun
+- ...
+
+## 11. Quality score
+- integer from 0 to 100
+
+## 12. Loop decision
+- accepted / continue / partial / blocked / needs_exact_capability
--- a/.gitignore
+++ b/.gitignore
@ -25,3 +25,7 @@ llm_normalizer/docs/runs/*/
 # graphify artifacts
 graphify-out/
 .graphify_*
+
+# domain-case loop artifacts
+artifacts/domain_runs/*
+!artifacts/domain_runs/.gitkeep
--- a/AGENTS.md
+++ b/AGENTS.md
@ -12,3 +12,10 @@ Rules:
 - Before answering architecture or codebase questions, read graphify-out/GRAPH_REPORT.md for god nodes and community structure
 - If graphify-out/wiki/index.md exists, navigate it instead of reading raw files
 - After modifying code files in this session, run `python -c "from graphify.watch import _rebuild_code; from pathlib import Path; _rebuild_code(Path('.'))"` to keep the graph current
+
+## codex_domain_loop
+- Project-scoped Codex orchestration lives under `.codex/`.
+- Use `.codex/skills/domain-case-loop` for repeatable domain hardening loops on one concrete case.
+- Preserve current architecture: domain loop may automate capture, review, rerun, and artifact storage, but must not rewrite runtime foundations.
+- Prefer machine-readable case artifacts in `artifacts/domain_runs/<case_id>/`, especially `baseline_turn.json` / `rerun_turn.json`, over ad hoc prose-only summaries.
+- A case can be marked `accepted` only when analyst verdict is at least `80/100`, no unresolved `P0` remains, and the rerun does not mask heuristic output as confirmed.
--- a/artifacts/domain_runs/.gitkeep
+++ b/artifacts/domain_runs/.gitkeep
@ -0,0 +1 @@
+
--- a/docs/ADDRESS/address_query/README.md
+++ b/docs/ADDRESS/address_query/README.md
@ -1,9 +1,9 @@
 # ADDRESS Query Docs

-Дата: 2026-04-08  
+Дата: 2026-04-13  
 Статус синхронизации: актуализировано по текущему коду в `llm_normalizer/backend/src/services/*`.

-## Актуальный статус (2026-04-08)
+## Актуальный статус (2026-04-13)

 - Этап стабилизации закрыт под `strict_policy=route`.
 - Step-0 pre-prod rails закрыт (reference-domain + nightly automation).
@ -47,8 +47,12 @@
 - Task Scheduler: `NDC_ADDRESS_Nightly_Regression` временно `Disabled` (ручной режим до стабилизации infra-канала).
 - Текущий production-контур: `question_mode=address_query`, live-first через MCP.
 - Следующий этап: `Step-5` Architecture + UX Quality (LLM-first валидация входа, улучшение пользовательского ответа, без расширения domain scope).
+- Contracts domain increment (2026-04-13):
+  - прямой вопрос об открытых договорах теперь идет в exact-capability `open_contracts_confirmed_as_of_date`;
+  - heuristic `list_open_contracts` сохранен как diagnostic-only слой;
+  - business-view exact-ответа усилен через `net/gross`, split `special_valid` vs `dirty_unresolved` и разрез компонентных остатков.

-## Что реально реализовано в коде (срез 2026-04-08)
+## Что реально реализовано в коде (срез 2026-04-13)

 Поддерживаемые intents в runtime:

@ -60,7 +64,8 @@
 - `customer_revenue_and_payments` (Wave-1 B3 value, gate-closed)
 - `supplier_payouts_profile` (Wave-1 B3 value, gate-closed)
 - `contract_usage_and_value` (Wave-1 B3 value, gate-closed)
-- `list_open_contracts`
+- `open_contracts_confirmed_as_of_date`
+- `list_open_contracts` (diagnostic heuristic)
 - `list_payables_counterparties`
 - `list_receivables_counterparties`
 - `account_balance_snapshot`
@ -83,6 +88,7 @@

 - `address_scenario_matrix.md` - актуальная матрица сценариев `question -> intent -> recipe_id`.
 - `query_recipes_v1.md` - фактический каталог runtime recipes и их контрактов.
+- `../../TECH/address_open_contracts_confirmed_as_of_date_spec.md` - exact-spec для домена открытых договоров на дату.
 - `runtime_readiness_matrix_v1.md` - статус готовности сценариев по текущему коду.
 - `address_runtime_contracts.md` - актуальный debug/output контракт address lane.
 - `runtime_integration_plan.md` - фактическая схема интеграции в `assistantService`.
--- a/docs/ADDRESS/address_query/address_scenario_matrix.md
+++ b/docs/ADDRESS/address_query/address_scenario_matrix.md
@ -1,6 +1,6 @@
 # Address Scenario Matrix (V1)

-Дата: 2026-04-02  
+Дата: 2026-04-13  
 Режим: `question_mode=address_query` (отдельно от deep-analysis)

 ## Scope
@ -21,7 +21,8 @@

 | scenario_id | Пользовательский вопрос | intent | required_filters (runtime) | optional_filters | target_entity_family | recipe_id (runtime) | expected_response_type (runtime) | priority |
 |---|---|---|---|---|---|---|---|---|
-| AQ-P0-01 | Какие договоры не закрыты на текущую дату? | `list_open_contracts` | - | `as_of_date`, `organization`, `limit` | `ACCOUNTING_REGISTER`, `DOCUMENT`, `NSI_CATALOG` | `address_open_contracts_candidates_v1` | `FACTUAL_LIST` | P0 |
+| AQ-P0-01 | Какие есть открытые договоры на дату? | `open_contracts_confirmed_as_of_date` | `as_of_date` (defaulted from period end) | `period_from`, `period_to`, `organization`, `counterparty`, `contract`, `limit`, `sort` | `ACCOUNTING_REGISTER`, `DOCUMENT`, `NSI_CATALOG` | `address_open_contracts_confirmed_as_of_date_v1` | `FACTUAL_LIST` | P0 |
+| AQ-P0-01H | Покажи кандидаты незакрытых договоров / где спорные хвосты по договорам? | `list_open_contracts` | - | `as_of_date`, `organization`, `limit` | `ACCOUNTING_REGISTER`, `DOCUMENT`, `NSI_CATALOG` | `address_open_contracts_candidates_v1` | `FACTUAL_LIST` | P0 |
 | AQ-P0-02 | Кому мы должны денег на сегодня? | `list_payables_counterparties` | - | `as_of_date`, `counterparty`, `contract`, `limit` | `ACCOUNTING_REGISTER` | `address_movements_payables_v1` | `FACTUAL_LIST` | P0 |
 | AQ-P0-03 | Кто должен нам денег на сегодня? | `list_receivables_counterparties` | - | `as_of_date`, `counterparty`, `contract`, `limit` | `ACCOUNTING_REGISTER` | `address_movements_receivables_v1` | `FACTUAL_LIST` | P0 |
 | AQ-P0-04 | Какой остаток по счету 60 на дату? | `account_balance_snapshot` | `account` (`as_of_date` defaulted) | `as_of_date`, `period_from`, `period_to`, `limit` | `ACCOUNTING_REGISTER`, `CHART_OF_ACCOUNTS` | `address_movements_account_snapshot_v1` | `FACTUAL_SUMMARY` | P0 |
@ -42,19 +43,21 @@
 - Если mode распознан как `address_query`, ответ строится через whitelist recipe + MCP live path.
 - Если `shape=EXPLAIN_OR_REASON`, запрос не идет в address lane (handoff в deep-analysis).
 - Если обязательные фильтры не извлечены, возвращается `LIMITED_WITH_REASON` с `limited_reason_category=missing_anchor`.
+- Для `open_contracts_confirmed_as_of_date` запрещена silent-подмена на `list_open_contracts`; при недоступности exact-ответа допустим только честный `LIMITED_WITH_REASON`.
 - Для `account_balance_snapshot` и `documents_forming_balance`:
  - `as_of_date` берется из `period_to`, если период задан;
  - иначе default на текущую дату runtime.
 - Для `documents/bank by counterparty|contract` период по умолчанию не форсируется (all-time), с runtime лимитами и fallback-логикой.
 - `COMPOUND_FACTUAL_QUERY` пока только детектируется; multi-intent execution не реализован.

-## Runtime status note (2026-04-02)
+## Runtime status note (2026-04-13)

 Реально реализованы в runtime:

 - `period_coverage_profile`
 - `document_type_and_account_section_profile`
-- `list_open_contracts`
+- `open_contracts_confirmed_as_of_date`
+- `list_open_contracts` (diagnostic heuristic)
 - `open_items_by_counterparty_or_contract`
 - `list_documents_by_counterparty`
 - `bank_operations_by_counterparty`
--- a/docs/ADDRESS/address_query/complex_questions_status_and_reuse_map_2026-04-02.md
+++ b/docs/ADDRESS/address_query/complex_questions_status_and_reuse_map_2026-04-02.md
@ -1,5 +1,7 @@
 # Статус сложных вопросов и карта переиспользования (2026-04-02)

+Файл сохранен под историческим именем `complex_questions_status_and_reuse_map_2026-04-02.md`, но дополнен актуальным обновлением на 2026-04-13.
+
 Контур: `question_mode=address_query`

 ## 1. Что реально есть в коде сейчас
@ -43,3 +45,15 @@
 1. Закрыть Batch-1 (`Q1..Q7`, `Q28`) на базе уже подтвержденных `R01/R02` + `R03` + часть `R07`.
 2. После Batch-1 перейти к lifecycle (Batch-2), не смешивая его с risk/аномалиями.
 3. Каждую пачку закрывать run-pack артефактами и обязательным comparator к baseline.
+
+
+## 5. Update 2026-04-13: что уже перестало быть только design-layer
+
+- Класс вопросов про открытые договоры на дату больше не остается чисто heuristic/runtime-gap сценарием.
+- Для прямого business wording теперь есть отдельный exact-path:
+  - `open_contracts_confirmed_as_of_date`
+  - `address_open_contracts_confirmed_as_of_date_v1`
+- Это не отменяет диагностический `list_open_contracts`, но меняет его роль:
+  - heuristic-layer теперь служит для diagnostic/triage сценариев;
+  - exact business answer строится отдельным compute-route.
+- Оставшийся gap по этому домену теперь лежит не в route existence, а в entity normalization и executive presentation.
--- a/docs/ADDRESS/address_query/project_status_rails_graph_2026-04-08.md
+++ b/docs/ADDRESS/address_query/project_status_rails_graph_2026-04-08.md
@ -1,6 +1,8 @@
 # ADDRESS Project Status Rails Graph (2026-04-08)

-Дата среза: 2026-04-08  
+Файл сохранен под историческим именем `project_status_rails_graph_2026-04-08.md`, но дополнен актуальным update-блоком на 2026-04-13.
+
+Дата базового среза: 2026-04-08  
 Контур: `question_mode=address_query`

 ## Граф статуса
@ -65,3 +67,17 @@ flowchart LR
 - Added debug/log audit fields: `dialog_continuation_contract_v2`, `address_retry_audit`.
 - Targeted regression after hardening: `246/246` PASS (`assistantAddressFollowupContext`, `addressQueryRuntimeM23`, `assistantAddressLlmPredecompose`).
 - Living router increment: conversational `chat` mode added for non-data messages with safe fallback to deep pipeline (`assistantLivingRouter 4/4`, `assistantLivingChatMode 1/1`, build PASS).
+
+
+## Contracts Exact Increment Update (2026-04-13)
+
+- Для прямого вопроса об открытых договорах введен exact-capability:
+  - `open_contracts_confirmed_as_of_date`
+  - `address_open_contracts_confirmed_as_of_date_v1`
+- `list_open_contracts` больше не рассматривается как target-route для прямого business question и сохранен только как diagnostic heuristic-layer.
+- Поверх exact snapshot добавлен business-view:
+  - `net_open_balance`
+  - `gross_open_balance`
+  - split `special_valid` / `dirty_unresolved`
+- Актуальный targeted gate после increment:
+  - `367/367 PASS` (`addressQueryRuntimeM23 + assistantLivingRouter + assistantWave17 regression`).
--- a/docs/ADDRESS/address_query/query_recipes_v1.md
+++ b/docs/ADDRESS/address_query/query_recipes_v1.md
@ -1,6 +1,6 @@
 # Query Recipes V1 (Address Query)

-Дата: 2026-04-02  
+Дата: 2026-04-13  
 Контур: `question_mode=address_query` (live-first, whitelist only)

 ## 1) Safe Access Contract
@ -48,7 +48,8 @@
 | `address_document_type_and_account_section_profile_v1` | `document_type_and_account_section_profile` | профиль типов документов + заполненность разделов учета | - | `period_from`, `period_to`, `organization`, `limit` | `document_section_profile` | `preferred` |
 | `address_movements_payables_v1` | `list_payables_counterparties` | movement-срез по обязательствам | - | `as_of_date`, `counterparty`, `contract`, `limit` | `movements` | `preferred` |
 | `address_movements_receivables_v1` | `list_receivables_counterparties` | movement-срез по требованиям | - | `as_of_date`, `counterparty`, `contract`, `limit` | `movements` | `preferred` |
-| `address_open_contracts_candidates_v1` | `list_open_contracts` | кандидаты незакрытых договоров | - | `as_of_date`, `organization`, `limit` | `movements` | `preferred` |
+| `address_open_contracts_confirmed_as_of_date_v1` | `open_contracts_confirmed_as_of_date` | подтвержденный срез открытых договоров на дату | `as_of_date` | `period_from`, `period_to`, `organization`, `counterparty`, `contract`, `limit`, `sort` | `open_contracts_confirmed_as_of_balance_profile` | `strict` |
+| `address_open_contracts_candidates_v1` | `list_open_contracts` | диагностические кандидаты незакрытых договоров | - | `as_of_date`, `organization`, `limit` | `movements` | `preferred` |
 | `address_open_items_by_party_or_contract_v1` | `open_items_by_counterparty_or_contract` | открытые позиции по party/contract | - (`service guard`: нужен `counterparty OR contract`) | `as_of_date`, `counterparty`, `contract`, `limit` | `movements` | `preferred` |
 | `address_documents_by_counterparty_v1` | `list_documents_by_counterparty` | документы по контрагенту | `counterparty` | `period_from`, `period_to`, `as_of_date`, `organization`, `limit`, `sort` | `bank_docs` | `preferred` |
 | `address_bank_operations_by_counterparty_v1` | `bank_operations_by_counterparty` | банковские операции по контрагенту | `counterparty` | `period_from`, `period_to`, `as_of_date`, `organization`, `limit`, `sort` | `bank_docs` | `preferred` |
@ -65,7 +66,8 @@
  - `document_type_and_account_section_profile`;
  - `documents/bank by counterparty|contract`;
  - `open_items_by_counterparty_or_contract`;
-  - `list_open_contracts`.
+  - `list_open_contracts`;
+  - `open_contracts_confirmed_as_of_date`.
 - Для all-time запросов `documents/bank by *` runtime поднимает limit до max.
 - Для account intents при явном `account` limit поднимается до `200`.

@ -91,6 +93,9 @@ Legacy совместимость:
  - auto-broaden периода до доступных данных (`period_window_auto_broadened_to_available_data`).
 - Для `documents/bank by *` при anchor mismatch:
  - factual fallback на ближайшие строки (`anchor_not_matched_fallback_rows`) вместо silent empty.
+- Для `open_contracts_confirmed_as_of_date`:
+  - exact недоступность должна заканчиваться `LIMITED_WITH_REASON`;
+  - `list_open_contracts` допустим только как отдельный diagnostic capability, не как silent fallback.

 ## 8) Result Modes

--- a/docs/ADDRESS/address_query/runtime_integration_plan.md
+++ b/docs/ADDRESS/address_query/runtime_integration_plan.md
@ -1,6 +1,6 @@
 # Runtime Integration Plan (question_mode=address_query)

-Дата среза: 2026-04-02
+Дата среза: 2026-04-13

 ## 1) Цель

@ -54,7 +54,8 @@ Address lane уже встроен в `AssistantService` и включен featu

 Реализовано:

-- `list_open_contracts`
+- `open_contracts_confirmed_as_of_date`
+- `list_open_contracts` (diagnostic heuristic)
 - `list_payables_counterparties`
 - `list_receivables_counterparties`
 - `account_balance_snapshot`
@ -92,6 +93,7 @@ Address lane уже встроен в `AssistantService` и включен featu
 - Period auto-broaden for by-counterparty/by-contract docs+bank intents
 - Anchor mismatch fallback для document/bank intents
 - Contract-docs recovery path через bank-like rows
+- Для `open_contracts_confirmed_as_of_date` silent degrade в `list_open_contracts` запрещен; при недоступности exact-ответа допускается только `LIMITED_WITH_REASON`.

 ## 8) Compound Scope

--- a/docs/ADDRESS/address_query/runtime_readiness_matrix_v1.md
+++ b/docs/ADDRESS/address_query/runtime_readiness_matrix_v1.md
@ -1,6 +1,6 @@
 # Runtime Readiness Matrix V1 (Code Sync)

-Дата: 2026-04-08
+Дата: 2026-04-13

 Формат: `scenario -> structural_readiness -> runtime_readiness -> blocker`

@ -17,7 +17,8 @@

 | scenario_id | scenario | structural_readiness | runtime_readiness | current_blocker | next_action |
 |---|---|---|---|---|---|
-| AQ-P0-01 | list_open_contracts | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | contract candidates зависят от качества movement materialization | усилить contract resolver confidence и стабилизировать non-empty профили |
+| AQ-P0-01 | open_contracts_confirmed_as_of_date | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | exact core работает; остаточный риск теперь в entity normalization и качестве 76-аналитик | усиливать identity quality gates, special/dirty split и executive summary |
+| AQ-P0-01H | list_open_contracts (diagnostic) | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | heuristic shortlist пригоден только для диагностики и не должен подменять exact final output | держать как diagnostic-only capability и не смешивать с exact-path |
 | AQ-P0-02 | list_payables_counterparties | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | broad prompts могут давать sparse/empty | держать curated positive + периодные подсказки |
 | AQ-P0-03 | list_receivables_counterparties | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | broad prompts могут давать sparse/empty | держать curated positive + периодные подсказки |
 | AQ-P0-04 | account_balance_snapshot | STRUCTURALLY_VISIBLE | LIVE_QUERYABLE_WITH_LIMITS | часть кейсов упирается в account-scope/materialization | продолжить account token/shape audit |
@ -38,7 +39,19 @@
 | AQ-P1-10 | account_turnover_snapshot | STRUCTURALLY_VISIBLE | UNKNOWN | intent/recipe отсутствуют в runtime | планировать как отдельный домен Step-4 |
 | AQ-P1-11 | list_documents_by_type | STRUCTURALLY_VISIBLE | UNKNOWN | intent/recipe отсутствуют в runtime | планировать как отдельный домен Step-4 |

-## Sync Notes (2026-04-08)
+## Sync Notes (2026-04-13)
+
+- В runtime появился отдельный exact intent для открытых договоров:
+  - `open_contracts_confirmed_as_of_date`
+  - `address_open_contracts_confirmed_as_of_date_v1`
+- `list_open_contracts` сохранен как diagnostic heuristic-layer и больше не считается целевым final route для прямого бизнес-вопроса об открытых договорах.
+- Для open-contracts exact добавлен второй business-layer поверх точного snapshot:
+  - `net_open_balance`
+  - `gross_open_balance`
+  - `balance_components[]`
+  - split `special_valid` vs `dirty_unresolved`
+- Актуальный targeted code gate:
+  - `addressQueryRuntimeM23 + assistantLivingRouter + assistantWave17 regression = 367/367 PASS`.

 - В runtime реализованы by-contract intents:
  - `list_documents_by_contract`
--- a/docs/TECH/1CLLMARCH-FACT.md
+++ b/docs/TECH/1CLLMARCH-FACT.md
@ -1,8 +1,19 @@
 # 1CLLMARCH Fact Check And Stabilization Plan

-Updated at: 2026-04-11  
+Updated at: 2026-04-13  
 Source baseline: `docs/TECH/1CLLMARCH.md`

+## Update 2026-04-13
+
+- Exact capability for open contracts as-of date is now implemented:
+  - `open_contracts_confirmed_as_of_date`
+  - `address_open_contracts_confirmed_as_of_date_v1`
+- The former `list_open_contracts` path remains only as diagnostic heuristic-layer and is no longer the target final route for direct business wording.
+- The main remaining gap for this domain is now presentation/entity quality, not route existence:
+  - net/gross aggregation,
+  - special vs dirty split,
+  - contract/counterparty identity quality.
+
 ## 1. Purpose

 This document fixes the current factual state of the codebase against `1CLLMARCH` and records a production-focused stabilization plan that preserves:
--- a/docs/TECH/STATUS_2026-04-12.md
+++ b/docs/TECH/STATUS_2026-04-12.md
@ -1,52 +1,75 @@
-# Статус проекта на 2026-04-12
+# Статус проекта на 2026-04-13
+
+Файл сохранен под историческим именем `STATUS_2026-04-12.md`, но содержимое актуализировано по коду и тестам на 2026-04-13.

 ## 1) Что уже стабильно в compute-слое

- Введены и работают exact-маршруты подтвержденного среза на дату:
+- В runtime закреплены exact-маршруты подтвержденного среза на дату:
  - `payables_confirmed_as_of_date` (`address_payables_confirmed_as_of_date_v1`)
  - `receivables_confirmed_as_of_date` (`address_receivables_confirmed_as_of_date_v1`)
  - `vat_payable_confirmed_as_of_date` (`address_vat_payable_confirmed_as_of_date_v1`)
- Для этих интентов зафиксирован expected route/result mode в:
+  - `vat_liability_confirmed_for_tax_period` (`address_vat_liability_confirmed_tax_period_v1`)
+  - `open_contracts_confirmed_as_of_date` (`address_open_contracts_confirmed_as_of_date_v1`)
+- Для exact-сценариев зафиксирован контракт:
+  - `requested_result_mode = confirmed_balance`
+  - `result_mode = confirmed_balance`
+  - `capability_route_mode = exact`
+- Для открытых договоров прямой бизнес-вопрос больше не идет в heuristic shortlist:
+  - exact-вопросы маршрутизируются в `open_contracts_confirmed_as_of_date`;
+  - `list_open_contracts` сохранен как диагностический heuristic-слой, а не как substitute для exact-ответа.
+- Для exact-интентов ожидания маршрутов закреплены в:
  - `docs/TECH/address_route_expectations_v1.json`
- Режим результата для exact-сценариев закреплен как `confirmed_balance`.

-## 2) Что исправлено в цепных (follow-up) вопросах
+## 2) Что доведено в follow-up и presentation-слое

- Исправлен перенос даты среза в коротких продолжениях по долгам:
-  - после вопроса о долгах на дату follow-up по дебиторке наследует `as_of_date`, если новая дата не задана явно.
- Добавлен короткий follow-up для НДС:
-  - короткие реплики вида `а ндс?`/`по ндс` теперь корректно идут в VAT exact-route с переносом даты среза из контекста.
- Сохранена стратегия LLM-first нормализации с последующим детерминированным compute-роутингом.
+- Короткие follow-up-вопросы продолжают использовать дату среза из контекста, если новая дата явно не задана.
+- Для debt/VAT/open-contracts контуров сохранена схема `LLM-first normalize -> deterministic compute route`.
+- Для exact-кейса открытых договоров presentation-слой стал бизнесовее:
+  - появился `net/gross` слой поверх точного среза;
+  - одна детальная строка = один договор, один контрагент, один тип открытого остатка;
+  - смешанные экономические смыслы не склеиваются в одну строку;
+  - отдельными блоками вынесены `финансовые/специальные` и `спорные/некачественно нормализованные` позиции.
+- Для open-contracts exact-core отделен от heuristic diagnostics: улучшения бизнес-вывода больше не требуют менять сам route.

 ## 3) Что уже покрыто тестами

- Добавлены/актуализированы тесты на carryover и follow-up:
+- Актуальный целевой regression-gate:
  - `llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts`
-  - `llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts`
- Проверен маршрутный baseline:
-  - `llm_normalizer/backend/tests/addressRouteBaseline.test.ts`
+  - `llm_normalizer/backend/tests/assistantLivingRouter.test.ts`
+  - `llm_normalizer/backend/tests/assistantWave17RunRegression20260411.test.ts`
+- Текущий кодовый результат:
+  - `367/367` PASS.
+- В тестах отдельно закрыты:
+  - exact routing для `open_contracts_confirmed_as_of_date`;
+  - отсутствие silent degrade в heuristic для прямого exact-запроса;
+  - business-view блоков `net/gross` и вынос грязных сущностей в спорный блок.

-## 4) Известные ограничения (не считать багом расчета)
+## 4) Известные ограничения (не считать поломкой exact-core)

- В разговорных нерелевантных репликах (эмоции/брань/односложные сообщения) система может уйти в `clarification_required`; это относится к conversational-слою, не к compute-расчету.
- `query_shape` в части exact-кейсов может оставаться `UNKNOWN` при корректном `intent`; расчетный маршрут при этом работает корректно.
- Качество бизнес-категоризации контрагентов (особенно по счету 76) требует отдельной донастройки presentation-слоя.
+- `query_shape` в части exact-кейсов может оставаться `UNKNOWN` при корректном `intent`; сам вычислительный маршрут при этом работает корректно.
+- В exact-кейсе открытых договоров главный остаточный риск теперь не в маршрутизации, а в качестве бизнес-сущностей:
+  - неидеальная идентичность `contract_label` / `counterparty_label`;
+  - грязные аналитики по счету `76`;
+  - дальнейшее улучшение executive-summary поверх уже точного среза.
+- `list_open_contracts` по-прежнему heuristic и должен использоваться только как диагностический слой.
+- `COMPOUND_FACTUAL_QUERY` остается detection-only: multi-intent execution в runtime пока не включен.

 ## 5) Что в приоритете дальше

-1. НДС-контур: усилить доказательную часть расчета "к уплате на дату" и добавить понятную детализацию оснований.
-2. Цепные вопросы: закрепить перенос контекста между payables/receivables/VAT во всех коротких follow-up формулировках.
-3. Ответы для UI: довести формат вывода до стабильной блочной структуры без markdown-зависимости.
-4. Категоризация: отделить поставщиков/заказчиков от банков/госорганов/спецобязательств в итоговой выдаче.
+1. НДС-контур: усилить exact evidence layer для ответов “НДС к уплате / обязательство за период / на дату”.
+2. Открытые договоры: усилить quality gates для `contract/counterparty identity`, особенно на `76` и специальных расчетах.
+3. UI-ответы: довести exact business view до executive-summary уровня без потери доказательности.
+4. Compound factual queries: не расширять домены раньше, чем появится контролируемый multi-intent execution.

 ## 6) Быстрый smoke-check (ручной)

-1. `кому мы должны на сентябрь 2017`
-2. `а нам кто должен?`
-3. `кто нам должен на сентябрь 2017`
-4. `а ндс?`
+1. `какие есть открытые договора на май 2020`
+2. `а по ним кто нам должен и кому должны мы?`
+3. `скок надо ндс платить на март 2020`
+4. `а на эту же дату`

 Ожидаемое поведение:

- для 1/3 — `confirmed_balance` в exact-route,
- для 2/4 — корректный follow-up с переносом даты среза, без ухода в эвристический shortlist для exact-интентов.
+- для 1 — exact route `open_contracts_confirmed_as_of_date`, `confirmed_balance`, без подмены на heuristic shortlist;
+- для 2 — follow-up с сохранением даты и корректным переключением домена;
+- для 3/4 — exact VAT/payables route с переносом даты среза, если пользователь не задал новую.
--- a/docs/TECH/address_open_contracts_confirmed_as_of_date_spec.md
+++ b/docs/TECH/address_open_contracts_confirmed_as_of_date_spec.md
@ -0,0 +1,176 @@
+# Address Query Spec: Confirmed Open Contracts As Of Date
+
+## 1. Контекст проблемы
+
+Запросы вида:
+
+- `какие есть открытые договора на май 2020`
+- `какие договоры открыты на 31.05.2020`
+- `покажи открытые договоры на дату`
+
+относятся к классу **балансных договорных запросов** и требуют точного среза открытых взаиморасчетов на дату.
+
+Историческая проблема состояла в том, что сценарий `list_open_contracts` долгое время жил как heuristic shortlist по движениям `60/62/76`. Для диагностики это полезно, но для прямого бизнес-ответа недостаточно.
+
+## 2. Цель
+
+Ввести и зафиксировать exact-capability `open_contracts_confirmed_as_of_date`, который:
+
+- строит подтвержденный срез договоров с ненулевым остатком взаиморасчетов на дату;
+- не подменяется heuristic shortlist в финальном пользовательском ответе;
+- показывает не только точные компоненты, но и управленческий `net/gross` view поверх них;
+- отделяет специальные валидные позиции от грязных/некачественно нормализованных сущностей.
+
+## 3. Базовая бизнес-дефиниция
+
+`Открытый договор = договор, по которому на дату среза есть ненулевой остаток взаиморасчетов.`
+
+По умолчанию это не означает:
+
+- “активный по карточке договора”;
+- “не исполненный по предмету договора”;
+- “просроченный по сроку оплаты”.
+
+Это именно балансный срез открытых расчетов.
+
+## 4. Канонический runtime-контракт
+
+- `intent = open_contracts_confirmed_as_of_date`
+- `recipe_id = address_open_contracts_confirmed_as_of_date_v1`
+- `requested_result_mode = confirmed_balance`
+- `result_mode = confirmed_balance`
+- `capability_route_mode = exact`
+- `query_template = open_contracts_confirmed_as_of_balance_profile`
+- `account_scope = 60/62/76`
+- `account_scope_mode = strict`
+
+## 5. Источник данных
+
+Основной источник:
+
+- `РегистрБухгалтерии.Хозрасчетный.Остатки(<as_of_date>)`
+
+Контур включает остатки по счетам:
+
+- `60*`
+- `62*`
+- `76*`
+
+Сценарий использует exact snapshot по остаткам, а не только movement-shortlist.
+
+## 6. Единицы агрегации
+
+### 6.1 Exact component row
+
+Минимальная детальная единица ответа:
+
+- один договор;
+- один контрагент;
+- один тип открытого остатка.
+
+Поддерживаемые типы компонентов:
+
+- `receivable`
+- `payable`
+- `advance_issued`
+- `advance_received`
+- `other_receivable`
+- `other_payable`
+
+### 6.2 Management profile row
+
+Поверх component-уровня собирается второй слой:
+
+- `contract + counterparty`
+- `net_open_balance`
+- `gross_open_balance`
+- `balance_components[]`
+
+Именно этот слой нужен для бизнес-чтения, чтобы не воспринимать противоположные компоненты как дубли.
+
+## 7. Категории результата
+
+### 7.1 Commercial
+
+Нормально нормализованный договорный профиль, пригодный для основного бизнес-списка.
+
+### 7.2 Special valid
+
+Позиция не коммерческая, но валидная по смыслу:
+
+- финансовые/банковские договоры;
+- специальные расчетные позиции.
+
+### 7.3 Dirty unresolved
+
+Позиция точная по балансу, но грязная по сущностям:
+
+- не удалось надежно определить контрагента;
+- договор не похож на устойчивый договорный реквизит;
+- в поле договора похоже попал контрагент или чужая аналитика;
+- по одному договору нашлось несколько конфликтующих контрагентов.
+
+Такие строки не должны смешиваться с основным коммерческим блоком.
+
+## 8. Quality gates
+
+### 8.1 Contract identity gate
+
+Строка не считается качественной коммерческой строкой, если `contract_label`:
+
+- слишком короткий;
+- похож на юрлицо, а не на договор;
+- совпадает с `counterparty_label`;
+- выглядит как служебная аналитика.
+
+### 8.2 Counterparty identity gate
+
+Строка не считается чистой, если:
+
+- контрагент не определен;
+- в поле контрагента попал текст договора/служебная аналитика;
+- контрагент конфликтует между компонентами одного профиля.
+
+## 9. Политика fallback
+
+Для `open_contracts_confirmed_as_of_date` запрещено:
+
+- тихо деградировать в `heuristic_candidates` как финальный ответ;
+- выдавать diagnostic shortlist вместо точного snapshot без явной маркировки.
+
+Допустимый fallback только один:
+
+- `LIMITED_WITH_REASON`, если exact не удалось собрать.
+
+`list_open_contracts` допускается только как отдельный heuristic diagnostic capability.
+
+## 10. Контракт ответа
+
+### 10.1 Exact business answer
+
+Ответ должен содержать:
+
+1. статус exact-результата;
+2. дату среза;
+3. `net` и `gross` summary;
+4. блок `чистый открытый остаток по договорам`;
+5. блоки детальных компонентов;
+6. отдельный блок `финансовые/специальные позиции`;
+7. отдельный блок `спорные/некачественно нормализованные позиции`.
+
+### 10.2 Что пользователь не должен видеть
+
+- `0` и пустые account placeholders;
+- смешение противоположных остатков в одной строке;
+- грязные entity labels в основном коммерческом блоке.
+
+## 11. Acceptance criteria
+
+1. Прямой вопрос про открытые договоры на дату идет в `open_contracts_confirmed_as_of_date`, а не в `list_open_contracts`.
+2. Финальный exact-ответ имеет:
+   - `result_mode = confirmed_balance`
+   - `balance_confirmed = true`
+   - `capability_route_mode = exact`
+3. Для одного договора возможны несколько component-строк, но management-view показывает также `net/gross` профиль.
+4. Специальные валидные позиции и грязные сущности разведены в разные блоки.
+5. Heuristic shortlist не подменяет exact output.
--- a/docs/orchestration/codex_domain_loop_package_v2.zip
+++ b/docs/orchestration/codex_domain_loop_package_v2.zip
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/domain_analyst.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/domain_analyst.toml
@ -0,0 +1,39 @@
+name = "domain_analyst"
+description = "Read-only business and technical analyst for domain-case verdicts based on assistant outputs, JSON debug payloads, diffs, and rerun artifacts."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "read-only"
+developer_instructions = """
+You are the strict domain analyst.
+
+You do not write product code.
+You read:
+- the user question
+- the assistant answer
+- technical_debug_payload_json
+- optional diffs
+- rerun artifacts
+
+Your job is to produce a detailed verdict in Russian with strong business focus.
+
+Always answer in a strict structure:
+1. Смысл вопроса
+2. Что реально посчитано
+3. Где расхождение по бизнес-смыслу
+4. Где route / capability mismatch
+5. Evidence quality
+6. P0 defects
+7. P1 defects
+8. P2 defects
+9. Minimal patch directions
+10. Acceptance criteria for rerun
+
+Rules:
+- Call out non-business garbage explicitly.
+- Distinguish exact, partial, heuristic, and technical-insufficiency modes.
+- Do not accept a heuristic result as a final answer.
+- Do not praise superficial wording improvements if the compute layer is still wrong.
+- Highlight if an answer is unusable for a manager, accountant, or operator.
+- If the system answered a weaker question than the user asked, say so explicitly.
+"""
+nickname_candidates = ["Lens", "Vector", "Delta"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/domain_coder.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/domain_coder.toml
@ -0,0 +1,43 @@
+name = "domain_coder"
+description = "Implementation-focused agent for minimal domain fixes in 1C/MCP capabilities, routes, schemas, validators, and presentation logic without changing architecture."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the domain implementation agent.
+
+Your mission:
+- Read the target case, current answer, JSON/debug payload, and analyst verdict.
+- Find the smallest domain-only patch that moves the case toward a correct, useful, business-readable answer.
+- Do not change architecture.
+- Do not rewrite orchestration globally unless the change is strictly local and domain-scoped.
+- Prefer exact 1C/MCP-backed routes over heuristics.
+- If exact data exists in 1C/MCP, use it.
+- If exact data does not exist, surface a technical insufficiency rather than fabricating a result.
+
+Allowed change zones:
+- intents
+- domain-specific routing
+- recipes
+- capability mapping
+- evidence/source-ref modeling
+- role modeling
+- exact/confirmed routes
+- domain validators
+- follow-up resolution for a domain case
+- business-readable presentation
+
+Forbidden:
+- broad architecture changes
+- fake data
+- silent heuristic masking
+- large refactors unrelated to the case
+- changing successful baseline flows without necessity
+
+Always produce:
+1. a short coder_plan
+2. the minimal patch
+3. a patch_summary
+4. rerun instructions or executed rerun artifacts
+"""
+nickname_candidates = ["Forge", "Quartz", "Helix"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/orchestrator.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/agents/orchestrator.toml
@ -0,0 +1,38 @@
+name = "orchestrator"
+description = "Coordinates a domain-case loop: baseline run, analyst verdict, minimal domain patch, rerun, and final acceptance status."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the orchestrator for domain-case development in a 1C/MCP project.
+
+Your job:
+1. Accept a domain case from the user.
+2. Create or reuse an artifact folder under artifacts/domain_runs/<case_id>/.
+3. Ask domain_coder for the baseline run and artifact capture.
+4. Ask domain_analyst for a strict business/route/evidence verdict on the baseline.
+5. Feed that verdict back to domain_coder for the smallest defensible domain-only patch.
+6. Run a rerun and collect new artifacts.
+7. Ask domain_analyst for before/after comparison.
+8. End with one status: accepted | partial | blocked | needs_exact_capability.
+
+Hard rules:
+- Do not change architecture.
+- Do not allow heuristic output to be presented as a confirmed business answer.
+- Keep the process artifact-driven.
+- If the repository structure differs from the template package, inspect the project and adapt scripts/references/paths before the first serious loop.
+- Make Codex use the domain-case-loop skill when the workflow is repeatable.
+- When a case is too broad, decompose it into one exact capability question, not into vague prompt tuning.
+
+Required outputs per cycle:
+- baseline_output
+- baseline_debug
+- analyst_verdict
+- coder_plan
+- patch_summary
+- rerun_output
+- rerun_debug
+- before_after_diff
+- final_status
+"""
+nickname_candidates = ["Atlas", "Radian", "North"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/config.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/config.toml
@ -0,0 +1,13 @@
+# Project-scoped Codex configuration
+# Adapt paths and approval settings to your environment if needed.
+
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+approval_policy = "on-request"
+project_root_markers = [".git"]
+
+[agents]
+max_threads = 3
+max_depth = 1
+job_max_runtime_seconds = 3600
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/SKILL.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/SKILL.md
@ -0,0 +1,107 @@
+---
+name: domain-case-loop
+description: "Use this skill when a user wants to iteratively refine a 1C/MCP domain case through a multi-agent loop: baseline run, JSON analysis, minimal domain patch, rerun, and before/after verdict. Trigger for domain debugging, capability hardening, business-answer quality fixes, follow-up continuity bugs, and exact-vs-heuristic route issues."
+---
+
+# Domain case loop
+
+This skill packages the standard workflow for iterating on a single domain case.
+
+## Use this skill when
+
+- the user wants to improve one domain question end-to-end;
+- the answer exists but is noisy, heuristic, partial, or business-useless;
+- the route is wrong even if the wording looks better;
+- there is a gap between exact compute intent and actual fallback output;
+- there are follow-up / continuation bugs that corrupt business context.
+
+## Do not use this skill when
+
+- the user is asking for a broad architecture rewrite;
+- there is no concrete domain case or no reproducible input;
+- the task is only prose editing with no technical/domain component;
+- the task is a generic repo cleanup unrelated to domain capability behavior.
+
+## Workflow
+
+### Step 1 — Normalize the case
+Create `artifacts/domain_runs/<case_id>/case_brief.md` with:
+- domain name
+- raw user question
+- expected business meaning
+- expected exact capability
+- expected result mode
+- known constraints
+- acceptance criteria draft
+
+Use `references/case_brief_template.md`.
+
+### Step 2 — Capture baseline
+Collect:
+- baseline assistant answer
+- baseline technical debug payload
+- supporting logs if available
+
+Write:
+- `baseline_output.md`
+- `baseline_debug.json`
+
+### Step 3 — Analyst verdict
+Spawn `domain_analyst` and provide:
+- raw question
+- baseline output
+- baseline debug payload
+- optional relevant code excerpts or file paths
+
+Require a full verdict using `references/verdict_template.md`.
+
+### Step 4 — Domain patch
+Spawn `domain_coder` with:
+- the case brief
+- the analyst verdict
+- the baseline artifacts
+
+Require:
+- a minimal patch
+- zero architecture drift
+- rerun after changes
+
+### Step 5 — Rerun
+Capture:
+- `rerun_output.md`
+- `rerun_debug.json`
+- `patch_summary.md`
+
+### Step 6 — Before/after analysis
+Spawn `domain_analyst` again for:
+- before/after comparison
+- final status recommendation
+
+### Step 7 — Final status
+Write `final_status.md` with one of:
+- accepted
+- partial
+- blocked
+- needs_exact_capability
+
+## Hard rules
+
+- Do not count heuristic candidates as confirmed business answers.
+- If exact data should exist in 1C/MCP, prefer exact route work over prompt cosmetics.
+- If exact data does not exist yet in the reachable contour, return a technical insufficiency with a crisp blocker.
+- Never fabricate 1C data.
+- Keep domain fixes minimal and localized.
+- Preserve successful baseline scenarios.
+- Treat follow-up continuity as a state-machine problem, not a wording problem.
+
+## Domain-specific framing
+
+For this repository:
+- architecture must remain unchanged;
+- 1C/MCP is the primary source of truth;
+- analyst output must be detailed and business-readable;
+- answers should be suitable for product hardening, not just debugging notes.
+
+## Recommended artifact set
+
+Use the artifact layout from `references/artifact_layout.md`.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/artifact_layout.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/artifact_layout.md
@ -0,0 +1,15 @@
+# Artifact layout
+
+For each domain case use:
+
+artifacts/domain_runs/<case_id>/
+- case_brief.md
+- baseline_output.md
+- baseline_debug.json
+- analyst_verdict.md
+- coder_plan.md
+- patch_summary.md
+- rerun_output.md
+- rerun_debug.json
+- before_after_diff.md
+- final_status.md
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/case_brief_template.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/case_brief_template.md
@ -0,0 +1,28 @@
+# Case brief template
+
+## Domain
+`<domain_name>`
+
+## Raw user question
+`<raw_question>`
+
+## Expected business meaning
+- ...
+
+## Expected capability
+- ...
+
+## Expected result mode
+- confirmed_balance / confirmed_tax_liability / partial / technical_insufficiency / other
+
+## Constraints
+- no architecture changes
+- 1C/MCP first
+- no fabricated values
+- heuristic is not product success
+
+## Known current behavior
+- ...
+
+## Draft acceptance criteria
+- ...
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/domain_constraints.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/domain_constraints.md
@ -0,0 +1,8 @@
+# Domain constraints
+
+- Архитектуру проекта не менять.
+- Максимально использовать 1С/MCP.
+- Не придумывать значения.
+- Не считать heuristic ответ продуктовым успехом.
+- Математика вне 1С допустима только как детерминированный постпроцесс над уже подтвержденными фактами.
+- Analyst read-only, Coder implementation-focused.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/verdict_template.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/references/verdict_template.md
@ -0,0 +1,32 @@
+# Verdict
+
+## 1. Смысл вопроса
+...
+
+## 2. Что реально посчитано
+...
+
+## 3. Где расхождение по бизнес-смыслу
+...
+
+## 4. Где route / capability mismatch
+...
+
+## 5. Evidence quality
+- exact / partial / heuristic / technical insufficiency
+- why
+
+## 6. P0 defects
+- ...
+
+## 7. P1 defects
+- ...
+
+## 8. P2 defects
+- ...
+
+## 9. Minimal patch directions
+- ...
+
+## 10. Acceptance criteria for rerun
+- ...
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/scripts/run_domain_case.ps1
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/scripts/run_domain_case.ps1
@ -0,0 +1,16 @@
+param(
+  [string]$CaseId = ("case_" + (Get-Date -Format "yyyyMMdd_HHmmss")),
+  [string]$Question = ""
+)
+
+$ArtifactDir = Join-Path "artifacts/domain_runs" $CaseId
+New-Item -ItemType Directory -Force -Path $ArtifactDir | Out-Null
+
+@"
+# Case brief
+- case_id: $CaseId
+- raw_question: $Question
+"@ | Set-Content -Encoding UTF8 (Join-Path $ArtifactDir "case_brief.md")
+
+Write-Host "Created artifact directory: $ArtifactDir"
+Write-Host "TODO: replace this script with the real project-specific baseline runner."
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/scripts/run_domain_case.sh
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/.codex/skills/domain-case-loop/scripts/run_domain_case.sh
@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+CASE_ID="${1:-case_$(date +%Y%m%d_%H%M%S)}"
+QUESTION="${2:-}"
+ARTIFACT_DIR="artifacts/domain_runs/${CASE_ID}"
+
+mkdir -p "${ARTIFACT_DIR}"
+
+cat > "${ARTIFACT_DIR}/case_brief.md" <<EOF
+# Case brief
+- case_id: ${CASE_ID}
+- raw_question: ${QUESTION}
+EOF
+
+echo "Created artifact directory: ${ARTIFACT_DIR}"
+echo "TODO: replace this script with the real project-specific baseline runner."
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/AGENTS.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/AGENTS.md
@ -0,0 +1,104 @@
+# AGENTS.md
+
+## Назначение проекта
+
+Этот репозиторий содержит LLM-first + детерминированный контур для доменных запросов по 1С через MCP.
+Задача Codex в рамках этого проекта — не менять архитектуру, а системно доводить доменные capability и ответы до точного, полезного и бизнес-корректного вида.
+
+## Главные правила
+
+### 1. Архитектуру не трогать
+- Не менять архитектурное ядро проекта.
+- Не переписывать orchestration, если кейс можно решить в доменном слое.
+- Не делать широких рефакторингов ради одного доменного кейса.
+- Не размывать рабочие baseline-сценарии.
+
+### 2. Максимум 1С/MCP
+- Приоритет — точный ответ из 1С-контуров через MCP.
+- Сначала искать точный путь в 1С-модели, регистрах, документах, аналитиках, субконто, остатках и движениях.
+- Не подменять недостающий exact-route эвристикой, если можно добрать данные из 1С.
+- Если exact-route пока невозможен, прямо локализовать, чего именно не хватает.
+
+### 3. Никаких выдумок
+- Не придумывать факты, суммы, сущности, договоры, контрагентов, статусы.
+- Не считать heuristic-ответ финальным пользовательским ответом.
+- Не домысливать “причину открытости / закрытости / долга” без данных.
+- Любые вычисления вне 1С делать только как детерминированный постпроцесс над уже подтвержденными фактами.
+
+### 4. Доменные кейсы обрабатываются через capability
+Каждый серьезный доменный вопрос должен стремиться к отдельному capability, а не к расплывчатому общему route.
+Примеры:
+- confirmed_receivables_as_of_date
+- confirmed_payables_as_of_date
+- confirmed_vat_liability_for_tax_period
+- contracts_with_open_settlements_as_of_date
+
+### 5. Строгая маркировка режимов ответа
+Допустимые режимы:
+- `confirmed_balance`
+- `confirmed_tax_liability`
+- `partial`
+- `heuristic_candidates`
+- `technical_insufficiency`
+
+Если ответ не подтвержден, это должно быть явно отражено.
+Heuristic route не считается продуктовым success-состоянием.
+
+### 6. Analyst vs Coder
+- `domain_analyst` не пишет продуктовый код.
+- `domain_coder` не должен сам себе ставить “accepted” без прохождения acceptance criteria.
+- Orchestrator обязан сохранять артефакты и before/after.
+
+## Как работать над доменным кейсом
+
+1. Зафиксировать смысл вопроса.
+2. Понять, какой capability должен существовать.
+3. Получить baseline-ответ и baseline JSON.
+4. Отдать baseline `domain_analyst`.
+5. Получить P0/P1/P2 + acceptance criteria.
+6. Внести минимальные доменные правки.
+7. Выполнить rerun.
+8. Сравнить before/after.
+9. Сохранить итог.
+
+## Что считается плохой практикой
+
+- Лечить доменные баги только prose-правками.
+- Маскировать heuristic shortlist под точный ответ.
+- Путать объект ответа и источник сигнала.
+- Смешивать бизнес-сущности:
+  - контрагент
+  - договор
+  - объект расчетов
+  - обеспечительный инструмент
+  - госорган
+  - банк
+  - фин. продукт
+- Держать грязный follow-up context после неудачного turn.
+
+## Что аналитик должен проверять всегда
+
+- совпадает ли реальный ответ со смыслом вопроса;
+- exact vs heuristic;
+- есть ли подмена вопроса другим сценарием;
+- route maturity;
+- evidence completeness;
+- business readability;
+- presence of non-business garbage;
+- stable vs polluted context в follow-up сценариях.
+
+## Что кодер должен делать всегда
+
+- минимальный patch;
+- zero architecture drift;
+- zero fabricated data;
+- rerun после изменений;
+- сохранение артефактов;
+- no silent fallback masking.
+
+## Если пакет/skill требует адаптации
+
+Если фактическая структура репозитория отличается от шаблона:
+- найти реальные точки входа;
+- адаптировать scripts, references и prompts;
+- расширить шаблоны в `.codex/`, но не менять архитектурное ядро продукта.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/domain_analyst.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/domain_analyst.toml
@ -0,0 +1,39 @@
+name = "domain_analyst"
+description = "Read-only business and technical analyst for domain-case verdicts based on assistant outputs, JSON debug payloads, diffs, and rerun artifacts."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "read-only"
+developer_instructions = """
+You are the strict domain analyst.
+
+You do not write product code.
+You read:
+- the user question
+- the assistant answer
+- technical_debug_payload_json
+- optional diffs
+- rerun artifacts
+
+Your job is to produce a detailed verdict in Russian with strong business focus.
+
+Always answer in a strict structure:
+1. Смысл вопроса
+2. Что реально посчитано
+3. Где расхождение по бизнес-смыслу
+4. Где route / capability mismatch
+5. Evidence quality
+6. P0 defects
+7. P1 defects
+8. P2 defects
+9. Minimal patch directions
+10. Acceptance criteria for rerun
+
+Rules:
+- Call out non-business garbage explicitly.
+- Distinguish exact, partial, heuristic, and technical-insufficiency modes.
+- Do not accept a heuristic result as a final answer.
+- Do not praise superficial wording improvements if the compute layer is still wrong.
+- Highlight if an answer is unusable for a manager, accountant, or operator.
+- If the system answered a weaker question than the user asked, say so explicitly.
+"""
+nickname_candidates = ["Lens", "Vector", "Delta"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/domain_coder.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/domain_coder.toml
@ -0,0 +1,43 @@
+name = "domain_coder"
+description = "Implementation-focused agent for minimal domain fixes in 1C/MCP capabilities, routes, schemas, validators, and presentation logic without changing architecture."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the domain implementation agent.
+
+Your mission:
+- Read the target case, current answer, JSON/debug payload, and analyst verdict.
+- Find the smallest domain-only patch that moves the case toward a correct, useful, business-readable answer.
+- Do not change architecture.
+- Do not rewrite orchestration globally unless the change is strictly local and domain-scoped.
+- Prefer exact 1C/MCP-backed routes over heuristics.
+- If exact data exists in 1C/MCP, use it.
+- If exact data does not exist, surface a technical insufficiency rather than fabricating a result.
+
+Allowed change zones:
+- intents
+- domain-specific routing
+- recipes
+- capability mapping
+- evidence/source-ref modeling
+- role modeling
+- exact/confirmed routes
+- domain validators
+- follow-up resolution for a domain case
+- business-readable presentation
+
+Forbidden:
+- broad architecture changes
+- fake data
+- silent heuristic masking
+- large refactors unrelated to the case
+- changing successful baseline flows without necessity
+
+Always produce:
+1. a short coder_plan
+2. the minimal patch
+3. a patch_summary
+4. rerun instructions or executed rerun artifacts
+"""
+nickname_candidates = ["Forge", "Quartz", "Helix"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/orchestrator.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/agents/orchestrator.toml
@ -0,0 +1,38 @@
+name = "orchestrator"
+description = "Coordinates a domain-case loop: baseline run, analyst verdict, minimal domain patch, rerun, and final acceptance status."
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+developer_instructions = """
+You are the orchestrator for domain-case development in a 1C/MCP project.
+
+Your job:
+1. Accept a domain case from the user.
+2. Create or reuse an artifact folder under artifacts/domain_runs/<case_id>/.
+3. Ask domain_coder for the baseline run and artifact capture.
+4. Ask domain_analyst for a strict business/route/evidence verdict on the baseline.
+5. Feed that verdict back to domain_coder for the smallest defensible domain-only patch.
+6. Run a rerun and collect new artifacts.
+7. Ask domain_analyst for before/after comparison.
+8. End with one status: accepted | partial | blocked | needs_exact_capability.
+
+Hard rules:
+- Do not change architecture.
+- Do not allow heuristic output to be presented as a confirmed business answer.
+- Keep the process artifact-driven.
+- If the repository structure differs from the template package, inspect the project and adapt scripts/references/paths before the first serious loop.
+- Make Codex use the domain-case-loop skill when the workflow is repeatable.
+- When a case is too broad, decompose it into one exact capability question, not into vague prompt tuning.
+
+Required outputs per cycle:
+- baseline_output
+- baseline_debug
+- analyst_verdict
+- coder_plan
+- patch_summary
+- rerun_output
+- rerun_debug
+- before_after_diff
+- final_status
+"""
+nickname_candidates = ["Atlas", "Radian", "North"]
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/config.toml
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/config.toml
@ -0,0 +1,13 @@
+# Project-scoped Codex configuration
+# Adapt paths and approval settings to your environment if needed.
+
+model = "gpt-5.4"
+model_reasoning_effort = "high"
+sandbox_mode = "workspace-write"
+approval_policy = "on-request"
+project_root_markers = [".git"]
+
+[agents]
+max_threads = 3
+max_depth = 1
+job_max_runtime_seconds = 3600
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/SKILL.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/SKILL.md
@ -0,0 +1,107 @@
+---
+name: domain-case-loop
+description: "Use this skill when a user wants to iteratively refine a 1C/MCP domain case through a multi-agent loop: baseline run, JSON analysis, minimal domain patch, rerun, and before/after verdict. Trigger for domain debugging, capability hardening, business-answer quality fixes, follow-up continuity bugs, and exact-vs-heuristic route issues."
+---
+
+# Domain case loop
+
+This skill packages the standard workflow for iterating on a single domain case.
+
+## Use this skill when
+
+- the user wants to improve one domain question end-to-end;
+- the answer exists but is noisy, heuristic, partial, or business-useless;
+- the route is wrong even if the wording looks better;
+- there is a gap between exact compute intent and actual fallback output;
+- there are follow-up / continuation bugs that corrupt business context.
+
+## Do not use this skill when
+
+- the user is asking for a broad architecture rewrite;
+- there is no concrete domain case or no reproducible input;
+- the task is only prose editing with no technical/domain component;
+- the task is a generic repo cleanup unrelated to domain capability behavior.
+
+## Workflow
+
+### Step 1 — Normalize the case
+Create `artifacts/domain_runs/<case_id>/case_brief.md` with:
+- domain name
+- raw user question
+- expected business meaning
+- expected exact capability
+- expected result mode
+- known constraints
+- acceptance criteria draft
+
+Use `references/case_brief_template.md`.
+
+### Step 2 — Capture baseline
+Collect:
+- baseline assistant answer
+- baseline technical debug payload
+- supporting logs if available
+
+Write:
+- `baseline_output.md`
+- `baseline_debug.json`
+
+### Step 3 — Analyst verdict
+Spawn `domain_analyst` and provide:
+- raw question
+- baseline output
+- baseline debug payload
+- optional relevant code excerpts or file paths
+
+Require a full verdict using `references/verdict_template.md`.
+
+### Step 4 — Domain patch
+Spawn `domain_coder` with:
+- the case brief
+- the analyst verdict
+- the baseline artifacts
+
+Require:
+- a minimal patch
+- zero architecture drift
+- rerun after changes
+
+### Step 5 — Rerun
+Capture:
+- `rerun_output.md`
+- `rerun_debug.json`
+- `patch_summary.md`
+
+### Step 6 — Before/after analysis
+Spawn `domain_analyst` again for:
+- before/after comparison
+- final status recommendation
+
+### Step 7 — Final status
+Write `final_status.md` with one of:
+- accepted
+- partial
+- blocked
+- needs_exact_capability
+
+## Hard rules
+
+- Do not count heuristic candidates as confirmed business answers.
+- If exact data should exist in 1C/MCP, prefer exact route work over prompt cosmetics.
+- If exact data does not exist yet in the reachable contour, return a technical insufficiency with a crisp blocker.
+- Never fabricate 1C data.
+- Keep domain fixes minimal and localized.
+- Preserve successful baseline scenarios.
+- Treat follow-up continuity as a state-machine problem, not a wording problem.
+
+## Domain-specific framing
+
+For this repository:
+- architecture must remain unchanged;
+- 1C/MCP is the primary source of truth;
+- analyst output must be detailed and business-readable;
+- answers should be suitable for product hardening, not just debugging notes.
+
+## Recommended artifact set
+
+Use the artifact layout from `references/artifact_layout.md`.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/artifact_layout.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/artifact_layout.md
@ -0,0 +1,15 @@
+# Artifact layout
+
+For each domain case use:
+
+artifacts/domain_runs/<case_id>/
+- case_brief.md
+- baseline_output.md
+- baseline_debug.json
+- analyst_verdict.md
+- coder_plan.md
+- patch_summary.md
+- rerun_output.md
+- rerun_debug.json
+- before_after_diff.md
+- final_status.md
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/case_brief_template.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/case_brief_template.md
@ -0,0 +1,28 @@
+# Case brief template
+
+## Domain
+`<domain_name>`
+
+## Raw user question
+`<raw_question>`
+
+## Expected business meaning
+- ...
+
+## Expected capability
+- ...
+
+## Expected result mode
+- confirmed_balance / confirmed_tax_liability / partial / technical_insufficiency / other
+
+## Constraints
+- no architecture changes
+- 1C/MCP first
+- no fabricated values
+- heuristic is not product success
+
+## Known current behavior
+- ...
+
+## Draft acceptance criteria
+- ...
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/domain_constraints.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/domain_constraints.md
@ -0,0 +1,8 @@
+# Domain constraints
+
+- Архитектуру проекта не менять.
+- Максимально использовать 1С/MCP.
+- Не придумывать значения.
+- Не считать heuristic ответ продуктовым успехом.
+- Математика вне 1С допустима только как детерминированный постпроцесс над уже подтвержденными фактами.
+- Analyst read-only, Coder implementation-focused.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/verdict_template.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/references/verdict_template.md
@ -0,0 +1,32 @@
+# Verdict
+
+## 1. Смысл вопроса
+...
+
+## 2. Что реально посчитано
+...
+
+## 3. Где расхождение по бизнес-смыслу
+...
+
+## 4. Где route / capability mismatch
+...
+
+## 5. Evidence quality
+- exact / partial / heuristic / technical insufficiency
+- why
+
+## 6. P0 defects
+- ...
+
+## 7. P1 defects
+- ...
+
+## 8. P2 defects
+- ...
+
+## 9. Minimal patch directions
+- ...
+
+## 10. Acceptance criteria for rerun
+- ...
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/scripts/run_domain_case.ps1
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/scripts/run_domain_case.ps1
@ -0,0 +1,16 @@
+# Template stub: creates the artifact folder for one domain case and seeds case_brief.md.
+# -CaseId defaults to "case_" plus the current timestamp (yyyyMMdd_HHmmss); -Question defaults to an empty string.
+param(
+  [string]$CaseId = ("case_" + (Get-Date -Format "yyyyMMdd_HHmmss")),
+  [string]$Question = ""
+)
+
+# Relative path: no repository root is prepended here.
+$ArtifactDir = Join-Path "artifacts/domain_runs" $CaseId
+# -Force keeps this from failing when the directory already exists; Out-Null discards the returned item.
+New-Item -ItemType Directory -Force -Path $ArtifactDir | Out-Null
+
+# Expandable here-string: $CaseId and $Question are interpolated into the brief before it is written.
+@"
+# Case brief
+- case_id: $CaseId
+- raw_question: $Question
+"@ | Set-Content -Encoding UTF8 (Join-Path $ArtifactDir "case_brief.md")
+
+# This stub only prepares the folder; it does not run a baseline.
+Write-Host "Created artifact directory: $ArtifactDir"
+Write-Host "TODO: replace this script with the real project-specific baseline runner."
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/scripts/run_domain_case.sh
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/CODEX_VISIBLE/skills/domain-case-loop/scripts/run_domain_case.sh
@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+CASE_ID="${1:-case_$(date +%Y%m%d_%H%M%S)}"
+QUESTION="${2:-}"
+ARTIFACT_DIR="artifacts/domain_runs/${CASE_ID}"
+
+mkdir -p "${ARTIFACT_DIR}"
+
+cat > "${ARTIFACT_DIR}/case_brief.md" <<EOF
+# Case brief
+- case_id: ${CASE_ID}
+- raw_question: ${QUESTION}
+EOF
+
+echo "Created artifact directory: ${ARTIFACT_DIR}"
+echo "TODO: replace this script with the real project-specific baseline runner."
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/README.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/README.md
@ -0,0 +1,51 @@
+# Пакет для мультиагентной отработки доменов в Codex
+
+Этот пакет подготовлен под задачу итеративной отработки доменных кейсов в проекте 1С/MCP через Codex в VS Code или CLI.
+
+## Что внутри
+
+- `AGENTS.md` — корневые проектные инструкции для Codex.
+- `.codex/config.toml` — проектные настройки Codex.
+- `.codex/agents/` — три кастомных агента:
+  - `orchestrator.toml`
+  - `domain_coder.toml`
+  - `domain_analyst.toml`
+- `.codex/skills/domain-case-loop/` — skill для повторяемого цикла:
+  - кейс → прогон → артефакты → аналитика → фиксы → rerun
+- `docs/` — шаблоны и справочные документы.
+- `artifacts/domain_runs/` — место для артефактов прогонов.
+- `СОПРОВОДИТЕЛЬНОЕ_ПИСЬМО.md` — короткое письмо для передачи пакета в Codex.
+- `ТЗ_НА_РАЗВОРОТ_МУЛЬТИАГЕНТНОГО_КОНТУРА.md` — ТЗ на разворот и адаптацию под реальный репозиторий.
+
+## Как положить в проект
+
+1. Распаковать архив в корень репозитория.
+2. Проверить, что корень проекта определяется корректно.
+3. Открыть проект в VS Code с Codex extension или в Codex CLI.
+4. Попросить Codex:
+   - прочитать `AGENTS.md`
+   - проверить `.codex/config.toml`
+   - проверить кастомных агентов в `.codex/agents/`
+   - проверить skill `domain-case-loop`
+   - адаптировать пути, скрипты и ссылки под фактическую структуру репозитория
+5. После адаптации запустить первый кейс через оркестратор.
+
+## Базовый способ запуска
+
+Можно начинать сообщением в Codex примерно такого вида:
+
+> Отрабатываем домен `open_contracts`.  
+> Возьми skill `domain-case-loop`.  
+> Спауни `domain_analyst` и `domain_coder`.  
+> Кейс: "какие есть открытые договора на май 2020".  
+> Сначала собери артефакты текущего поведения, затем аналитик пусть даст verdict, потом кодер внесет минимальные доменные правки без изменения архитектуры, после этого сделай rerun и сохрани before/after.
+
+## Важные принципы пакета
+
+- Архитектурный код проекта не трогать.
+- Доменные доработки делать в субдоменных сущностях, recipes, routes, schemas, validators, domain-specific mapping и presentation logic.
+- Максимально использовать 1С-контур через MCP.
+- Эвристики не считать продуктовым ответом.
+- Ничего не высасывать из пальца.
+- Если точный ответ недоступен в 1С-контуре, прямо писать, чего не хватило.
+- Аналитик работает в read-only логике и не пишет продуктовый код.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/artifacts/domain_runs/.gitkeep
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/artifacts/domain_runs/.gitkeep
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/CASE_LIBRARY_TEMPLATE.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/CASE_LIBRARY_TEMPLATE.md
@ -0,0 +1,14 @@
+# Библиотека кейсов
+
+Для каждого домена поддерживайте набор cases:
+
+- `id`
+- `domain`
+- `raw_question`
+- `expected_business_meaning`
+- `expected_capability`
+- `expected_result_mode`
+- `must_not_happen`
+- `acceptance_criteria`
+
+Это должно стать regression suite после стабилизации.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/HOW_TO_RUN.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/HOW_TO_RUN.md
@ -0,0 +1,29 @@
+# Как запускать контур
+
+## Вариант 1 — из VS Code / Codex IDE
+
+Сформулируйте задачу примерно так:
+
+> Отрабатываем домен `<domain_name>`.  
+> Используй skill `domain-case-loop`.  
+> Спауни `domain_analyst` и `domain_coder`.  
+> Кейс: `<вопрос пользователя>`.  
+> Сначала собери baseline и JSON, потом дай analyst verdict, потом внеси минимальный domain patch без изменения архитектуры, потом сделай rerun и сохрани before/after.
+
+## Вариант 2 — из CLI
+
+Можно дать аналогичный промпт в Codex CLI в корне проекта.
+
+## Хорошая постановка кейса
+
+- домен
+- один конкретный вопрос
+- желаемый exact result
+- что в текущем ответе не устраивает
+- какие ограничения нельзя нарушать
+
+## Пример
+
+> Отрабатываем домен `contracts`.  
+> Кейс: "какие есть открытые договора на май 2020".  
+> Цель: exact capability `contracts_with_open_settlements_as_of_date`, дата 31.05.2020, без эвристического shortlist в финальном ответе.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/VERDICT_SCHEMA.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/docs/VERDICT_SCHEMA.md
@ -0,0 +1,34 @@
+# Схема verdict аналитика
+
+Analyst verdict должен быть длинным, строгим и практически полезным.
+
+## Обязательные разделы
+1. Смысл вопроса
+2. Что реально посчитано
+3. Где расхождение по бизнес-смыслу
+4. Где route / capability mismatch
+5. Evidence quality
+6. P0
+7. P1
+8. P2
+9. Minimal patch directions
+10. Acceptance criteria
+
+## Что считать P0
+- exact question answered with heuristic output
+- fabricated or weakly grounded business conclusion
+- wrong domain object
+- broken follow-up state that contaminates later turns
+- output unusable for business user
+
+## Что считать P1
+- bad classification
+- non-business garbage
+- weak presentation of evidence
+- missing source refs
+- route expectation too permissive
+
+## Что считать P2
+- cosmetic wording
+- secondary readability issues
+- duplicate lines
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨Ъ╨г╨Ф╨Р_╨б╨Ь╨Ю╨в╨а╨Х╨в╨м_╨б╨Э╨Р╨з╨Р╨Ы╨Р.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨Ъ╨г╨Ф╨Р_╨б╨Ь╨Ю╨в╨а╨Х╨в╨м_╨б╨Э╨Р╨з╨Р╨Ы╨Р.md
@ -0,0 +1,9 @@
+# Куда смотреть сначала
+
+1. `AGENTS.md` — общие правила проекта и жёсткие запреты.
+2. `.codex/agents/orchestrator.toml` — координатор цикла.
+3. `.codex/agents/domain_coder.toml` — кодер домена.
+4. `.codex/agents/domain_analyst.toml` — аналитик домена.
+5. `.codex/skills/domain-case-loop/SKILL.md` — пошаговый workflow.
+6. `ТЗ_НА_РАЗВОРОТ_МУЛЬТИАГЕНТНОГО_КОНТУРА.md` — что Codex должен развернуть в проекте.
+7. `СОПРОВОДИТЕЛЬНОЕ_ПИСЬМО.md` — текст для передачи Codex.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨б╨Ю╨Ф╨Х╨а╨Ц╨Ш╨Ь╨Ю╨Х_╨Я╨Р╨Ъ╨Х╨в╨Р.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨б╨Ю╨Ф╨Х╨а╨Ц╨Ш╨Ь╨Ю╨Х_╨Я╨Р╨Ъ╨Х╨в╨Р.md
@ -0,0 +1,20 @@
+# Содержимое пакета
+
+Ниже два одинаковых набора файлов:
+
+1. Основной набор для Codex:
+- `.codex/config.toml`
+- `.codex/agents/orchestrator.toml`
+- `.codex/agents/domain_coder.toml`
+- `.codex/agents/domain_analyst.toml`
+- `.codex/skills/domain-case-loop/...`
+
+2. Дублирующий видимый набор:
+- `CODEX_VISIBLE/config.toml`
+- `CODEX_VISIBLE/agents/orchestrator.toml`
+- `CODEX_VISIBLE/agents/domain_coder.toml`
+- `CODEX_VISIBLE/agents/domain_analyst.toml`
+- `CODEX_VISIBLE/skills/domain-case-loop/...`
+
+Если файлов `.codex/...` не видно в проводнике, используй папку `CODEX_VISIBLE` как читаемое зеркало.
+После копирования в репозиторий рабочим набором должен остаться именно `.codex/...`.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨б╨Ю╨Я╨а╨Ю╨Т╨Ю╨Ф╨Ш╨в╨Х╨Ы╨м╨Э╨Ю╨Х_╨Я╨Ш╨б╨м╨Ь╨Ю.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨б╨Ю╨Я╨а╨Ю╨Т╨Ю╨Ф╨Ш╨в╨Х╨Ы╨м╨Э╨Ю╨Х_╨Я╨Ш╨б╨м╨Ь╨Ю.md
@ -0,0 +1,44 @@
+# Сопроводительное письмо для Codex
+
+Нужно развернуть в текущем репозитории мультиагентный контур для итеративной отработки доменных кейсов 1С/MCP.
+
+## Цель
+
+Сделать устойчивый цикл:
+
+1. Берется доменный кейс.
+2. Собирается текущий ответ, JSON и технические артефакты.
+3. Аналитик дает подробный бизнес-разбор и технический verdict.
+4. Кодер вносит минимальные доменные правки.
+5. Выполняется rerun.
+6. Результат сравнивается с предыдущим прогоном.
+7. Успешный кейс сохраняется как golden case / regression case.
+
+## Критические ограничения
+
+- Не менять архитектуру проекта.
+- Не ломать LLM-first + детерминистский контур.
+- Не придумывать значения, которых нет в 1С.
+- Не считать heuristic-ответы продуктовым success-состоянием.
+- Максимально использовать 1С/MCP и внутреннюю фактическую модель проекта.
+- Если для точного ответа данных из 1С недостаточно, вернуть честный diagnostic, а не выдуманный результат.
+- Аналитика должна быть подробной, бизнесовой и строгой.
+- Для кастомных агентов использовать `gpt-5.4`, без mini-моделей.
+
+## Что нужно сделать сначала
+
+1. Прочитать `AGENTS.md`.
+2. Проверить кастомных агентов в `.codex/agents/`.
+3. Проверить skill `domain-case-loop`.
+4. Сопоставить шаблонный пакет с фактической структурой репозитория.
+5. Расширить или поправить skill/агентов/скрипты, если реальная структура проекта этого требует.
+6. Подготовить первый тестовый запуск на одном доменном кейсе.
+
+## Что считается хорошим результатом
+
+- Один запуск кейса можно инициировать одной командой/одним промптом.
+- На выходе всегда есть артефакты before/after.
+- Есть отдельный verdict аналитика.
+- Есть минимальный patch plan кодера.
+- Есть rerun.
+- Есть явные acceptance criteria по кейсу.
--- a/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨в╨Ч_╨Э╨Р_╨а╨Р╨Ч╨Т╨Ю╨а╨Ю╨в_╨Ь╨г╨Ы╨м╨в╨Ш╨Р╨У╨Х╨Э╨в╨Э╨Ю╨У╨Ю_╨Ъ╨Ю╨Э╨в╨г╨а╨Р.md
+++ b/docs/orchestration/codex_domain_loop_package_v2/codex_domain_loop_package/╨в╨Ч_╨Э╨Р_╨а╨Р╨Ч╨Т╨Ю╨а╨Ю╨в_╨Ь╨г╨Ы╨м╨в╨Ш╨Р╨У╨Х╨Э╨в╨Э╨Ю╨У╨Ю_╨Ъ╨Ю╨Э╨в╨г╨а╨Р.md
@ -0,0 +1,173 @@
+# ТЗ на разворот мультиагентного контура отработки доменов
+
+## 1. Цель
+
+Развернуть в репозитории мультиагентный контур для системной отработки доменных кейсов 1С/MCP без изменения архитектурного ядра проекта.
+
+Контур должен поддерживать цикл:
+
+- постановка доменного кейса;
+- прогон текущей реализации;
+- сбор ответа, JSON и технических артефактов;
+- бизнес- и route-анализ;
+- минимальные доменные правки;
+- rerun;
+- сравнение before/after;
+- сохранение кейса в набор regression/golden cases.
+
+## 2. Жесткие ограничения
+
+### 2.1 Архитектура
+- Не менять архитектурный код проекта.
+- Не перепридумывать текущую orchestration-концепцию.
+- Не ломать LLM-first + детерминистскую часть.
+- Не переписывать общий runtime ради одного домена.
+
+### 2.2 Доменная работа
+- Править только доменные и субдоменные сущности, если это требуется для кейса:
+  - intents
+  - capability routing
+  - recipes
+  - exact/confirmed routes
+  - validators
+  - role modeling
+  - evidence / source refs
+  - presentation logic
+  - continuation / context hygiene для доменного поведения
+- Максимально использовать данные 1С через MCP.
+- Если 1С-контур не отдает нужную ось, сначала искать точный путь в 1С/MCP, а не заменять это эвристикой.
+
+### 2.3 Качество ответа
+- Эвристика не считается финальным продуктовым ответом.
+- Нельзя высасывать из пальца значения, контрагентов, договоры, суммы, причины.
+- Если ответ не может быть подтвержден — прямо писать, что именно не подтверждено и чего не хватает.
+- Математические вычисления вне 1С допустимы только когда они действительно нужны как детерминированный постпроцесс на уже полученных фактах.
+
+## 3. Состав агентов
+
+### 3.1 Orchestrator
+Задачи:
+- принять кейс;
+- понять, какой capability / route должен существовать;
+- запустить кодера и аналитика;
+- дождаться их результатов;
+- решить следующий шаг;
+- выполнить rerun и сверку.
+
+### 3.2 Domain Coder
+Задачи:
+- найти текущую реализацию кейса;
+- собрать текущие артефакты;
+- внести минимальные доменные правки;
+- не трогать архитектурное ядро;
+- прогнать rerun;
+- сохранить patch summary.
+
+### 3.3 Domain Analyst
+Задачи:
+- читать ответ, JSON, логи и diff;
+- не писать продуктовый код;
+- давать строгий business verdict;
+- выделять P0/P1/P2;
+- формулировать acceptance criteria на rerun.
+
+## 4. Базовый workflow
+
+1. Пользователь задает доменный кейс.
+2. Orchestrator создает case folder в `artifacts/domain_runs/<case_id>/`.
+3. Domain Coder:
+   - собирает baseline;
+   - сохраняет ответ и JSON;
+   - делает краткий patch plan.
+4. Domain Analyst:
+   - читает baseline;
+   - пишет verdict.
+5. Orchestrator возвращает verdict кодеру.
+6. Domain Coder делает минимальные правки.
+7. Выполняется rerun.
+8. Domain Analyst сравнивает before/after.
+9. Orchestrator завершает кейс итоговым статусом:
+   - accepted
+   - partial
+   - blocked
+   - needs_exact_capability
+
+## 5. Что именно нужно реализовать в репозитории
+
+### 5.1 Codex customization
+- Подключить и адаптировать `AGENTS.md`.
+- Подключить и адаптировать `.codex/config.toml`.
+- Подключить кастомных агентов из `.codex/agents/`.
+- Подключить skill `domain-case-loop`.
+
+### 5.2 Артефакты кейса
+На каждый кейс сохранять:
+- `case_brief.md`
+- `baseline_output.md`
+- `baseline_debug.json`
+- `analyst_verdict.md`
+- `coder_plan.md`
+- `patch_summary.md`
+- `rerun_output.md`
+- `rerun_debug.json`
+- `before_after_diff.md`
+- `final_status.md`
+
+### 5.3 Формат verdict аналитика
+Обязательные разделы:
+- смысл вопроса;
+- что реально посчитано;
+- business mismatch;
+- route mismatch;
+- evidence quality;
+- P0 / P1 / P2;
+- минимальные правки;
+- acceptance criteria.
+
+## 6. Что нужно проверить и при необходимости расширить
+
+Codex должен после загрузки пакета сам проверить:
+- фактическую структуру репозитория;
+- где реально лежат routes / recipes / normalizers / schemas / evaluators / runners;
+- как запускать локальный доменный прогон;
+- где сохранять артефакты;
+- нужно ли расширить skill дополнительными reference-файлами;
+- нужны ли project-specific scripts для baseline/rerun.
+
+Если структура проекта требует расширения шаблонов — расширить их в рамках этого пакета, но без изменения архитектурного ядра продукта.
+
+## 7. Режимы успеха
+
+### Accepted
+- получен точный или честно подтвержденный ответ;
+- acceptance criteria выполнены;
+- regression case сохранен.
+
+### Partial
+- найдено улучшение, но exact-result не достигнут;
+- причина ограничений зафиксирована.
+
+### Blocked
+- не хватает доступа, осей данных или исполняемого контура;
+- проблема локализована.
+
+### Needs exact capability
+- текущий heuristic route не годится;
+- нужен отдельный exact capability.
+
+## 8. Первая задача после разворота
+
+Взять один доменный кейс и прогнать контур end-to-end.  
+Рекомендуемый стартовый кейс (один из):
+- открытые договоры на дату;
+- дебиторка / кредиторка на дату;
+- НДС к уплате за период.
+
+Кейс должен дойти до:
+- baseline
+- verdict
+- patch
+- rerun
+- final status
--- a/docs/orchestration/domain_case_loop_repo_adapter.md
+++ b/docs/orchestration/domain_case_loop_repo_adapter.md
@ -0,0 +1,105 @@
+# Domain Case Loop Repo Adapter
+
+Документ фиксирует, как шаблонный пакет мультиагентной оркестрации привязывается к реальному репозиторию `NDC_1C`.
+
+## Что уже есть в проекте
+
+- Ассистентный runtime с `address_query` и `deep` ветками.
+- Technical export в UI:
+  - `llm_normalizer/frontend/src/utils/conversationExport.ts`
+- Async single-case прогон:
+  - `POST /api/eval/run-async/start`
+  - `GET /api/eval/run-async/:job_id`
+- Session logs:
+  - `llm_normalizer/data/assistant_sessions/*.json`
+- Autoruns/annotations/post-analysis API:
+  - `llm_normalizer/backend/src/routes/autoRuns.ts`
+
+## Что добавлено для project-scoped Codex automation
+
+- root `.codex/`:
+  - `.codex/config.toml`
+  - `.codex/agents/orchestrator.toml`
+  - `.codex/agents/domain_coder.toml`
+  - `.codex/agents/domain_analyst.toml`
+  - `.codex/skills/domain-case-loop/...`
+- helper script:
+  - `scripts/domain_case_loop.py`
+- artifact root:
+  - `artifacts/domain_runs/`
+
+## Почему это лучший путь для текущего репо
+
+Мы не встраиваем новый orchestration runtime в продуктовый backend.
+Мы поднимаем отдельный Codex-driven outer loop, который использует уже существующие:
+
+1. assistant runtime;
+2. technical debug payload;
+3. session logs;
+4. async eval single-case flow.
+
+Это позволяет автоматизировать текущую ручную схему без architecture drift.
+
+## Два режима baseline/rerun capture
+
+### 1. Автоматический run-case
+
+Использует живой backend:
+
+```powershell
+python scripts/domain_case_loop.py run-case `
+  --domain open_contracts `
+  --question "какие есть открытые договора на март 2020" `
+  --analysis-date 2020-03-31 `
+  --expected-capability contracts_with_open_settlements_as_of_date `
+  --expected-result-mode confirmed_balance
+```
+
+Что делает:
+
+1. создает `artifacts/domain_runs/<case_id>/`;
+2. запускает `assistant_stage1` на одном вопросе;
+3. ждет completion;
+4. забирает session/report artifacts;
+5. сохраняет `baseline_output.md`, `baseline_debug.json`, `baseline_turn.json` и связанные JSON.
+
+### 2. Импорт уже скопированного техчата
+
+Подходит для текущего исторического режима, где у пользователя уже есть markdown export:
+
+```powershell
+python scripts/domain_case_loop.py import-export `
+  --domain open_contracts `
+  --input "C:\\Users\\DCTOUCH\\Desktop\\акие_есть_открытые_договора_на_март_2020.md"
+```
+
+## Канонический JSON для аналитика
+
+Главный вход аналитика теперь:
+
+- `baseline_turn.json`
+- `rerun_turn.json`
+
+Они содержат:
+
+1. вопрос;
+2. ответ;
+3. `technical_debug_payload`;
+4. session summary;
+5. run/session ids;
+6. report excerpt;
+7. ссылку на markdown export.
+
+## Правило завершения цикла
+
+Кейс считается `accepted`, только если одновременно выполнено:
+
+1. `quality_score >= 80`;
+2. нет unresolved `P0`;
+3. rerun не маскирует heuristic output под confirmed answer.
+
+Во всех остальных случаях итог должен быть:
+
+- `partial`
+- `blocked`
+- `needs_exact_capability`
--- a/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-I-f02pwI7g.json
+++ b/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-I-f02pwI7g.json
@ -0,0 +1,22 @@
+{
+  "suite_id": "assistant_autogen_runtime_job-I-f02pwI7g",
+  "suite_version": "0.1.0",
+  "schema_version": "assistant_autogen_runtime_v0_1",
+  "scenario_count": 1,
+  "case_ids": [
+    "AUTO-001"
+  ],
+  "cases": [
+    {
+      "case_id": "AUTO-001",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "какие есть открытые договора на март 2020"
+        }
+      ]
+    }
+  ]
+}
--- a/scripts/domain_case_loop.py
+++ b/scripts/domain_case_loop.py
@ -0,0 +1,599 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+import textwrap
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+
+
+# Parent of the directory that contains this script file.
+REPO_ROOT = Path(__file__).resolve().parent.parent
+# Default value of --output-root; each case gets its own sub-directory under it.
+DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
+# Default value of --sessions-dir; `run-case` reads `<session_id>.json` from here.
+DEFAULT_SESSIONS_DIR = REPO_ROOT / "llm_normalizer" / "data" / "assistant_sessions"
+# Default value of --reports-dir; `run-case` reads `<run_id>.json` from here when present.
+DEFAULT_REPORTS_DIR = REPO_ROOT / "llm_normalizer" / "reports"
+# Default value of --backend-url; the /api/eval/run-async endpoints are called on it.
+DEFAULT_BACKEND_URL = "http://127.0.0.1:8787"
+# Markdown heading that precedes the fenced JSON debug payload in technical exports.
+TECH_SECTION_HEADER = "### technical_debug_payload_json"
+
+
+def dump_json(payload: Any) -> str:
+    """Render *payload* as 2-space-indented JSON, leaving non-ASCII characters unescaped."""
+    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
+    return rendered
+
+
+def write_text(file_path: Path, text: str) -> None:
+    """Write *text* to *file_path* as UTF-8 with LF newlines, creating parent directories first."""
+    target_dir = file_path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    file_path.write_text(text, encoding="utf-8", newline="\n")
+
+
+def write_json(file_path: Path, payload: Any) -> None:
+    """Store *payload* at *file_path* as pretty-printed JSON followed by one trailing newline."""
+    document = dump_json(payload) + "\n"
+    write_text(file_path, document)
+
+
+def sanitize_export_text(value: str) -> str:
+    """Strip debug sections and technical lines from a message text.
+
+    The cleanup runs in order:
+
+    1. cut everything from the first line that begins (after optional
+       whitespace and up to six ``#``) with a known debug heading name;
+    2. remove any remaining ``###`` debug heading together with the text up
+       to the end of the first fenced block after it, or to the end of the
+       text when no complete fence follows;
+    3. remove everything from a line-start debug heading to the end of text;
+    4. drop blank lines and every line that mentions one of the technical
+       field names as a whole word (case-insensitive).
+
+    Returns the surviving lines, right-stripped, joined with ``\\n`` and
+    stripped as a whole. A falsy *value* yields an empty string.
+    """
+    raw = str(value or "")
+    # `^` has no MULTILINE flag here, so it only anchors at the very start of
+    # the text; the `\n` alternative covers headings on later lines.
+    debug_heading = re.search(
+        r"(?:^|\n)\s*#{0,6}\s*(?:debug_payload_json|technical_breakdown_json|route_summary_json|debug_payload|technical_breakdown)\b",
+        raw,
+        flags=re.IGNORECASE,
+    )
+    pre_cut = raw[: debug_heading.start()] if debug_heading else raw
+    # Catches `###` headings that are not at the start of a line and therefore
+    # survived the cut above.
+    without_debug = re.sub(
+        r"###\s*(?:debug_payload_json|technical_breakdown_json|route_summary_json)[\s\S]*?(?:```[\s\S]*?```|$)",
+        "",
+        pre_cut,
+        flags=re.IGNORECASE,
+    )
+    # NOTE(review): after the initial cut this pass looks redundant; presumably
+    # kept as a safety net — confirm before removing.
+    without_debug = re.sub(
+        r"(?:^|\n)\s*#{0,6}\s*(?:debug_payload_json|technical_breakdown_json|route_summary_json)\b[\s\S]*$",
+        "",
+        without_debug,
+        flags=re.IGNORECASE,
+    )
+    # A line matching any of these patterns is dropped in full, not just the
+    # matching word.
+    inline_patterns = [
+        re.compile(r"\b(?:debug_payload_json|technical_breakdown_json)\b", re.IGNORECASE),
+        re.compile(r"\b(?:route_summary|semantic_profile|domain_scope|relation_patterns|account_scope)\b", re.IGNORECASE),
+        re.compile(r"\b(?:coverage_report|retrieval_status|problem_unit_state|candidate_evidence)\b", re.IGNORECASE),
+        re.compile(r"\b(?:graph_domain_scope|graph_runtime|selection_reason|why_included)\b", re.IGNORECASE),
+    ]
+    output_lines: list[str] = []
+    for line in without_debug.splitlines():
+        cleaned = line.rstrip()
+        if not cleaned.strip():
+            # Blank lines are removed, so paragraph spacing is not preserved.
+            continue
+        if any(pattern.search(cleaned) for pattern in inline_patterns):
+            continue
+        output_lines.append(cleaned)
+    return "\n".join(output_lines).strip()
+
+
+def build_conversation_export(session_id: str, conversation: list[dict[str, Any]], mode: str = "technical") -> str:
+    """Render a conversation as a markdown export document.
+
+    The document starts with a header (session id, export mode, UTC export
+    time) followed by one numbered ``##`` section per message. Each section
+    holds the message metadata lines and the sanitized message text. When
+    *mode* is ``"technical"``, assistant messages that carry a non-None
+    ``debug`` value also get a fenced JSON block under the technical heading.
+    """
+    exported_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
+    chunks: list[str] = [
+        "# Assistant conversation export",
+        f"session_id: {session_id or 'n/a'}",
+        f"export_mode: {mode}",
+        f"exported_at: {exported_at}",
+        "",
+    ]
+    with_debug = mode == "technical"
+    for number, message in enumerate(conversation, start=1):
+        body = sanitize_export_text(str(message.get("text") or ""))
+        chunks += [
+            f"## {number}. {message.get('role', 'unknown')}",
+            f"message_id: {message.get('message_id') or 'n/a'}",
+            f"created_at: {message.get('created_at') or 'n/a'}",
+            f"reply_type: {message.get('reply_type') or 'n/a'}",
+        ]
+        trace = message.get("trace_id")
+        if trace:
+            chunks.append(f"trace_id: {trace}")
+        chunks += ["", body or "(empty)", ""]
+        # The debug block is attached to assistant messages only.
+        if not with_debug or message.get("role") != "assistant":
+            continue
+        debug_value = message.get("debug")
+        if debug_value is not None:
+            chunks += [TECH_SECTION_HEADER, "```json", dump_json(debug_value), "```", ""]
+    return "\n".join(chunks)
+
+
+def slugify_case_id(domain: str, explicit_case_id: str | None) -> str:
+    """Return the case id to use as the artifact directory name.
+
+    A non-blank *explicit_case_id* is returned stripped and otherwise
+    unchanged. Otherwise the id is ``<cleaned domain>_<UTC timestamp>``,
+    where each run of characters outside ASCII letters, digits, Cyrillic
+    letters, ``_`` and ``-`` is collapsed into a single ``_``. If nothing
+    survives the cleanup, ``domain_case`` is used as the prefix.
+    """
+    if explicit_case_id:
+        normalized = explicit_case_id.strip()
+        if normalized:
+            return normalized
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    # The range А-я covers U+0410..U+044F only; Ё (U+0401) and ё (U+0451) sit
+    # outside it and must be listed explicitly, otherwise they are replaced
+    # with "_".
+    cleaned_domain = re.sub(r"[^0-9A-Za-zА-Яа-яЁё_-]+", "_", domain.strip(), flags=re.UNICODE).strip("_")
+    return f"{cleaned_domain or 'domain_case'}_{timestamp}"
+
+
+def http_json(url: str, *, method: str = "GET", payload: dict[str, Any] | None = None, timeout: int = 30) -> dict[str, Any]:
+    """Send an HTTP request and return the decoded JSON object from the response.
+
+    Args:
+        url: Full request URL.
+        method: HTTP method name.
+        payload: Optional body; when given it is sent as UTF-8 encoded JSON.
+        timeout: Timeout in seconds passed to ``urlopen``.
+
+    Returns:
+        The response body parsed as a JSON object.
+
+    Raises:
+        RuntimeError: on an HTTP error status, an unreachable backend, a
+            failure while reading the body, a body that is not valid JSON,
+            or a JSON value that is not an object.
+    """
+    data = None
+    headers = {"Accept": "application/json"}
+    if payload is not None:
+        data = json.dumps(payload).encode("utf-8")
+        headers["Content-Type"] = "application/json; charset=utf-8"
+    request = Request(url, data=data, method=method, headers=headers)
+    try:
+        with urlopen(request, timeout=timeout) as response:
+            body = response.read().decode("utf-8")
+    except HTTPError as error:
+        detail = error.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"HTTP {error.code} for {url}: {detail}") from error
+    except URLError as error:
+        raise RuntimeError(f"Failed to reach backend at {url}: {error}") from error
+    except OSError as error:
+        # Timeouts and connection resets raised while reading the body are
+        # plain OSError subclasses and are not wrapped in URLError.
+        raise RuntimeError(f"Failed to read backend response from {url}: {error}") from error
+    try:
+        parsed = json.loads(body)
+    except json.JSONDecodeError as error:
+        raise RuntimeError(f"Backend returned non-JSON payload for {url}") from error
+    if not isinstance(parsed, dict):
+        # Callers use `.get(...)` on the result, so anything but a JSON object
+        # would fail later with a less helpful AttributeError.
+        raise RuntimeError(f"Backend returned a non-object JSON payload for {url}")
+    return parsed
+
+
+def wait_for_job(backend_url: str, job_id: str, timeout_seconds: int, poll_interval_seconds: float) -> dict[str, Any]:
+    """Poll the async eval job until it reports ``completed`` or ``failed``.
+
+    Each status change is printed once. The job is polled at least once, even
+    when *timeout_seconds* is zero.
+
+    Returns:
+        The ``job`` object from the last poll; the caller checks its status.
+
+    Raises:
+        RuntimeError: if a poll response has no ``job`` object.
+        TimeoutError: if the job is still running when the timeout elapses.
+    """
+    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
+    deadline = time.monotonic() + timeout_seconds
+    last_status = None
+    while True:
+        response = http_json(f"{backend_url}/api/eval/run-async/{job_id}")
+        job = response.get("job")
+        if not isinstance(job, dict):
+            raise RuntimeError("Async job response does not contain `job` object")
+        status = str(job.get("status") or "unknown")
+        if status != last_status:
+            print(f"[domain-case-loop] job {job_id}: {status}")
+            last_status = status
+        if status in {"completed", "failed"}:
+            return job
+        if time.monotonic() >= deadline:
+            break
+        time.sleep(poll_interval_seconds)
+    raise TimeoutError(f"Async job {job_id} did not finish within {timeout_seconds} seconds")
+
+
+def wait_for_file(file_path: Path, timeout_seconds: int = 30) -> None:
+    """Block until *file_path* exists, checking every half second.
+
+    The path is checked at least once, so an already existing file is
+    accepted even with ``timeout_seconds=0``.
+
+    Raises:
+        FileNotFoundError: if the file has not appeared when the timeout elapses.
+    """
+    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
+    deadline = time.monotonic() + timeout_seconds
+    while True:
+        if file_path.exists():
+            return
+        if time.monotonic() >= deadline:
+            break
+        time.sleep(0.5)
+    raise FileNotFoundError(f"Timed out waiting for file: {file_path}")
+
+
+def read_json_file(file_path: Path) -> dict[str, Any]:
+    """Parse *file_path* as JSON; the file is decoded as UTF-8 and a leading BOM is tolerated."""
+    raw_text = file_path.read_text(encoding="utf-8-sig")
+    parsed = json.loads(raw_text)
+    return parsed
+
+
+def _normalize_message(item: dict[str, Any]) -> dict[str, Any]:
+    """Project a raw message dict onto the fixed set of fields used by the exporters."""
+    return {
+        "message_id": item.get("message_id"),
+        "role": item.get("role"),
+        "text": item.get("text") or "",
+        "reply_type": item.get("reply_type"),
+        "created_at": item.get("created_at"),
+        "trace_id": item.get("trace_id"),
+        "debug": item.get("debug"),
+    }
+
+
+def extract_conversation_from_session(session_record: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return the session's messages as a flat list of normalized dicts.
+
+    The top-level ``conversation`` list is preferred. When it is missing,
+    empty, or contains no dict items, the messages are rebuilt from
+    ``turns[*].technical_json`` using ``user_message`` followed by
+    ``assistant_message`` for each turn. Non-dict entries are skipped at
+    every level. An empty list is returned when neither source is usable.
+    """
+    conversation = session_record.get("conversation")
+    if isinstance(conversation, list):
+        messages = [_normalize_message(item) for item in conversation if isinstance(item, dict)]
+        if messages:
+            return messages
+
+    turns = session_record.get("turns")
+    if not isinstance(turns, list):
+        return []
+
+    rebuilt: list[dict[str, Any]] = []
+    for turn in turns:
+        if not isinstance(turn, dict):
+            continue
+        technical_json = turn.get("technical_json")
+        if not isinstance(technical_json, dict):
+            continue
+        for item in (technical_json.get("user_message"), technical_json.get("assistant_message")):
+            if isinstance(item, dict):
+                rebuilt.append(_normalize_message(item))
+    return rebuilt
+
+
+def find_last_assistant(conversation: list[dict[str, Any]]) -> dict[str, Any]:
+    """Return the final message whose role is ``assistant``.
+
+    Raises:
+        RuntimeError: if no assistant message is present.
+    """
+    assistant_messages = [message for message in conversation if message.get("role") == "assistant"]
+    if not assistant_messages:
+        raise RuntimeError("Conversation does not contain assistant message")
+    return assistant_messages[-1]
+
+
+def find_last_user_before(conversation: list[dict[str, Any]], assistant_message_id: Any) -> dict[str, Any] | None:
+    """Return the closest user message that precedes the given assistant message.
+
+    The assistant message is located as the LAST assistant-role item whose
+    ``message_id`` equals *assistant_message_id*. Searching from the end
+    keeps the lookup correct when ids are missing or repeated, for example
+    when every message of an imported export carries the ``n/a``
+    placeholder. If no assistant message carries the id, the search falls
+    back to the first item of any role with that id, and finally to the
+    whole conversation.
+
+    Returns:
+        The user message dict, or ``None`` when no user message precedes the
+        located position.
+    """
+    cutoff: int | None = None
+    for index, item in reversed(list(enumerate(conversation))):
+        if item.get("role") == "assistant" and item.get("message_id") == assistant_message_id:
+            cutoff = index
+            break
+    if cutoff is None:
+        cutoff = next(
+            (index for index, item in enumerate(conversation) if item.get("message_id") == assistant_message_id),
+            len(conversation),
+        )
+    for item in reversed(conversation[:cutoff]):
+        if item.get("role") == "user":
+            return item
+    return None
+
+
+def extract_last_turn(session_record: dict[str, Any]) -> dict[str, Any] | None:
+    """Return the final entry of ``turns`` when it is a dict, otherwise ``None``."""
+    turns = session_record.get("turns")
+    if isinstance(turns, list) and turns:
+        candidate = turns[-1]
+        if isinstance(candidate, dict):
+            return candidate
+    return None
+
+
+def extract_report_case(report_record: dict[str, Any], case_id: str) -> dict[str, Any] | None:
+    """Return the first dict in ``results`` whose ``case_id`` equals *case_id*, or ``None``."""
+    results = report_record.get("results")
+    if not isinstance(results, list):
+        return None
+    matching = (
+        entry
+        for entry in results
+        if isinstance(entry, dict) and str(entry.get("case_id") or "") == case_id
+    )
+    return next(matching, None)
+
+
+def build_turn_artifact(
+    *,
+    slot: str,
+    domain: str,
+    case_id: str,
+    question: str | None,
+    session_id: str,
+    conversation: list[dict[str, Any]],
+    session_record: dict[str, Any] | None,
+    job_record: dict[str, Any] | None,
+    report_case: dict[str, Any] | None,
+    export_file_name: str,
+) -> dict[str, Any]:
+    """Assemble the canonical turn artifact for the last assistant reply.
+
+    The artifact bundles the question, the last assistant message with the
+    user message that precedes it, the assistant's debug payload, selected
+    job and session fields, the report case and the export file name. When
+    *question* is falsy, the text of the preceding user message is used.
+    Job and session fields are ``None`` when the matching record is not a
+    dict.
+
+    Raises:
+        RuntimeError: if *conversation* has no assistant message.
+    """
+    assistant_msg = find_last_assistant(conversation)
+    user_msg = find_last_user_before(conversation, assistant_msg.get("message_id"))
+    if question:
+        resolved_question = question
+    elif isinstance(user_msg, dict):
+        resolved_question = user_msg.get("text")
+    else:
+        resolved_question = None
+    last_turn = extract_last_turn(session_record or {})
+
+    # Empty stand-ins make every `.get` below yield None for absent records.
+    job_fields: dict[str, Any] = job_record if isinstance(job_record, dict) else {}
+    session_fields: dict[str, Any] = session_record if isinstance(session_record, dict) else {}
+
+    run_section = {
+        "job_id": job_fields.get("job_id"),
+        "run_id": job_fields.get("run_id"),
+        "analysis_date": job_fields.get("analysis_date"),
+        "report_case_available": report_case is not None,
+    }
+    summary_section = {
+        "schema_version": session_fields.get("schema_version"),
+        "updated_at": session_fields.get("updated_at"),
+        "trace_ids": session_fields.get("trace_ids"),
+        "reply_types": session_fields.get("reply_types"),
+        "investigation_state": session_fields.get("investigation_state"),
+        "address_navigation_state": session_fields.get("address_navigation_state"),
+        "last_turn": last_turn,
+    }
+    return {
+        "schema_version": "domain_case_turn_artifact_v1",
+        "artifact_slot": slot,
+        "domain": domain,
+        "case_id": case_id,
+        "question": resolved_question,
+        "session_id": session_id,
+        "run": run_section,
+        "user_message": user_msg,
+        "assistant_message": assistant_msg,
+        "technical_debug_payload": assistant_msg.get("debug"),
+        "session_summary": summary_section,
+        "report_case": report_case,
+        "export_markdown_file": export_file_name,
+    }
+
+
+def ensure_case_brief(
+    case_dir: Path,
+    *,
+    domain: str,
+    question: str | None,
+    expected_capability: str | None,
+    expected_result_mode: str | None,
+) -> None:
+    """Create ``case_brief.md`` in *case_dir* unless the file already exists.
+
+    An existing brief is never overwritten. Missing values are rendered as
+    the ``<fill me>`` placeholder.
+    """
+    file_path = case_dir / "case_brief.md"
+    if file_path.exists():
+        return
+    # Dedent the static template BEFORE substituting values. A multi-line
+    # question inserted first would contribute unindented lines, which makes
+    # textwrap.dedent find no common margin and leaves the whole brief
+    # indented.
+    template = textwrap.dedent(
+        """\
+        # Case brief
+
+        ## Domain
+        `{domain}`
+
+        ## Raw user question
+        `{question}`
+
+        ## Expected business meaning
+        - <fill me>
+
+        ## Expected capability
+        - {expected_capability}
+
+        ## Expected result mode
+        - {expected_result_mode}
+
+        ## Constraints
+        - no architecture changes
+        - 1C/MCP first
+        - no fabricated values
+        - heuristic is not product success
+        - accepted requires analyst quality score >= 80 and zero unresolved P0
+
+        ## Known current behavior
+        - <fill me>
+
+        ## Draft acceptance criteria
+        - <fill me>
+        """
+    )
+    content = template.format(
+        domain=domain,
+        question=question or "<fill me>",
+        expected_capability=expected_capability or "<fill me>",
+        expected_result_mode=expected_result_mode or "<fill me>",
+    )
+    write_text(file_path, content)
+
+
+def save_capture_bundle(
+    *,
+    case_dir: Path,
+    slot: str,
+    export_markdown: str,
+    debug_payload: Any,
+    turn_artifact: dict[str, Any],
+    session_record: dict[str, Any] | None,
+    job_record: dict[str, Any] | None,
+    report_case: dict[str, Any] | None,
+) -> None:
+    """Write all artifact files of one slot into *case_dir*.
+
+    The markdown output, debug JSON and turn JSON are always written; a
+    ``None`` debug payload is stored as an empty object. The session, job
+    and report-case files are written only when their record is not ``None``.
+    """
+    write_text(case_dir / f"{slot}_output.md", export_markdown)
+    debug_to_store = {} if debug_payload is None else debug_payload
+    write_json(case_dir / f"{slot}_debug.json", debug_to_store)
+    write_json(case_dir / f"{slot}_turn.json", turn_artifact)
+
+    optional_outputs = (
+        (case_dir / f"{slot}_session.json", session_record),
+        (case_dir / f"{slot}_job.json", job_record),
+        (case_dir / f"{slot}_report_case.json", report_case),
+    )
+    for target, record in optional_outputs:
+        if record is None:
+            continue
+        write_json(target, record)
+
+
+def parse_metadata_line(line: str) -> tuple[str, str] | None:
+    """Split ``key: value`` at the first colon into stripped parts; return ``None`` when no colon is present."""
+    key, separator, value = line.partition(":")
+    if not separator:
+        return None
+    return key.strip(), value.strip()
+
+
+def parse_export_markdown(text: str) -> tuple[str, list[dict[str, Any]]]:
+    """Parse a markdown conversation export back into a session id and messages.
+
+    Sections are introduced by ``## <number>. user`` or
+    ``## <number>. assistant`` headings. Inside a section, leading
+    ``key: value`` lines are metadata, and the remaining lines form the
+    message text up to an optional technical debug heading. After that
+    heading the debug JSON is read from the first json fence, or from the
+    whole remaining text when no fence is found.
+
+    Returns:
+        ``(session_id, conversation)``. ``session_id`` is ``"n/a"`` when the
+        export has no ``session_id:`` line. Metadata values are kept exactly
+        as written, so placeholder values such as ``n/a`` stay as strings.
+
+    Raises:
+        RuntimeError: if no conversation section is found.
+        json.JSONDecodeError: if a debug payload is not valid JSON.
+    """
+    session_id = "n/a"
+    session_match = re.search(r"^session_id:\s*(.+?)\s*$", text, flags=re.MULTILINE)
+    if session_match:
+        session_id = session_match.group(1).strip()
+
+    section_pattern = re.compile(r"^##\s+\d+\.\s+(user|assistant)\s*$", flags=re.MULTILINE)
+    sections = list(section_pattern.finditer(text))
+    conversation: list[dict[str, Any]] = []
+    for index, match in enumerate(sections):
+        role = match.group(1)
+        # A section runs from the end of its heading to the next heading, or
+        # to the end of the text for the last section.
+        start = match.end()
+        end = sections[index + 1].start() if index + 1 < len(sections) else len(text)
+        block = text[start:end].lstrip("\r\n")
+        lines = block.splitlines()
+        metadata: dict[str, Any] = {"role": role}
+        cursor = 0
+        # Consume metadata lines. A blank line ends the metadata and is
+        # skipped; a line without a colon ends it too but stays in the body.
+        while cursor < len(lines):
+            line = lines[cursor]
+            if not line.strip():
+                cursor += 1
+                break
+            meta = parse_metadata_line(line)
+            if not meta:
+                break
+            key, value = meta
+            metadata[key] = value
+            cursor += 1
+
+        body_lines = lines[cursor:]
+        debug_payload = None
+        debug_start = None
+        # Only the first debug heading in the section is honoured.
+        for body_index, line in enumerate(body_lines):
+            if line.strip().lower() == TECH_SECTION_HEADER.lower():
+                debug_start = body_index
+                break
+        if debug_start is not None:
+            body_text_lines = body_lines[:debug_start]
+            debug_lines = body_lines[debug_start + 1 :]
+            debug_text = "\n".join(debug_lines).strip()
+            # Prefer the fenced block; otherwise the raw remaining text is
+            # parsed as JSON.
+            fenced = re.search(r"```json\s*(.*?)\s*```", debug_text, flags=re.DOTALL | re.IGNORECASE)
+            if fenced:
+                debug_text = fenced.group(1).strip()
+            if debug_text:
+                debug_payload = json.loads(debug_text)
+        else:
+            body_text_lines = body_lines
+
+        # The role comes from the section heading, not from a metadata line.
+        conversation.append(
+            {
+                "message_id": metadata.get("message_id"),
+                "role": role,
+                "text": "\n".join(body_text_lines).strip(),
+                "reply_type": metadata.get("reply_type"),
+                "created_at": metadata.get("created_at"),
+                "trace_id": metadata.get("trace_id"),
+                "debug": debug_payload,
+            }
+        )
+
+    if not conversation:
+        raise RuntimeError("Could not parse conversation sections from export markdown")
+    return session_id, conversation
+
+
+def handle_run_case(args: argparse.Namespace) -> int:
+    """Run one question through the backend async eval API and save the slot artifacts.
+
+    Steps: create the case directory and brief, start the async job, wait
+    for it to finish, wait for the session file, read the session and the
+    optional report, then write the artifact bundle for ``args.slot``.
+
+    Returns:
+        ``0`` on success.
+
+    Raises:
+        RuntimeError: if a backend response lacks the job object, job id or
+            run id, if the job does not end as ``completed``, or if the
+            session has no assistant message.
+        TimeoutError: if the job does not finish in time.
+        FileNotFoundError: if the session file does not appear.
+    """
+    case_id = slugify_case_id(args.domain, args.case_id)
+    case_dir = Path(args.output_root).resolve() / case_id
+    case_dir.mkdir(parents=True, exist_ok=True)
+    ensure_case_brief(
+        case_dir,
+        domain=args.domain,
+        question=args.question,
+        expected_capability=args.expected_capability,
+        expected_result_mode=args.expected_result_mode,
+    )
+
+    payload: dict[str, Any] = {
+        "eval_target": "assistant_stage1",
+        "questions": [args.question],
+        "useMock": bool(args.use_mock),
+        "mode": "standard",
+    }
+    if args.analysis_date:
+        payload["analysis_date"] = args.analysis_date
+
+    start_response = http_json(f"{args.backend_url}/api/eval/run-async/start", method="POST", payload=payload)
+    job = start_response.get("job")
+    if not isinstance(job, dict):
+        raise RuntimeError("Async start response does not contain `job` object")
+    job_id = str(job.get("job_id") or "")
+    if not job_id:
+        raise RuntimeError("Async start response does not contain job_id")
+
+    final_job = wait_for_job(args.backend_url, job_id, args.timeout_seconds, args.poll_interval_seconds)
+    if str(final_job.get("status") or "") != "completed":
+        raise RuntimeError(f"Async job did not complete successfully: {final_job.get('status')}")
+
+    run_id = str(final_job.get("run_id") or "")
+    if not run_id:
+        # The session and report file names are derived from the run id, so
+        # fail fast instead of waiting for a file named "-AUTO-001.json".
+        raise RuntimeError("Completed async job does not contain run_id")
+    # NOTE(review): presumably the backend labels the single generated case
+    # AUTO-001 — confirm against the backend before adding multi-question runs.
+    report_case_id = "AUTO-001"
+    session_id = f"{run_id}-{report_case_id}"
+    session_file = Path(args.sessions_dir).resolve() / f"{session_id}.json"
+    wait_for_file(session_file)
+
+    session_record = read_json_file(session_file)
+    conversation = extract_conversation_from_session(session_record)
+    export_markdown = build_conversation_export(session_id, conversation, mode="technical")
+
+    # The report is optional: a missing file leaves report_case as None.
+    report_case = None
+    report_file = Path(args.reports_dir).resolve() / f"{run_id}.json"
+    if report_file.exists():
+        report_record = read_json_file(report_file)
+        report_case = extract_report_case(report_record, report_case_id)
+
+    turn_artifact = build_turn_artifact(
+        slot=args.slot,
+        domain=args.domain,
+        case_id=case_id,
+        question=args.question,
+        session_id=session_id,
+        conversation=conversation,
+        session_record=session_record,
+        job_record=final_job,
+        report_case=report_case,
+        export_file_name=f"{args.slot}_output.md",
+    )
+    save_capture_bundle(
+        case_dir=case_dir,
+        slot=args.slot,
+        export_markdown=export_markdown,
+        debug_payload=turn_artifact.get("technical_debug_payload"),
+        turn_artifact=turn_artifact,
+        session_record=session_record,
+        job_record=final_job,
+        report_case=report_case,
+    )
+    print(f"[domain-case-loop] saved {args.slot} artifacts to {case_dir}")
+    print(f"[domain-case-loop] session_id={session_id}")
+    return 0
+
+
+def handle_import_export(args: argparse.Namespace) -> int:
+    """Build the slot artifacts from an existing markdown export file.
+
+    The export is read from ``args.input`` and parsed into a conversation.
+    The question comes from ``args.question`` or, when that is falsy, from
+    the user message preceding the last assistant reply. The original export
+    text is stored unchanged as the slot's markdown output. Returns ``0``.
+    """
+    source_path = Path(args.input)
+    export_text = source_path.read_text(encoding="utf-8-sig")
+    session_id, conversation = parse_export_markdown(export_text)
+
+    case_id = slugify_case_id(args.domain, args.case_id)
+    case_dir = Path(args.output_root).resolve() / case_id
+    case_dir.mkdir(parents=True, exist_ok=True)
+
+    assistant_msg = find_last_assistant(conversation)
+    user_msg = find_last_user_before(conversation, assistant_msg.get("message_id"))
+    if args.question:
+        question = args.question
+    elif isinstance(user_msg, dict):
+        question = user_msg.get("text")
+    else:
+        question = None
+
+    ensure_case_brief(
+        case_dir,
+        domain=args.domain,
+        question=question,
+        expected_capability=args.expected_capability,
+        expected_result_mode=args.expected_result_mode,
+    )
+
+    # No backend run is involved, so the session, job and report are absent.
+    output_name = f"{args.slot}_output.md"
+    turn_artifact = build_turn_artifact(
+        slot=args.slot,
+        domain=args.domain,
+        case_id=case_id,
+        question=question,
+        session_id=session_id,
+        conversation=conversation,
+        session_record=None,
+        job_record=None,
+        report_case=None,
+        export_file_name=output_name,
+    )
+    save_capture_bundle(
+        case_dir=case_dir,
+        slot=args.slot,
+        export_markdown=export_text,
+        debug_payload=assistant_msg.get("debug"),
+        turn_artifact=turn_artifact,
+        session_record=None,
+        job_record=None,
+        report_case=None,
+    )
+    print(f"[domain-case-loop] imported {args.slot} artifacts to {case_dir}")
+    return 0
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the CLI parser with the ``run-case`` and ``import-export`` sub-commands.
+
+    Each sub-command stores its handler in ``func`` so the caller can
+    dispatch with ``args.func(args)``.
+    """
+    root = argparse.ArgumentParser(description="Repo-native helper for NDC_1C domain-case orchestration")
+    commands = root.add_subparsers(dest="command", required=True)
+    slot_choices = ["baseline", "rerun"]
+
+    # run-case: execute one question against the backend.
+    run_cmd = commands.add_parser(
+        "run-case",
+        help="Run one assistant_stage1 case through the existing backend and save artifacts",
+    )
+    run_cmd.add_argument("--domain", required=True)
+    run_cmd.add_argument("--question", required=True)
+    run_cmd.add_argument("--case-id")
+    run_cmd.add_argument("--slot", default="baseline", choices=slot_choices)
+    run_cmd.add_argument("--analysis-date")
+    run_cmd.add_argument("--backend-url", default=DEFAULT_BACKEND_URL)
+    run_cmd.add_argument("--output-root", default=str(DEFAULT_ARTIFACTS_ROOT))
+    run_cmd.add_argument("--sessions-dir", default=str(DEFAULT_SESSIONS_DIR))
+    run_cmd.add_argument("--reports-dir", default=str(DEFAULT_REPORTS_DIR))
+    run_cmd.add_argument("--timeout-seconds", type=int, default=300)
+    run_cmd.add_argument("--poll-interval-seconds", type=float, default=1.5)
+    run_cmd.add_argument("--expected-capability")
+    run_cmd.add_argument("--expected-result-mode")
+    run_cmd.add_argument("--use-mock", action="store_true")
+    run_cmd.set_defaults(func=handle_run_case)
+
+    # import-export: build artifacts from an already saved markdown export.
+    import_cmd = commands.add_parser(
+        "import-export",
+        help="Import an existing technical export markdown and build artifacts",
+    )
+    import_cmd.add_argument("--domain", required=True)
+    import_cmd.add_argument("--input", required=True)
+    import_cmd.add_argument("--question")
+    import_cmd.add_argument("--case-id")
+    import_cmd.add_argument("--slot", default="baseline", choices=slot_choices)
+    import_cmd.add_argument("--output-root", default=str(DEFAULT_ARTIFACTS_ROOT))
+    import_cmd.add_argument("--expected-capability")
+    import_cmd.add_argument("--expected-result-mode")
+    import_cmd.set_defaults(func=handle_import_export)
+
+    return root
+
+
+def main() -> int:
+    """Parse the command line, run the selected handler and return the process exit code.
+
+    Any exception from the handler is reported on stderr and mapped to exit
+    code ``1``.
+    """
+    args = build_parser().parse_args()
+    try:
+        exit_code = int(args.func(args))
+    except Exception as error:  # noqa: BLE001
+        print(f"[domain-case-loop] error: {error}", file=sys.stderr)
+        return 1
+    return exit_code
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())