diff --git a/.codex/agents/domain_analyst.toml b/.codex/agents/domain_analyst.toml index 30bfd62..1d0371d 100644 --- a/.codex/agents/domain_analyst.toml +++ b/.codex/agents/domain_analyst.toml @@ -18,17 +18,19 @@ Your job is to produce a detailed verdict in Russian with strong business focus. Always answer in a strict structure: 1. Смысл вопроса -2. Что реально посчитано -3. Где расхождение по бизнес-смыслу -4. Где route / capability mismatch -5. Evidence quality -6. P0 defects -7. P1 defects -8. P2 defects -9. Minimal patch directions -10. Acceptance criteria for rerun -11. Quality score -12. Loop decision +2. Главный пользовательский путь и дерево сценария +3. Что реально посчитано +4. Где расхождение по бизнес-смыслу +5. Где route / capability mismatch +6. Evidence quality +7. P0 defects +8. P1 defects +9. P2 defects +10. Minimal patch directions +11. Acceptance matrix for rerun +12. Acceptance criteria for rerun +13. Quality score +14. Loop decision Rules: - Call out non-business garbage explicitly. @@ -42,10 +44,17 @@ Rules: - In cascading scenarios, verify temporal continuity explicitly: if the user says `на эту дату` / `на ту дату`, compare the carried date or period in debug filters to the originating turn and call out any drift as a defect. - Verify answer granularity explicitly: if the user asked for item-level residues, do not accept a document-level dump as a correct answer. - Verify sort/order semantics when the wording implies chronology or ranking, for example `старые закупки` should be oldest-first. +- Treat the acceptance unit as a scenario tree, not a flat list of prompts. +- Under `Главный пользовательский путь и дерево сценария`, explicitly name the root node, critical child nodes, critical edges, and the primary user path. +- Under `Acceptance matrix for rerun`, list at least the critical nodes/edges and mark each one by wording family: `canonical`, `colloquial`, `ui_selected_object`. 
+- Distinguish these defect classes explicitly when relevant: `semantic_understanding_gap`, `edge_carryover_gap`, `answer_shape_mismatch`, `ordering_semantics_mismatch`, `runtime_capability_gap`, `loop_coverage_gap`. +- If the root node works but the primary user path is broken at the first selected-object drilldown, treat that as a real failure of domain hardening. +- If the runtime nearly supports the path but the loop never validated the realistic wording family, call it `loop_coverage_gap`, not product success. Quality score: - Output one integer score from 0 to 100. - Score >= 80 means the case can be accepted only if there is no unresolved P0. +- Score >= 80 also requires the primary user path and its critical edges to be green across canonical, colloquial, and UI-selected-object coverage where applicable. - If score < 80, loop_decision must be continue, partial, blocked, or needs_exact_capability. """ nickname_candidates = ["Lens", "Vector", "Delta"] diff --git a/.codex/agents/orchestrator.toml b/.codex/agents/orchestrator.toml index e49f37a..332a47e 100644 --- a/.codex/agents/orchestrator.toml +++ b/.codex/agents/orchestrator.toml @@ -23,10 +23,11 @@ Your job: 4. Ask domain_analyst for a strict verdict in Russian using machine-readable artifacts first: - case mode: baseline_turn.json, then baseline_output.md / baseline_debug.json - scenario mode: scenario_state.json and per-step turn.json, then scenario_summary.md / per-step debug.json -5. Feed the verdict to domain_coder for the smallest defensible domain-only patch. -6. Capture rerun artifacts or scenario rerun artifacts. -7. Ask domain_analyst for before/after comparison and a quality score. -8. End with one status: accepted | partial | blocked | needs_exact_capability. +5. Before patching, define or update the scenario tree: root node, critical child nodes, critical edges, primary user path, required paraphrase families, and required carryover invariants. +6. 
Feed the verdict to domain_coder for the smallest defensible domain-only patch. +7. Capture rerun artifacts or scenario rerun artifacts. +8. Ask domain_analyst for before/after comparison and a quality score. +9. End with one status: accepted | partial | blocked | needs_exact_capability. Hard rules: - Do not change architecture. @@ -38,21 +39,30 @@ Hard rules: - In autonomous loop mode, do not stop only because the analyst says `needs_exact_capability` or `partial` if there is still autonomous implementation work to do. - Stop early when the analyst sets `requires_user_decision = true` because the next step would otherwise require guessing a missing required observation, accepting a risky architecture fork, choosing a business-critical tradeoff, or pushing through a hacky / brittle / disproportionally complex fix. - Treat true runtime or 1C availability failures as `blocked`, not as a normal low-score iteration. +- Treat the acceptance unit as a scenario tree with explicit nodes and edges, not as a flat prompt list. +- Prioritize the primary user path before secondary branches or broad pool coverage. - For follow-up-heavy domains, capture and rerun at least one colloquial/slang variant and one UI-generated selected-object follow-up variant instead of validating only canonical wording. - For cascading date-sensitive scenarios, rerun at least one `на эту дату` / `на ту дату` follow-up and verify that the originating date or period survives into debug filters. - If the business question asks for residues/items/contracts but the answer switched to raw documents or movements, treat that as a real defect, not as acceptable detail. - If the wording implies chronology or ranking such as `старые закупки`, verify oldest-first ordering explicitly. +- If the root node works but the first critical selected-object or drilldown edge is still broken, do not treat the scenario as hardened. 
+- Require an explicit `scenario_acceptance_matrix.md` artifact for follow-up-heavy domains and packs. +- Use the matrix to drive coder tasks: patch the narrowest broken edge or wording family first, not the whole domain at once. +- Distinguish `runtime_capability_gap` from `loop_coverage_gap`; do not confuse “not validated in the loop” with “product already works”. Acceptance gate: - accepted requires analyst quality_score >= 80 - accepted requires zero unresolved P0 defects - accepted requires no business-critical regression in rerun +- accepted requires green critical edges on the primary user path +- accepted requires green coverage for canonical + colloquial + UI-selected-object variants on critical branches when those branches exist in the product UX Required artifacts per cycle: - case_brief.md - baseline_output.md - baseline_debug.json - baseline_turn.json +- scenario_acceptance_matrix.md - scenario_manifest.json - scenario_state.json - scenario_summary.md diff --git a/.codex/skills/domain-case-loop/SKILL.md b/.codex/skills/domain-case-loop/SKILL.md index cec1660..baf64e4 100644 --- a/.codex/skills/domain-case-loop/SKILL.md +++ b/.codex/skills/domain-case-loop/SKILL.md @@ -27,6 +27,7 @@ This skill packages the standard workflow for iterating on one concrete domain c ## Repo-specific runtime map Read `references/repo_runtime_map.md` before the first real cycle. +For follow-up-heavy domains, also read `references/scenario_tree_acceptance_canon.md` before scenario mode, pack mode, or autonomous pack-loop mode. 
Use these repo-native capture paths: - automated capture: `python scripts/domain_case_loop.py run-case ...` @@ -49,10 +50,14 @@ Use scenario mode when the user brings a linked chain such as: - "was it later sold" In scenario mode: +- model the domain as a scenario tree, not as a flat list of prompts; +- define one `root` plus critical child drilldowns and the primary user path; +- treat `selected-object` follow-up branches as first-class business paths when the UI exposes selectable entities; - create `scenario_manifest.json` first; - keep one shared `session_id`; - capture each step under `artifacts/domain_runs//steps//`; -- preserve semantic carryover via explicit `scenario_state.json`, not vague model memory. +- preserve semantic carryover via explicit `scenario_state.json`, not vague model memory; +- require a `scenario_acceptance_matrix.md` artifact that records node/edge coverage and paraphrase-family coverage. Use `references/scenario_manifest_template.json`. @@ -62,8 +67,10 @@ Use pack mode when the user brings a whole domain pool and wants grouped orchest In pack mode: - group the question pool into several coherent scenarios; +- define the root and critical branches inside each scenario instead of validating only isolated prompts; - capture each scenario under `artifacts/domain_runs//scenarios//`; - write aggregate `pack_state.json` and `pack_summary.md`; +- aggregate scenario acceptance through node/edge coverage rather than a raw question count; - treat unresolved scenarios as enablement backlog, not as a reason to drop the domain. ### Autonomous pack-loop mode @@ -79,6 +86,8 @@ In autonomous pack-loop mode: - do not stop just because the analyst returns `needs_exact_capability` or `partial` if autonomous domain enablement work still remains. - treat `quality score >= 80` as the target gate, not as permission to keep pushing through hard blockers, missing essential observations, or unsafe fixes. 
- for follow-up-heavy domains, include conversational variants, slang/typo variants, and UI-generated selected-object follow-ups in the acceptance slice instead of validating only one canonical wording. +- do not mark a domain path as hardened only because the root node works; critical edges and drilldowns must pass as well. +- treat broken tree edges, missing carryover, or wrong answer shape as blockers for acceptance even when the underlying root intent is already exact. ### Step 1 - Normalize the case @@ -88,6 +97,9 @@ Create `artifacts/domain_runs//case_brief.md` with: - expected business meaning - expected exact capability - expected result mode +- primary user path +- required paraphrase families +- required carryover invariants - known constraints - acceptance criteria draft @@ -113,6 +125,7 @@ Spawn `domain_analyst` and provide: - `baseline_turn.json` - `baseline_output.md` - `baseline_debug.json` +- `scenario_acceptance_matrix.md` when the case is follow-up-heavy or scenario-based - optional relevant code excerpts or file paths Require a full verdict using `references/verdict_template.md`. @@ -121,6 +134,7 @@ The verdict must explicitly say whether the case is: - an existing in-contour regression; - a missing route/intent/capability inside project scope; - a true out-of-scope request. +- and, as a separate axis from scope, which defect class applies when relevant: `runtime_capability_gap`, `semantic_understanding_gap`, `edge_carryover_gap`, `answer_shape_mismatch`, `ordering_semantics_mismatch`, or `loop_coverage_gap`.
### Step 4 - Domain patch @@ -142,6 +156,7 @@ Capture: - `rerun_debug.json` - `rerun_turn.json` - `patch_summary.md` +- updated `scenario_acceptance_matrix.md` when the rerun belongs to a scenario or pack ### Step 6 - Before/after analysis @@ -172,6 +187,8 @@ Accepted requires: - quality score >= 80 - no unresolved P0 defects - no silent heuristic masking +- critical scenario-tree edges on the primary user path are green +- canonical, colloquial, and UI-selected-object variants are green for critical branches ## Hard rules @@ -185,9 +202,11 @@ Accepted requires: - Preserve successful baseline scenarios. - Treat follow-up continuity as a state-machine problem, not a wording problem. - Do not accept a domain as hardened if only canonical phrasing works while colloquial or UI-generated follow-up phrasing still breaks the exact contour. +- Do not accept a domain as hardened if the root node works but a critical selected-object or drilldown edge still breaks. - Treat temporal carryover loss in a cascading scenario as a real regression: if the user says `на эту дату` / `на ту дату`, the analyst must verify that the exact carried date or period survived into `extracted_filters`. - Treat answer-shape mismatch as a scoring defect: if the user asked for items / residues / contracts, do not accept an answer that switched to raw documents, movements, or another lower-level object without saying so explicitly. - Treat ordering semantics as part of correctness when the wording implies ranking or chronology, for example `старые закупки` => oldest-first rather than newest-first. +- Treat primary user-path failures as more important than supporting-path polish: if the user cannot go from root list -> selected object -> first drilldown, the scenario is not accepted. 
## Domain-specific framing diff --git a/.codex/skills/domain-case-loop/references/artifact_layout.md b/.codex/skills/domain-case-loop/references/artifact_layout.md index 0f17f9e..4891df6 100644 --- a/.codex/skills/domain-case-loop/references/artifact_layout.md +++ b/.codex/skills/domain-case-loop/references/artifact_layout.md @@ -10,6 +10,7 @@ artifacts/domain_runs// - baseline_session.json - baseline_job.json - baseline_report_case.json +- scenario_acceptance_matrix.md - analyst_verdict.md - coder_plan.md - patch_summary.md @@ -29,6 +30,7 @@ artifacts/domain_runs// - scenario_manifest.json - manifest_source.txt - scenario_state.json +- scenario_acceptance_matrix.md - scenario_output.md - scenario_summary.md - final_status.md @@ -47,6 +49,7 @@ artifacts/domain_runs// - pack_manifest.json - manifest_source.txt - pack_state.json +- scenario_acceptance_matrix.md - pack_summary.md - final_status.md - scenarios//... @@ -58,6 +61,7 @@ artifacts/domain_runs// - loop_state.json - loop_summary.md - final_status.md +- scenario_acceptance_matrix.md - iterations//analyst_prompt.md - iterations//analyst_verdict.json - iterations//analyst_exec.stdout.log diff --git a/.codex/skills/domain-case-loop/references/case_brief_template.md b/.codex/skills/domain-case-loop/references/case_brief_template.md index 8504fff..18b3abd 100644 --- a/.codex/skills/domain-case-loop/references/case_brief_template.md +++ b/.codex/skills/domain-case-loop/references/case_brief_template.md @@ -15,6 +15,23 @@ ## Expected result mode - confirmed_balance / confirmed_tax_liability / partial / technical_insufficiency / other +## Primary user path +- root: +- critical child: +- selected-object follow-up: + +## Required paraphrase families +- canonical +- colloquial +- ui_selected_object + +## Required carryover invariants +- selected object / item +- date or period +- warehouse if relevant +- organization if relevant +- expected answer shape + ## Contour status - in_contour / outside_current_contour / unknown 
@@ -33,3 +50,6 @@ ## Draft acceptance criteria - ... +- root node works +- critical edges on the primary user path work +- colloquial and UI-generated follow-up variants work diff --git a/.codex/skills/domain-case-loop/references/repo_runtime_map.md b/.codex/skills/domain-case-loop/references/repo_runtime_map.md index fbf6f99..30fefe6 100644 --- a/.codex/skills/domain-case-loop/references/repo_runtime_map.md +++ b/.codex/skills/domain-case-loop/references/repo_runtime_map.md @@ -26,6 +26,7 @@ 8. Use `pack_state.json` plus per-scenario `scenario_state.json` as canonical analyst input for pack mode. 9. Use `loop_state.json` plus per-iteration `analyst_verdict.json` / `coder_result.json` as canonical analyst input for autonomous pack-loop mode. 10. Use `baseline_output.md` / `rerun_output.md` or per-step `output.md` as human-readable paired artifacts. +11. For follow-up-heavy domains, use `scenario_acceptance_matrix.md` as the canonical coverage view for scenario-tree nodes, edges, and paraphrase families. ## Default run assumptions @@ -42,3 +43,4 @@ - Reuse current assistant runtime; do not build a parallel execution lane. - Preserve UTF-8 without BOM for every generated artifact. - Do not overwrite existing AGENTS rules; extend them. +- Do not treat a root node success as domain acceptance when selected-object or drilldown edges on the primary user path are still broken. 
diff --git a/.codex/skills/domain-case-loop/references/scenario_manifest_template.json b/.codex/skills/domain-case-loop/references/scenario_manifest_template.json index d340fd8..b9b4dbb 100644 --- a/.codex/skills/domain-case-loop/references/scenario_manifest_template.json +++ b/.codex/skills/domain-case-loop/references/scenario_manifest_template.json @@ -4,6 +4,26 @@ "domain": "inventory_stock", "title": "Inventory stock -> supplier provenance chain", "description": "Shared-session scenario for warehouse stock, supplier provenance, and downstream document tracing.", + "acceptance_canon": { + "root_step_id": "step_01_inventory", + "primary_user_path": [ + "step_01_inventory", + "step_02_supplier", + "step_03_documents" + ], + "required_paraphrase_families": [ + "canonical", + "colloquial", + "ui_selected_object" + ], + "required_carryover_invariants": [ + "selected_object", + "date_scope", + "warehouse_scope", + "organization_scope", + "answer_shape" + ] + }, "analysis_context": { "as_of_date": "2026-04-13", "source": "scenario_manifest" @@ -13,6 +33,8 @@ "step_id": "step_01_inventory", "title": "Current stock snapshot", "question": "Какие товары сейчас лежат на складе", + "node_role": "root", + "paraphrase_family": "canonical", "expected_capability": "confirmed_inventory_on_hand_as_of_date", "expected_result_mode": "confirmed_balance" }, @@ -20,13 +42,19 @@ "step_id": "step_02_supplier", "title": "Supplier provenance", "question": "У какого поставщика купили {{step_01_inventory.entries[0].item}}", - "depends_on": ["step_01_inventory"] + "node_role": "critical_child", + "paraphrase_family": "canonical", + "depends_on": ["step_01_inventory"], + "required_carryover_invariants": ["selected_object", "date_scope"] }, { "step_id": "step_03_documents", "title": "Purchase documents", "question": "По каким документам был куплен {{step_01_inventory.entries[0].item}}", - "depends_on": ["step_01_inventory", "step_02_supplier"] + "node_role": "critical_child", + 
"paraphrase_family": "canonical", + "depends_on": ["step_01_inventory", "step_02_supplier"], + "required_carryover_invariants": ["selected_object", "date_scope"] } ] } diff --git a/.codex/skills/domain-case-loop/references/scenario_tree_acceptance_canon.md b/.codex/skills/domain-case-loop/references/scenario_tree_acceptance_canon.md new file mode 100644 index 0000000..8b44cb6 --- /dev/null +++ b/.codex/skills/domain-case-loop/references/scenario_tree_acceptance_canon.md @@ -0,0 +1,146 @@ +# Scenario-tree acceptance canon + +## Core idea + +For follow-up-heavy business domains, the unit of acceptance is not a flat list of isolated questions. + +The unit of acceptance is a **scenario tree**: +- a root business question; +- one or more critical child drilldowns; +- explicit transitions between steps; +- explicit semantic carryover between steps. + +If the root works but a critical child transition breaks, the domain is **not** hardened. + +## Model the domain as a tree + +For each scenario, define: +- `root node` +- `critical child nodes` +- `critical edges` +- `primary user path` + +Example for inventory: +- root: stock snapshot on date +- child: selected item -> supplier provenance +- child: selected item -> purchase documents +- child: selected item -> aging on the same date +- child: selected item -> sale trace + +The primary user path is the path a real user is most likely to take first, not the prettiest canonical wording. + +## Node acceptance + +A node is considered covered only if all of these are true: +- the business meaning is understood correctly; +- the expected intent / capability is selected; +- the answer shape matches the requested business object; +- the answer begins with a direct user-facing answer when such an answer is expected; +- the answer is evidence-backed rather than heuristic-masked. 
+ +Examples: +- a supplier-provenance question must be answered with the supplier first, not only with raw documents; +- an old-stock question must be answered with item-level old-stock positions, not with a raw document dump; +- a question about residues/items/contracts must not be silently downgraded to lower-level movements. + +## Edge acceptance + +Each critical edge must define its required carryover invariants. + +Typical invariants: +- selected object survives from previous assistant output +- originating date / period survives into follow-up filters +- warehouse survives if the follow-up still targets the same stock slice +- organization survives if the previous slice was organization-bound +- route family remains in the same business contour unless the user clearly changed intent + +If an edge loses a required invariant, that is a real regression even if the target node works in isolation. + +## Mandatory paraphrase families + +Every critical node or edge must be validated in a small paraphrase family instead of one curated wording only. + +Minimum family: +- `canonical` +- `colloquial` +- `ui_selected_object` + +Examples: +- canonical: `От какого поставщика куплен товар X` +- colloquial: `Кто поставил этот товар` +- ui_selected_object: `По выбранному объекту "X": кто это поставил нам` + +If canonical works but colloquial or UI-generated follow-up fails, the node/edge is not accepted. + +## Acceptance matrix + +The analyst must produce or update a `scenario_acceptance_matrix.md` artifact for every multi-step scenario or pack.
+ +Minimum matrix columns: +- scenario id +- node id or edge id +- user path role (`root`, `critical_child`, `supporting`) +- wording family (`canonical`, `colloquial`, `ui_selected_object`) +- expected business meaning +- expected intent +- expected capability / recipe +- required carryover invariants +- expected answer shape +- actual outcome +- status (`pass`, `partial`, `fail`) +- defect class + +## Defect classes + +Use these classes explicitly: +- `semantic_understanding_gap` +- `edge_carryover_gap` +- `answer_shape_mismatch` +- `ordering_semantics_mismatch` +- `runtime_capability_gap` +- `loop_coverage_gap` + +Definitions: +- `semantic_understanding_gap`: the system did not understand the real user meaning +- `edge_carryover_gap`: the follow-up lost date / object / scope across steps +- `answer_shape_mismatch`: the business object in the answer does not match the requested object +- `ordering_semantics_mismatch`: ranking / chronology semantics are wrong +- `runtime_capability_gap`: the product contour truly lacks the route / intent / capability / extractor / recipe +- `loop_coverage_gap`: the runtime could support the path or nearly support it, but the analyst/orchestrator never treated that path as mandatory acceptance coverage + +## Analyst responsibilities + +The analyst must: +- review the scenario tree, not just individual turns; +- compare expected and actual user path transitions; +- call out broken edges explicitly; +- verify colloquial and UI-generated variants as first-class coverage; +- verify direct-answer-first behavior where the user asked a direct lookup question; +- verify answer granularity and ordering semantics; +- lower the score when any critical edge or paraphrase family is broken. 
+ +## Orchestrator responsibilities + +The orchestrator must: +- define the tree before iterating deeply; +- prioritize the primary user path first; +- rerun at least one colloquial variant and one UI-selected-object variant for each critical branch; +- treat a broken critical edge as an unfinished scenario even if the root node works; +- route coder work to the narrowest broken edge or node rather than issuing broad “improve the domain” tasks. + +## Stop and acceptance rules + +Do not accept a domain when: +- only the root node works; +- only one curated phrasing works; +- selected-object follow-up is broken; +- `на эту дату` / `на ту дату` loses the originating date; +- the answer shape is wrong for the business question; +- chronology / ranking semantics are inverted. + +Accepted requires: +- score >= 80 +- no unresolved P0 +- critical path edges pass +- canonical + colloquial + UI-selected-object variants pass for critical branches +- no silent heuristic masking diff --git a/.codex/skills/domain-case-loop/references/verdict_template.md b/.codex/skills/domain-case-loop/references/verdict_template.md index 6aeaa5a..0edd860 100644 --- a/.codex/skills/domain-case-loop/references/verdict_template.md +++ b/.codex/skills/domain-case-loop/references/verdict_template.md @@ -3,37 +3,53 @@ ## 1. Смысл вопроса ... -## 2. Что реально посчитано +## 2. Главный пользовательский путь и дерево сценария +- root: +- critical child nodes: +- critical edges: +- primary user path: + +## 3. Что реально посчитано ... -## 3. Где расхождение по бизнес-смыслу +## 4. Где расхождение по бизнес-смыслу ... -## 4. Где route / capability mismatch +## 5. Где route / capability mismatch ... -## 5. Evidence quality +## 6. Evidence quality - exact / partial / heuristic / technical insufficiency - why -## 6. P0 defects +## 7. P0 defects - ... -## 7. P1 defects +## 8. P1 defects - ... -## 8. P2 defects +## 9. P2 defects - ... -## 9. Minimal patch directions +## 10. Minimal patch directions - ... 
-## 10. Acceptance criteria for rerun +## 11. Acceptance matrix for rerun +- Node / edge coverage: +- Canonical wording: +- Colloquial wording: +- UI-generated selected-object wording: +- Carryover invariants: +- Expected answer shape: +- Defect class: + +## 12. Acceptance criteria for rerun - ... - Include colloquial/slang variants and UI-generated selected-object follow-up variants when they are part of the business flow. +- Require the primary user path to pass end-to-end, not only the root node. -## 11. Quality score +## 13. Quality score - integer from 0 to 100 -## 12. Loop decision +## 14. Loop decision - accepted / continue / partial / blocked / needs_exact_capability diff --git a/AGENTS.md b/AGENTS.md index b30f290..4498f27 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,10 @@ Rules: - Preserve current architecture: domain loop may automate capture, review, rerun, and artifact storage, but must not rewrite runtime foundations. - Prefer machine-readable case artifacts in `artifacts/domain_runs//`, especially `baseline_turn.json` / `rerun_turn.json`, over ad hoc prose-only summaries. - For cascading user questions in one domain, prefer scenario artifacts (`scenario_manifest.json`, `scenario_state.json`, per-step `turn.json`) over separate unlinked case folders. +- For follow-up-heavy domains, treat acceptance as scenario-tree coverage: root node, critical child nodes, critical edges, and the primary user path must be validated explicitly. +- Do not accept a domain when only the root snapshot works but selected-object or drilldown follow-up edges still fail. +- For critical branches, validate at least canonical wording, colloquial wording, and UI-generated selected-object wording when that UX exists. +- Treat temporal carryover, selected-object carryover, answer-shape match, and ordering semantics as first-class acceptance invariants rather than optional polish. 
- If a case falls outside the current routed contour because the route/intent/capability is not wired yet, treat it as domain enablement work for this project, not as automatic out-of-scope rejection. - For new unmarked domains, `needs_exact_capability` means "bootstrap or extend the contour" rather than "close the case as unsupported". - A case can be marked `accepted` only when analyst verdict is at least `80/100`, no unresolved `P0` remains, and the rerun does not mask heuristic output as confirmed. diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index c655ab5..51c0a47 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1342,8 +1342,8 @@ function hasInventorySaleTraceSignal(text) { } function hasInventoryProvenanceSignalV2(text) { const hasItemCue = /(?:товар|номенклатур|sku|item|product|остат(?:ок|ки)|склад)/iu.test(text); - const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|поставщик|supplier|vendor)/iu.test(text); - const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|purchase\s+provenance|purchase\s+date)/iu.test(text); + const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставщик|supplier|vendor)/iu.test(text); + const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставлен(?:ы|а)?|purchase\s+provenance|purchase\s+date)/iu.test(text); return hasItemCue && hasSupplierCue && hasPurchaseCue; } function hasInventoryPurchaseDateSignal(text) { diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts 
index a8a5e97..973e9e6 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -1605,8 +1605,14 @@ function hasInventorySaleTraceSignal(text: string): boolean { function hasInventoryProvenanceSignalV2(text: string): boolean { const hasItemCue = /(?:товар|номенклатур|sku|item|product|остат(?:ок|ки)|склад)/iu.test(text); - const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|поставщик|supplier|vendor)/iu.test(text); - const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|purchase\s+provenance|purchase\s+date)/iu.test(text); + const hasSupplierCue = + /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставщик|supplier|vendor)/iu.test( + text + ); + const hasPurchaseCue = + /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставлен(?:ы|а)?|purchase\s+provenance|purchase\s+date)/iu.test( + text + ); return hasItemCue && hasSupplierCue && hasPurchaseCue; } diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index 4329163..f40d01f 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -173,6 +173,17 @@ describe("address query shape classifier", () => { expect(filters.item).toBe("Кромка с клеем 33 альмандин 137 м"); }); + it("keeps colloquial selected-object supplier follow-up in inventory provenance intent", () => { + const mode = detectAddressQuestionMode( + 'По выбранному объекту "Кромка с клеем 33 альмандин 137 м": кто поставил этот товар' + ); + const result = resolveAddressIntent( + 'По выбранному объекту "Кромка с клеем 33 альмандин 137 м": кто поставил 
этот товар' + ); + expect(mode.mode).toBe("address_query"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + it("keeps full supplier anchor with comma suffix for stock-overlap questions", () => { const filters = extractAddressFilters( "Какие товары от поставщика Гамма-мебель, ООО сейчас еще лежат на складе Основной склад?",