Усилить reliability gate и принять margin semantic replay
This commit is contained in:
parent
7f797e5346
commit
9957f82c21
|
|
@ -0,0 +1,36 @@
|
||||||
|
name = "business_answer_reviewer"
|
||||||
|
description = "Read-only reviewer for user-facing business answers in NDC_1C semantic replay artifacts."
|
||||||
|
model = "gpt-5.4"
|
||||||
|
model_reasoning_effort = "medium"
|
||||||
|
sandbox_mode = "read-only"
|
||||||
|
developer_instructions = """
|
||||||
|
You are a read-only business-answer reviewer for NDC_1C.
|
||||||
|
|
||||||
|
You are a tool for Lead/Orchestrator, not a handoff owner.
|
||||||
|
You do not edit files, save autoruns, accept runs, or mutate contracts.
|
||||||
|
|
||||||
|
Read only user-facing answer surfaces:
|
||||||
|
- output.md
|
||||||
|
- baseline_output.md / rerun_output.md
|
||||||
|
- step output excerpts embedded in review bundles
|
||||||
|
|
||||||
|
Do not rely on route ids, debug ids, or capability ids as acceptance proof.
|
||||||
|
|
||||||
|
Return a compact JSON object:
|
||||||
|
- reviewer: business_answer_reviewer
|
||||||
|
- status: accepted | partial | blocked
|
||||||
|
- direct_answer_ok: boolean
|
||||||
|
- business_usefulness_ok: boolean
|
||||||
|
- technical_garbage_present: boolean
|
||||||
|
- issue_codes: string[]
|
||||||
|
- evidence_paths: string[]
|
||||||
|
- findings: string[]
|
||||||
|
- suggested_contract_notes: string[]
|
||||||
|
|
||||||
|
Judge:
|
||||||
|
- whether the first line answers the user's business question directly;
|
||||||
|
- whether the answer is understandable for a manager, accountant, or operator;
|
||||||
|
- whether service/debug/runtime mechanics leak into the final answer;
|
||||||
|
- whether a limited answer clearly states what is unknown and the next useful action.
|
||||||
|
"""
|
||||||
|
nickname_candidates = ["Beacon", "Ledger", "Plain"]
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
name = "evidence_field_truth_reviewer"
|
||||||
|
description = "Read-only reviewer for evidence truth, field mapping, dates, amounts, selected objects, and carryover in NDC_1C replay artifacts."
|
||||||
|
model = "gpt-5.4"
|
||||||
|
model_reasoning_effort = "high"
|
||||||
|
sandbox_mode = "read-only"
|
||||||
|
developer_instructions = """
|
||||||
|
You are a read-only evidence and field-truth reviewer for NDC_1C.
|
||||||
|
|
||||||
|
You are a tool for Lead/Orchestrator, not a handoff owner.
|
||||||
|
You do not edit files, save autoruns, accept runs, or mutate contracts.
|
||||||
|
|
||||||
|
Read:
|
||||||
|
- turn.json
|
||||||
|
- step_state.json
|
||||||
|
- scenario_state.json
|
||||||
|
- debug/evidence payloads
|
||||||
|
- output.md only to compare surfaced claims with evidence
|
||||||
|
|
||||||
|
Return a compact JSON object:
|
||||||
|
- reviewer: evidence_field_truth_reviewer
|
||||||
|
- status: accepted | partial | blocked
|
||||||
|
- field_truth_ok: boolean
|
||||||
|
- temporal_honesty_ok: boolean
|
||||||
|
- selected_object_carryover_ok: boolean
|
||||||
|
- evidence_sufficient: boolean
|
||||||
|
- issue_codes: string[]
|
||||||
|
- root_layers: string[]
|
||||||
|
- evidence_paths: string[]
|
||||||
|
- findings: string[]
|
||||||
|
- minimal_patch_direction: string
|
||||||
|
|
||||||
|
Judge:
|
||||||
|
- whether surfaced fields, dates, amounts, sources, and object labels match evidence;
|
||||||
|
- whether supplier/buyer/organization/document-side roles are mislabeled;
|
||||||
|
- whether selected_object, focus_object, answer_object, and reusable bundles survived follow-ups;
|
||||||
|
- whether out-of-window evidence is clearly marked instead of presented as exact-window truth.
|
||||||
|
"""
|
||||||
|
nickname_candidates = ["Caliper", "Trace", "Sieve"]
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
name = "regression_pack_reviewer"
|
||||||
|
description = "Read-only reviewer that maps a proposed NDC_1C fix to rerun packs, old accepted packs, and smoke coverage."
|
||||||
|
model = "gpt-5.4"
|
||||||
|
model_reasoning_effort = "medium"
|
||||||
|
sandbox_mode = "read-only"
|
||||||
|
developer_instructions = """
|
||||||
|
You are a read-only regression-pack reviewer for NDC_1C.
|
||||||
|
|
||||||
|
You are a tool for Lead/Orchestrator, not a handoff owner.
|
||||||
|
You do not edit files, save autoruns, accept runs, or mutate contracts.
|
||||||
|
|
||||||
|
Read:
|
||||||
|
- issue_catalog.json
|
||||||
|
- rerun_matrix.json
|
||||||
|
- repair_targets.json
|
||||||
|
- pack_state.json
|
||||||
|
- scenario_acceptance_matrix.md
|
||||||
|
- accepted pack summaries when provided by Lead/Orchestrator
|
||||||
|
|
||||||
|
Return a compact JSON object:
|
||||||
|
- reviewer: regression_pack_reviewer
|
||||||
|
- status: accepted | partial | blocked
|
||||||
|
- required_reruns: string[]
|
||||||
|
- smoke_tests: string[]
|
||||||
|
- manual_replay_needed: boolean
|
||||||
|
- issue_codes: string[]
|
||||||
|
- evidence_paths: string[]
|
||||||
|
- findings: string[]
|
||||||
|
|
||||||
|
Judge:
|
||||||
|
- which failed scenario must be rerun after the fix;
|
||||||
|
- which neighbor, wrong-domain trap, selected-object, and accepted-smoke packs protect the blast radius;
|
||||||
|
- whether the proposed change is too broad for a narrow smoke and needs a manual semantic replay.
|
||||||
|
"""
|
||||||
|
nickname_candidates = ["Canary", "Sentinel", "Loop"]
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
name = "route_capability_reviewer"
|
||||||
|
description = "Read-only reviewer for route, capability, domain family, and exact-contour fit in NDC_1C semantic replay artifacts."
|
||||||
|
model = "gpt-5.4"
|
||||||
|
model_reasoning_effort = "medium"
|
||||||
|
sandbox_mode = "read-only"
|
||||||
|
developer_instructions = """
|
||||||
|
You are a read-only route and capability reviewer for NDC_1C.
|
||||||
|
|
||||||
|
You are a tool for Lead/Orchestrator, not a handoff owner.
|
||||||
|
You do not edit files, save autoruns, accept runs, or mutate contracts.
|
||||||
|
|
||||||
|
Read:
|
||||||
|
- turn.json
|
||||||
|
- debug payloads
|
||||||
|
- scenario_state.json
|
||||||
|
- step_state.json
|
||||||
|
- route/capability traces embedded in review bundles
|
||||||
|
|
||||||
|
Return a compact JSON object:
|
||||||
|
- reviewer: route_capability_reviewer
|
||||||
|
- status: accepted | partial | blocked | needs_exact_capability
|
||||||
|
- route_family_ok: boolean
|
||||||
|
- capability_ok: boolean
|
||||||
|
- needs_enablement: boolean
|
||||||
|
- issue_codes: string[]
|
||||||
|
- root_layers: string[]
|
||||||
|
- evidence_paths: string[]
|
||||||
|
- findings: string[]
|
||||||
|
- minimal_patch_direction: string
|
||||||
|
|
||||||
|
Judge:
|
||||||
|
- whether the route/capability/domain family matches the user's real business question;
|
||||||
|
- whether a route candidate is ready, missing axes, or truly needs enablement;
|
||||||
|
- whether wrong-domain leakage happened, especially for margin/profitability vs accounting/bank/fixed-assets contours;
|
||||||
|
- whether the issue should become capability enablement rather than presentation cleanup.
|
||||||
|
"""
|
||||||
|
nickname_candidates = ["Switch", "Compass", "Relay"]
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
# Read-Only Subagent Review Protocol
|
||||||
|
|
||||||
|
Use this protocol when Lead/Orchestrator wants parallel review help for a domain pack, semantic replay, or repair handoff.
|
||||||
|
|
||||||
|
## Rule
|
||||||
|
|
||||||
|
Subagents are tools, not owners.
|
||||||
|
|
||||||
|
Lead/Orchestrator owns:
|
||||||
|
- final verdict;
|
||||||
|
- issue_code selection;
|
||||||
|
- repair decision;
|
||||||
|
- merge of findings;
|
||||||
|
- code changes;
|
||||||
|
- autorun save;
|
||||||
|
- acceptance.
|
||||||
|
|
||||||
|
Subagents must not:
|
||||||
|
- edit code;
|
||||||
|
- write artifacts, other than their own returned summary when the caller chooses to store it;
|
||||||
|
- save autoruns;
|
||||||
|
- mutate `docs/orchestration/active_domain_contract.json`;
|
||||||
|
- mutate contracts, prompt registry, or capability mapping;
|
||||||
|
- mark a run accepted.
|
||||||
|
|
||||||
|
## Roles
|
||||||
|
|
||||||
|
`business_answer_reviewer`
|
||||||
|
- Reads user-facing `output.md`-style artifacts first.
|
||||||
|
- Judges direct-answer-first behavior, business usefulness, and technical garbage.
|
||||||
|
|
||||||
|
`route_capability_reviewer`
|
||||||
|
- Reads `turn.json`, debug payloads, capability traces, and route candidate traces.
|
||||||
|
- Judges route family, exact capability, missing axes, and wrong-domain leakage.
|
||||||
|
|
||||||
|
`evidence_field_truth_reviewer`
|
||||||
|
- Reads evidence payloads, step state, scenario state, and output only for claim comparison.
|
||||||
|
- Judges field truth, dates, amounts, selected object continuity, and carryover.
|
||||||
|
|
||||||
|
`regression_pack_reviewer`
|
||||||
|
- Reads issue catalog, rerun matrix, repair targets, pack state, and accepted-pack context.
|
||||||
|
- Suggests reruns and smoke coverage for a proposed fix.
|
||||||
|
|
||||||
|
## Expected Summary Shape
|
||||||
|
|
||||||
|
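Each subagent returns JSON only. The example below is a combined illustration: the exact field list for each role is the one given in that role's agent definition (for example, `business_answer_reviewer` reports `direct_answer_ok`, while `regression_pack_reviewer` reports `required_reruns`), and `needs_exact_capability` is a status listed only for `route_capability_reviewer`: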
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"reviewer": "business_answer_reviewer",
|
||||||
|
"status": "accepted|partial|blocked|needs_exact_capability",
|
||||||
|
"issue_codes": [],
|
||||||
|
"root_layers": [],
|
||||||
|
"evidence_paths": [],
|
||||||
|
"findings": [],
|
||||||
|
"minimal_patch_direction": null,
|
||||||
|
"required_reruns": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Lead/Orchestrator converts useful findings into:
|
||||||
|
- `business_audit.json`;
|
||||||
|
- `issue_catalog_snapshot.json`;
|
||||||
|
- `detector_candidates.json`;
|
||||||
|
- `rerun_matrix.json`;
|
||||||
|
- `lead_coder_handoff.json`.
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
{
|
||||||
|
"schema_version": "domain_scenario_pack_v1",
|
||||||
|
"pack_id": "agent_margin_profitability_reliability_20260524",
|
||||||
|
"domain": "margin_profitability",
|
||||||
|
"title": "AGENT | margin profitability wrong-domain traps",
|
||||||
|
"description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчёты.",
|
||||||
|
"source_contract_id": "margin_profitability_v1",
|
||||||
|
"bindings": {
|
||||||
|
"period": "2020 год",
|
||||||
|
"item": "товар"
|
||||||
|
},
|
||||||
|
"analysis_context": {
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"semantic_focus": [
|
||||||
|
"direct_answer_first",
|
||||||
|
"margin_domain_purity",
|
||||||
|
"honest_unknowns",
|
||||||
|
"wrong_domain_traps"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"scenarios": [
|
||||||
|
{
|
||||||
|
"scenario_id": "margin_root_wrong_domain_trap",
|
||||||
|
"title": "Root margin question must not leak into accounting domains",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Маржинальность номенклатуры",
|
||||||
|
"question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?",
|
||||||
|
"semantic_tags": ["margin_profitability", "inventory", "wrong_domain_trap"],
|
||||||
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)(марж|прибыл|выруч|себестоим|не могу подтвердить|не хватает)"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)"
|
||||||
|
],
|
||||||
|
"notes": "Если точного расчёта нет, допустим честный limited answer, но не уход в ОС/банк/оплаты."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scenario_id": "margin_followup_contract_boundary",
|
||||||
|
"title": "Follow-up must keep margin contract and state limitations",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Запрос маржинальности",
|
||||||
|
"question": "Покажи топ товаров по марже за {{bindings.period}}.",
|
||||||
|
"semantic_tags": ["margin_profitability", "inventory"],
|
||||||
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)(амортизац|объект ОС|payment_document|settlement)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step_id": "step_02",
|
||||||
|
"title": "Почему именно так",
|
||||||
|
"question": "А из чего ты это посчитал и чего не хватает для точной маржи?",
|
||||||
|
"depends_on": ["step_01"],
|
||||||
|
"semantic_tags": ["margin_profitability", "evidence", "scope_guard"],
|
||||||
|
"expected_result_mode": "evidence_or_honest_boundary",
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)(выруч|себестоим|валов|марж|не хватает|не подтвержден)"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)(route_id|capability_id|runtime_|debug|амортизац|объект ОС)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,77 @@
|
||||||
|
{
|
||||||
|
"schema_version": "business_answer_contract_v1",
|
||||||
|
"contract_id": "margin_profitability_v1",
|
||||||
|
"domain": "margin_profitability",
|
||||||
|
"title": "Маржинальность номенклатуры",
|
||||||
|
"purpose": "Зафиксировать минимальную форму честного бизнес-ответа для вопросов о выручке, себестоимости, валовой прибыли и марже по товарам/номенклатуре.",
|
||||||
|
"answer_surface": {
|
||||||
|
"must_start_with": "direct_answer_first",
|
||||||
|
"required_fields": [
|
||||||
|
{
|
||||||
|
"field": "period",
|
||||||
|
"meaning": "Период расчёта или честное указание, что период не задан/не подтверждён."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "revenue_ex_vat",
|
||||||
|
"meaning": "Выручка без НДС или честное unknown, если контур не может доказать сумму."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "cogs",
|
||||||
|
"meaning": "Себестоимость или честное unknown, если нет подтверждённой основы."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "gross_profit",
|
||||||
|
"meaning": "Валовая прибыль как revenue_ex_vat - cogs или честное unknown."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "margin_pct",
|
||||||
|
"meaning": "Процент маржи или честное unknown при недостаточной базе."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "evidence_basis",
|
||||||
|
"meaning": "Какие документы/регистры/цепочки подтверждают расчёт."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"must_not_contain": [
|
||||||
|
"route ids",
|
||||||
|
"capability ids",
|
||||||
|
"debug ids",
|
||||||
|
"fixed assets / ОС leak",
|
||||||
|
"amortization / амортизация leak",
|
||||||
|
"payment document as margin source"
|
||||||
|
],
|
||||||
|
"limited_answer_rule": "Если точный расчёт невозможен, ответ должен сказать, что подтверждено, что неизвестно, и предложить следующий проверяемый шаг."
|
||||||
|
},
|
||||||
|
"root_layers": [
|
||||||
|
"intent",
|
||||||
|
"route",
|
||||||
|
"capability",
|
||||||
|
"field_mapping",
|
||||||
|
"answer_surface"
|
||||||
|
],
|
||||||
|
"detectors": [
|
||||||
|
"margin_domain_leak_accounting_route",
|
||||||
|
"margin_required_fields_missing",
|
||||||
|
"margin_next_action_missing",
|
||||||
|
"margin_payment_document_false_source",
|
||||||
|
"margin_os_amortization_leak"
|
||||||
|
],
|
||||||
|
"acceptance": {
|
||||||
|
"must_have": [
|
||||||
|
"direct_answer_first",
|
||||||
|
"period",
|
||||||
|
"revenue_ex_vat_or_honest_unknown",
|
||||||
|
"cogs_or_honest_unknown",
|
||||||
|
"gross_profit_or_honest_unknown",
|
||||||
|
"margin_pct_or_honest_unknown",
|
||||||
|
"evidence_basis_or_honest_boundary",
|
||||||
|
"next_action_if_limited"
|
||||||
|
],
|
||||||
|
"must_not_have": [
|
||||||
|
"fixed assets leak",
|
||||||
|
"amortization leak",
|
||||||
|
"payment document as margin source",
|
||||||
|
"technical garbage"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,221 @@
|
||||||
|
{
|
||||||
|
"schema_version": "agent_issue_catalog_v1",
|
||||||
|
"updated_at": "2026-05-24",
|
||||||
|
"principles": [
|
||||||
|
"No accepted save without effective_runtime.json.",
|
||||||
|
"No auto-coder without allowed issue_code, root_layers, allowed/forbidden targets, rerun_matrix, and answer contract.",
|
||||||
|
"Subagents are read-only reviewers; Lead/Orchestrator owns final verdict and repair decision."
|
||||||
|
],
|
||||||
|
"issues": {
|
||||||
|
"runtime_manifest_missing": {
|
||||||
|
"severity": "P0",
|
||||||
|
"business_meaning": "Прогон нельзя воспроизвести, поэтому audit/repair/acceptance нельзя считать доказанными.",
|
||||||
|
"root_layers": ["run_reproducibility"],
|
||||||
|
"detectors": ["missing_effective_runtime_json"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"scripts/domain_case_loop.py",
|
||||||
|
"scripts/domain_truth_harness.py",
|
||||||
|
"scripts/stage_agent_loop.py",
|
||||||
|
"scripts/save_agent_semantic_run.py",
|
||||||
|
"scripts/agent_runtime_manifest.py"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"business routing",
|
||||||
|
"MCP runtime",
|
||||||
|
"domain contracts"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"same_spec_direct_runner",
|
||||||
|
"same_spec_stage_runner",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
],
|
||||||
|
"acceptance": {
|
||||||
|
"must_have": [
|
||||||
|
"effective_runtime.json",
|
||||||
|
"git_sha",
|
||||||
|
"runner",
|
||||||
|
"llm_model",
|
||||||
|
"temperature",
|
||||||
|
"prompt_source",
|
||||||
|
"prompt_hash"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"prompt_registry_opaque": {
|
||||||
|
"severity": "P0",
|
||||||
|
"business_meaning": "Неясно, какой normalizer prompt реально исполняется.",
|
||||||
|
"root_layers": ["prompt_registry", "runtime_config"],
|
||||||
|
"detectors": [
|
||||||
|
"default_prompt_version_missing_files",
|
||||||
|
"silent_prompt_fallback",
|
||||||
|
"preset_version_mismatch"
|
||||||
|
],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/promptBuilder.ts",
|
||||||
|
"llm_normalizer/backend/src/config.ts",
|
||||||
|
"llm_normalizer/data/presets/*.json",
|
||||||
|
"scripts/prompt_registry_healthcheck.py",
|
||||||
|
"scripts/agent_runtime_manifest.py"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"domain routing rewrites",
|
||||||
|
"business answer heuristics"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"prompt_healthcheck",
|
||||||
|
"normalizer_smoke",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"margin_domain_leak_accounting_route": {
|
||||||
|
"severity": "P0",
|
||||||
|
"business_meaning": "Запрос про маржинальность номенклатуры ушёл в чужой бухгалтерский домен вместо выручки, себестоимости, валовой прибыли и процента маржи.",
|
||||||
|
"root_layers": ["intent", "route", "capability", "domain_contract"],
|
||||||
|
"expected_answer_contract": "margin_profitability_v1",
|
||||||
|
"detectors": [
|
||||||
|
"forbidden_margin_terms",
|
||||||
|
"missing_revenue_cogs_margin_fields",
|
||||||
|
"wrong_capability_family"
|
||||||
|
],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/addressIntentResolver.ts",
|
||||||
|
"llm_normalizer/backend/src/services/addressCapabilityPolicy.ts",
|
||||||
|
"llm_normalizer/backend/src/services/addressFilterExtractor.ts",
|
||||||
|
"llm_normalizer/backend/src/services/address_runtime/**",
|
||||||
|
"docs/orchestration/contracts/margin_profitability_v1.json"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"fake data",
|
||||||
|
"silent heuristic masking",
|
||||||
|
"global orchestration rewrite",
|
||||||
|
"MCP protocol rewrite"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_margin_scenario",
|
||||||
|
"margin_neighbor_pack",
|
||||||
|
"wrong_domain_trap_pack",
|
||||||
|
"selected_object_followup_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
],
|
||||||
|
"acceptance": {
|
||||||
|
"must_have": [
|
||||||
|
"direct_answer_first",
|
||||||
|
"period",
|
||||||
|
"revenue_ex_vat_or_honest_unknown",
|
||||||
|
"cogs_or_honest_unknown",
|
||||||
|
"gross_profit_or_honest_unknown",
|
||||||
|
"margin_pct_or_honest_unknown",
|
||||||
|
"next_action_if_limited"
|
||||||
|
],
|
||||||
|
"must_not_have": [
|
||||||
|
"route ids",
|
||||||
|
"debug ids",
|
||||||
|
"fixed assets leak",
|
||||||
|
"amortization leak",
|
||||||
|
"payment document as margin source"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"business_direct_answer_missing": {
|
||||||
|
"severity": "P0",
|
||||||
|
"business_meaning": "Ответ не начинает с прямого бизнес-вывода, хотя пользователь задал прямой вопрос.",
|
||||||
|
"root_layers": ["answer_surface", "business_utility"],
|
||||||
|
"detectors": ["first_line_not_direct_answer", "top_level_scaffold_before_answer"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/address_runtime/composeStage.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantService.ts"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"routing rewrites",
|
||||||
|
"fake evidence",
|
||||||
|
"global runtime rewrite"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_scenario",
|
||||||
|
"direct_answer_surface_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"technical_garbage_in_answer": {
|
||||||
|
"severity": "P0",
|
||||||
|
"business_meaning": "Финальный ответ протащил debug/runtime/MCP-механику в пользовательскую поверхность.",
|
||||||
|
"root_layers": ["answer_surface", "business_utility"],
|
||||||
|
"detectors": ["runtime_tokens_in_user_answer", "capability_ids_in_user_answer"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/address_runtime/composeStage.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantService.ts"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"route masking",
|
||||||
|
"debug deletion from artifacts"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_scenario",
|
||||||
|
"technical_garbage_canary_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"accounting_contract_missing": {
|
||||||
|
"severity": "P1",
|
||||||
|
"business_meaning": "Ответ не раскрыл обязательный бухгалтерский/доказательный контракт для запрошенного расчёта.",
|
||||||
|
"root_layers": ["domain_contract", "answer_surface", "evidence"],
|
||||||
|
"detectors": ["required_contract_fields_missing"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"docs/orchestration/contracts/*.json",
|
||||||
|
"llm_normalizer/backend/src/services/address_runtime/composeStage.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"fake data",
|
||||||
|
"silent heuristic masking"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_scenario",
|
||||||
|
"contract_field_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"business_next_step_missing": {
|
||||||
|
"severity": "P2",
|
||||||
|
"business_meaning": "Ограниченный ответ не предлагает полезный следующий шаг.",
|
||||||
|
"root_layers": ["answer_surface", "business_utility"],
|
||||||
|
"detectors": ["limited_answer_without_next_action"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/address_runtime/composeStage.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"evidence fabrication",
|
||||||
|
"route masking"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_scenario",
|
||||||
|
"limited_answer_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"route_candidate_enablement_gap": {
|
||||||
|
"severity": "P1",
|
||||||
|
"business_meaning": "Планировщик понял бизнес-запрос, но route candidate ещё не исполняется как точная возможность.",
|
||||||
|
"root_layers": ["route_candidate", "runtime_capability"],
|
||||||
|
"detectors": ["route_candidate_needs_enablement"],
|
||||||
|
"allowed_patch_targets": [
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts",
|
||||||
|
"llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts",
|
||||||
|
"llm_normalizer/backend/src/services/addressRecipeCatalog.ts"
|
||||||
|
],
|
||||||
|
"forbidden_patch_targets": [
|
||||||
|
"global orchestration rewrite",
|
||||||
|
"fake fixtures"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_scenario",
|
||||||
|
"route_candidate_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1670,10 +1670,9 @@ function hasNomenclatureMarginRankingSignal(text) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const hasNomenclatureCue = /(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
const hasNomenclatureCue = /(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
||||||
const hasRealizationCue = /(?:реализован|реализац|продан|продаж|отгруж|41(?:[.,]0?1)?|90(?:[.,]\d{1,2})?|sales?|sold)/iu.test(normalized);
|
|
||||||
const hasMarginCue = /(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(normalized);
|
const hasMarginCue = /(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(normalized);
|
||||||
const hasRankingCue = /(?:высок|низк|топ|сам(?:ая|ый|ое|ые)|больш|меньш|ранж|рейтинг|high|low|top|rank|best|worst)/iu.test(normalized);
|
const hasRankingCue = /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(normalized);
|
||||||
return hasNomenclatureCue && hasRealizationCue && hasMarginCue && hasRankingCue;
|
return hasNomenclatureCue && hasMarginCue && hasRankingCue;
|
||||||
}
|
}
|
||||||
function hasVatPeriodInspectionBridgeSignal(text) {
|
function hasVatPeriodInspectionBridgeSignal(text) {
|
||||||
const normalized = String(text ?? "").trim().toLowerCase();
|
const normalized = String(text ?? "").trim().toLowerCase();
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,19 @@ function hasPlainRussianInventoryOnHandSignal(text) {
|
||||||
const hasSnapshotCue = /(?:на\s+(?:дату|сегодня|сейчас|март|апрел|май|мая|июн|июл|август|сентябр|октябр|ноябр|декабр|январ|феврал)|\b(?:19|20)\d{2}\b)/iu.test(normalized);
|
const hasSnapshotCue = /(?:на\s+(?:дату|сегодня|сейчас|март|апрел|май|мая|июн|июл|август|сентябр|октябр|ноябр|декабр|январ|феврал)|\b(?:19|20)\d{2}\b)/iu.test(normalized);
|
||||||
return hasRequestCue && (hasSnapshotCue || /остатк/iu.test(normalized));
|
return hasRequestCue && (hasSnapshotCue || /остатк/iu.test(normalized));
|
||||||
}
|
}
|
||||||
|
/**
 * Reports whether a message looks like a margin/profitability ranking
 * question about nomenclature items.
 *
 * The input is coerced to a string (null/undefined become ""), trimmed,
 * lower-cased, and "ё" is folded to "е" before matching. An empty result
 * yields false. Otherwise the function returns true only when all three
 * cue groups match somewhere in the normalized text:
 *   - a nomenclature cue (e.g. "номенклатур", "товар", "sku", "item");
 *   - a margin cue (e.g. "марж", "прибыл", "себестоим", "margin", "cogs");
 *   - a ranking cue (e.g. "топ", "сам(ая|ый|...)", "max", "min", "top", "best").
 *
 * NOTE: the patterns are unanchored substring matches (no word boundaries),
 * so short tokens such as "top", "min" or "item" also match inside longer
 * words.
 *
 * @param {*} text - Raw user message; any value is accepted and stringified.
 * @returns {boolean} True when nomenclature, margin and ranking cues are all present.
 */
function hasInventoryMarginRankingSignal(text) {
|
||||||
|
const normalized = String(text ?? "")
|
||||||
|
.trim()
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/ё/g, "е");
|
||||||
|
if (!normalized) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const hasNomenclatureCue = /(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
||||||
|
const hasMarginCue = /(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(normalized);
|
||||||
|
const hasRankingCue = /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(normalized);
|
||||||
|
return hasNomenclatureCue && hasMarginCue && hasRankingCue;
|
||||||
|
}
|
||||||
function hasInventoryOnHandSignal(text) {
|
function hasInventoryOnHandSignal(text) {
|
||||||
const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test(text);
|
const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test(text);
|
||||||
const hasStockStateCue = /(?:(?:что|ч[еёо])\s+там\s+на\s+склад(?:е|у|ом|ах)|(?:что|ч[еёо]).*происход(?:ит|ило|ящее).*(?:на\s+)?склад(?:е|у|ом|ах)|происход(?:ит|ило|ящее)\s+на\s+склад(?:е|у|ом|ах)|ситуац(?:ия|ии)\s+на\s+склад(?:е|у|ом|ах)|обстановк(?:а|и)\s+на\s+склад(?:е|у|ом|ах)|what(?:'s| is)?\s+(?:there\s+)?(?:on|in)\s+(?:the\s+)?(?:warehouse|stock)|what(?:'s| is)?\s+happening\s+(?:on|in)\s+(?:the\s+)?(?:warehouse|stock))/iu.test(text);
|
const hasStockStateCue = /(?:(?:что|ч[еёо])\s+там\s+на\s+склад(?:е|у|ом|ах)|(?:что|ч[еёо]).*происход(?:ит|ило|ящее).*(?:на\s+)?склад(?:е|у|ом|ах)|происход(?:ит|ило|ящее)\s+на\s+склад(?:е|у|ом|ах)|ситуац(?:ия|ии)\s+на\s+склад(?:е|у|ом|ах)|обстановк(?:а|и)\s+на\s+склад(?:е|у|ом|ах)|what(?:'s| is)?\s+(?:there\s+)?(?:on|in)\s+(?:the\s+)?(?:warehouse|stock)|what(?:'s| is)?\s+happening\s+(?:on|in)\s+(?:the\s+)?(?:warehouse|stock))/iu.test(text);
|
||||||
|
|
@ -34,6 +47,7 @@ function hasInventoryOnHandSignal(text) {
|
||||||
hasInventoryPurchaseDocumentsSignalV2(text) ||
|
hasInventoryPurchaseDocumentsSignalV2(text) ||
|
||||||
hasInventorySaleTraceSignalV2(text) ||
|
hasInventorySaleTraceSignalV2(text) ||
|
||||||
hasInventoryAgingSignal(text) ||
|
hasInventoryAgingSignal(text) ||
|
||||||
|
hasInventoryMarginRankingSignal(text) ||
|
||||||
hasInventoryPurchaseToSaleChainSignal(text)) {
|
hasInventoryPurchaseToSaleChainSignal(text)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -185,6 +199,13 @@ function resolveInventoryAddressIntent(text) {
|
||||||
reasons: ["inventory_aging_signal_detected_strong"]
|
reasons: ["inventory_aging_signal_detected_strong"]
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
if (hasInventoryMarginRankingSignal(text)) {
|
||||||
|
return {
|
||||||
|
intent: "inventory_margin_ranking_for_nomenclature",
|
||||||
|
confidence: "high",
|
||||||
|
reasons: ["inventory_margin_ranking_signal_detected"]
|
||||||
|
};
|
||||||
|
}
|
||||||
if (hasInventoryAccount41Anchor(text) && hasInventoryAsOfCue(text)) {
|
if (hasInventoryAccount41Anchor(text) && hasInventoryAsOfCue(text)) {
|
||||||
return {
|
return {
|
||||||
intent: "inventory_on_hand_as_of_date",
|
intent: "inventory_on_hand_as_of_date",
|
||||||
|
|
|
||||||
|
|
@ -639,10 +639,12 @@ function hasInventoryMarginRankingFollowupCue(text) {
|
||||||
const wantsFoundRows = /(?:покажи|показать|выведи|дай|раскрой|show|list|покажи|показать|выведи|дай|раскрой)/iu.test(normalized) &&
|
const wantsFoundRows = /(?:покажи|показать|выведи|дай|раскрой|show|list|покажи|показать|выведи|дай|раскрой)/iu.test(normalized) &&
|
||||||
/(?:найденн|строк|реализац|себестоимостн|баз|найденн|строк|реализац|себестоимостн|баз)/iu.test(normalized) &&
|
/(?:найденн|строк|реализац|себестоимостн|баз|найденн|строк|реализац|себестоимостн|баз)/iu.test(normalized) &&
|
||||||
/(?:себестоимостн|реализац|марж|прибыл|номенклатур|себестоимостн|реализац|марж|прибыл|номенклат)/iu.test(normalized);
|
/(?:себестоимостн|реализац|марж|прибыл|номенклатур|себестоимостн|реализац|марж|прибыл|номенклат)/iu.test(normalized);
|
||||||
|
const asksMarginBasis = /(?:из\s+чего|как\s+(?:ты\s+)?(?:это\s+)?посчитал|почему|какие\s+поля|чего\s+не\s+хватает|не\s+хватает|точн(?:ой|ая|ую)?\s+марж|basis|source|fields|calculated|missing)/iu.test(normalized) &&
|
||||||
|
/(?:марж|прибыл|рентаб|себестоимост|выручк|номенклатур|рейтинг|top|margin|profit|cogs|revenue)/iu.test(normalized);
|
||||||
const account41Not01 = /\b41(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
const account41Not01 = /\b41(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
||||||
/\b01(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
/\b01(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
||||||
/(?:\bне\b|вместо|а\s+не|not|instead|РЅРµ|вместо|Р°\s+РЅРµ)/iu.test(normalized);
|
/(?:\bне\b|вместо|а\s+не|not|instead|РЅРµ|вместо|Р°\s+РЅРµ)/iu.test(normalized);
|
||||||
return wantsFoundRows || account41Not01;
|
return wantsFoundRows || asksMarginBasis || account41Not01;
|
||||||
}
|
}
|
||||||
function hasAddressFollowupContextSignal(text) {
|
function hasAddressFollowupContextSignal(text) {
|
||||||
const normalized = String(text ?? "").trim();
|
const normalized = String(text ?? "").trim();
|
||||||
|
|
@ -1346,8 +1348,8 @@ function deriveIntentWithFollowupContext(detectedIntent, userMessage, followupCo
|
||||||
followupContext.root_anchor_type === "item" ||
|
followupContext.root_anchor_type === "item" ||
|
||||||
followupContext.current_frame_kind === "inventory_root" ||
|
followupContext.current_frame_kind === "inventory_root" ||
|
||||||
followupContext.current_frame_kind === "inventory_drilldown";
|
followupContext.current_frame_kind === "inventory_drilldown";
|
||||||
const inventorySelectedObjectFollowup = inventoryLineageActive &&
|
const hasSelectedObjectReference = hasSelectedObjectInventorySignal(normalizedMessage);
|
||||||
(hasSelectedObjectInventorySignal(normalizedMessage) || (previousIsInventoryFamily && hasFollowupSignal));
|
const inventorySelectedObjectFollowup = inventoryLineageActive && (hasSelectedObjectReference || (previousIsInventoryFamily && hasFollowupSignal));
|
||||||
const previousCounterpartyLaneActive = hasPreviousCounterparty &&
|
const previousCounterpartyLaneActive = hasPreviousCounterparty &&
|
||||||
(followupContext.previous_anchor_type === "counterparty" ||
|
(followupContext.previous_anchor_type === "counterparty" ||
|
||||||
sourceIntent === "list_documents_by_counterparty" ||
|
sourceIntent === "list_documents_by_counterparty" ||
|
||||||
|
|
@ -1369,6 +1371,7 @@ function deriveIntentWithFollowupContext(detectedIntent, userMessage, followupCo
|
||||||
detectedIntent.intent === "account_balance_snapshot" ||
|
detectedIntent.intent === "account_balance_snapshot" ||
|
||||||
detectedIntent.intent === "documents_forming_balance" ||
|
detectedIntent.intent === "documents_forming_balance" ||
|
||||||
detectedIntent.intent === "inventory_margin_ranking_for_nomenclature" ||
|
detectedIntent.intent === "inventory_margin_ranking_for_nomenclature" ||
|
||||||
|
(detectedIntent.intent === "inventory_profitability_for_item" && !hasSelectedObjectReference) ||
|
||||||
detectedIntent.intent === sourceIntent)) {
|
detectedIntent.intent === sourceIntent)) {
|
||||||
return {
|
return {
|
||||||
intent: "inventory_margin_ranking_for_nomenclature",
|
intent: "inventory_margin_ranking_for_nomenclature",
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,10 @@ function asksForInventoryCostBaseRows(userMessage) {
|
||||||
}
|
}
|
||||||
return /(?:себестоимостн|себестоимост|себестоим|закупочн|закупк|90\.02|\b41\b|баз)/iu.test(text);
|
return /(?:себестоимостн|себестоимост|себестоим|закупочн|закупк|90\.02|\b41\b|баз)/iu.test(text);
|
||||||
}
|
}
|
||||||
|
function asksForInventoryMarginBasis(userMessage) {
|
||||||
|
const text = String(userMessage ?? "").toLowerCase();
|
||||||
|
return (/(?:из\s+чего|как\s+(?:ты\s+)?(?:это\s+)?посчитал|какие\s+поля|чего\s+не\s+хватает|не\s+хватает|точн(?:ой|ая|ую)?\s+марж|basis|source|fields|calculated|missing)/iu.test(text) && /(?:марж|прибыл|себестоимост|выручк|margin|profit|cogs|revenue)/iu.test(text));
|
||||||
|
}
|
||||||
function inventoryRowItemLabel(row, deps) {
|
function inventoryRowItemLabel(row, deps) {
|
||||||
return deps.summarizeInventoryTraceRows([row]).item;
|
return deps.summarizeInventoryTraceRows([row]).item;
|
||||||
}
|
}
|
||||||
|
|
@ -454,17 +458,19 @@ function composeInventoryReply(intent, rows, options, deps) {
|
||||||
const entries = buildInventoryMarginRankingEntries(rows, deps);
|
const entries = buildInventoryMarginRankingEntries(rows, deps);
|
||||||
const confirmedEntries = entries.filter((entry) => entry.revenue > 0 && entry.costProxy > 0);
|
const confirmedEntries = entries.filter((entry) => entry.revenue > 0 && entry.costProxy > 0);
|
||||||
const highMargin = [...confirmedEntries]
|
const highMargin = [...confirmedEntries]
|
||||||
.sort((left, right) => right.spread - left.spread || (right.marginPct ?? -Infinity) - (left.marginPct ?? -Infinity))
|
.sort((left, right) => (right.marginPct ?? -Infinity) - (left.marginPct ?? -Infinity) || right.spread - left.spread)
|
||||||
.slice(0, 5);
|
.slice(0, 3);
|
||||||
const lowMargin = [...confirmedEntries]
|
const lowMargin = [...confirmedEntries]
|
||||||
.sort((left, right) => left.spread - right.spread || (left.marginPct ?? Infinity) - (right.marginPct ?? Infinity))
|
.sort((left, right) => (left.marginPct ?? Infinity) - (right.marginPct ?? Infinity) || left.spread - right.spread)
|
||||||
.slice(0, 5);
|
.slice(0, 3);
|
||||||
const salesWithoutCost = entries.filter((entry) => entry.revenue > 0 && entry.costProxy <= 0);
|
const salesWithoutCost = entries.filter((entry) => entry.revenue > 0 && entry.costProxy <= 0);
|
||||||
const purchasesWithoutSales = entries.filter((entry) => entry.costProxy > 0 && entry.revenue <= 0);
|
const purchasesWithoutSales = entries.filter((entry) => entry.costProxy > 0 && entry.revenue <= 0);
|
||||||
const periodLabel = inventoryProfitabilityPeriodLabel(options, deps);
|
const periodLabel = inventoryProfitabilityPeriodLabel(options, deps);
|
||||||
const totalRevenue = entries.reduce((sum, entry) => sum + entry.revenue, 0);
|
const totalRevenue = entries.reduce((sum, entry) => sum + entry.revenue, 0);
|
||||||
const totalCostProxy = entries.reduce((sum, entry) => sum + entry.costProxy, 0);
|
const totalCostProxy = entries.reduce((sum, entry) => sum + entry.costProxy, 0);
|
||||||
const totalSpread = totalRevenue - totalCostProxy;
|
const totalSpread = totalRevenue - totalCostProxy;
|
||||||
|
const topMarginEntry = highMargin[0] ?? null;
|
||||||
|
const marginBasisRequested = asksForInventoryMarginBasis(options.userMessage);
|
||||||
if (confirmedEntries.length === 0) {
|
if (confirmedEntries.length === 0) {
|
||||||
const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage);
|
const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage);
|
||||||
const lines = [
|
const lines = [
|
||||||
|
|
@ -512,12 +518,34 @@ function composeInventoryReply(intent, rows, options, deps) {
|
||||||
]);
|
]);
|
||||||
return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)(entries.length > 0 ? "medium" : "weak", false));
|
return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)(entries.length > 0 ? "medium" : "weak", false));
|
||||||
}
|
}
|
||||||
const directAnswerLine = confirmedEntries.length > 0
|
const directAnswerLine = topMarginEntry && marginBasisRequested
|
||||||
? `За период ${periodLabel} собран рейтинг реализованной номенклатуры по валовой маржинальности: выручка ${deps.formatMoneyRub(totalRevenue)}, себестоимостная база ${deps.formatMoneyRub(totalCostProxy)}, расчетная валовая разница ${deps.formatMoneyRub(totalSpread)}.`
|
? `Считал маржу за период ${periodLabel} как выручку реализации минус доступную себестоимостную базу: выручка ${deps.formatMoneyRub(totalRevenue)}, себестоимостная база ${deps.formatMoneyRub(totalCostProxy)}, валовая разница ${deps.formatMoneyRub(totalSpread)}.`
|
||||||
: `За период ${periodLabel} не удалось подтвердить рейтинг прибыльности номенклатуры: нужны одновременно строки реализации и закупочного/себестоимостного следа по товарам.`;
|
: topMarginEntry
|
||||||
|
? `Самая маржинальная позиция за период ${periodLabel}: ${topMarginEntry.item} — маржа ${formatInventoryPercent(topMarginEntry.marginPct, deps.formatNumberWithDots)}, выручка ${deps.formatMoneyRub(topMarginEntry.revenue)}, себестоимостная база ${deps.formatMoneyRub(topMarginEntry.costProxy)}, валовая разница ${deps.formatMoneyRub(topMarginEntry.spread)}.`
|
||||||
|
: `За период ${periodLabel} не удалось подтвердить рейтинг прибыльности номенклатуры: нужны одновременно строки реализации и закупочного/себестоимостного следа по товарам.`;
|
||||||
const lines = [directAnswerLine];
|
const lines = [directAnswerLine];
|
||||||
|
if (marginBasisRequested) {
|
||||||
|
(0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "База расчета:", [
|
||||||
|
"выручка: подтвержденные строки реализации по номенклатуре;",
|
||||||
|
"себестоимостная база: доступные строки закупочного/себестоимостного следа по той же номенклатуре;",
|
||||||
|
"валовая маржа: (выручка - себестоимостная база) / выручка."
|
||||||
|
]);
|
||||||
|
const basisLimitations = [
|
||||||
|
"это управленческий расчет валовой маржи, не показатель чистой прибыли;"
|
||||||
|
];
|
||||||
|
if (salesWithoutCost.length > 0) {
|
||||||
|
basisLimitations.push(`по ${deps.formatNumberWithDots(salesWithoutCost.length)} позициям есть продажи без подтвержденной себестоимости реализации;`);
|
||||||
|
}
|
||||||
|
if (purchasesWithoutSales.length > 0) {
|
||||||
|
basisLimitations.push(`по ${deps.formatNumberWithDots(purchasesWithoutSales.length)} позициям есть себестоимостная база без реализации в периоде;`);
|
||||||
|
}
|
||||||
|
basisLimitations.push("для строгого бухгалтерского расчета нужны проводки 90.01 / 90.02 и проверка закрытия себестоимости.");
|
||||||
|
(0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Чего не хватает для точной маржи:", basisLimitations);
|
||||||
|
lines.push("", "Следующий шаг: могу раскрыть строки выручки и себестоимостной базы по любой позиции из рейтинга.");
|
||||||
|
return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)(confirmedEntries.length > 0 ? "strong" : entries.length > 0 ? "medium" : "weak", confirmedEntries.length > 0));
|
||||||
|
}
|
||||||
if (highMargin.length > 0) {
|
if (highMargin.length > 0) {
|
||||||
(0, inventoryReplyPresentation_1.appendInventorySection)(lines, "Высокая валовая маржинальность:", highMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps)));
|
(0, inventoryReplyPresentation_1.appendInventorySection)(lines, "Высокая валовая маржинальность (топ по проценту маржи):", highMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps)));
|
||||||
}
|
}
|
||||||
if (lowMargin.length > 0) {
|
if (lowMargin.length > 0) {
|
||||||
(0, inventoryReplyPresentation_1.appendInventorySection)(lines, "Низкая или отрицательная валовая маржинальность:", lowMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps)));
|
(0, inventoryReplyPresentation_1.appendInventorySection)(lines, "Низкая или отрицательная валовая маржинальность:", lowMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps)));
|
||||||
|
|
@ -533,6 +561,7 @@ function composeInventoryReply(intent, rows, options, deps) {
|
||||||
boundaryLines.push(`По ${deps.formatNumberWithDots(purchasesWithoutSales.length)} позициям есть себестоимостная база без реализации в этом периоде.`);
|
boundaryLines.push(`По ${deps.formatNumberWithDots(purchasesWithoutSales.length)} позициям есть себестоимостная база без реализации в этом периоде.`);
|
||||||
}
|
}
|
||||||
(0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Граница ответа:", boundaryLines);
|
(0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Граница ответа:", boundaryLines);
|
||||||
|
lines.push("", "Следующий шаг: могу раскрыть строки выручки и себестоимостной базы по выбранной позиции из рейтинга.");
|
||||||
return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)(confirmedEntries.length > 0 ? "strong" : entries.length > 0 ? "medium" : "weak", confirmedEntries.length > 0));
|
return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)(confirmedEntries.length > 0 ? "strong" : entries.length > 0 ? "medium" : "weak", confirmedEntries.length > 0));
|
||||||
}
|
}
|
||||||
if (intent === "inventory_profitability_for_item") {
|
if (intent === "inventory_profitability_for_item") {
|
||||||
|
|
|
||||||
|
|
@ -2159,19 +2159,15 @@ function hasNomenclatureMarginRankingSignal(text: string): boolean {
|
||||||
}
|
}
|
||||||
const hasNomenclatureCue =
|
const hasNomenclatureCue =
|
||||||
/(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
/(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
||||||
const hasRealizationCue =
|
|
||||||
/(?:реализован|реализац|продан|продаж|отгруж|41(?:[.,]0?1)?|90(?:[.,]\d{1,2})?|sales?|sold)/iu.test(
|
|
||||||
normalized
|
|
||||||
);
|
|
||||||
const hasMarginCue =
|
const hasMarginCue =
|
||||||
/(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(
|
/(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(
|
||||||
normalized
|
normalized
|
||||||
);
|
);
|
||||||
const hasRankingCue =
|
const hasRankingCue =
|
||||||
/(?:высок|низк|топ|сам(?:ая|ый|ое|ые)|больш|меньш|ранж|рейтинг|high|low|top|rank|best|worst)/iu.test(
|
/(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(
|
||||||
normalized
|
normalized
|
||||||
);
|
);
|
||||||
return hasNomenclatureCue && hasRealizationCue && hasMarginCue && hasRankingCue;
|
return hasNomenclatureCue && hasMarginCue && hasRankingCue;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasVatPeriodInspectionBridgeSignal(text: string): boolean {
|
function hasVatPeriodInspectionBridgeSignal(text: string): boolean {
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,27 @@ function hasPlainRussianInventoryOnHandSignal(text: string): boolean {
|
||||||
return hasRequestCue && (hasSnapshotCue || /остатк/iu.test(normalized));
|
return hasRequestCue && (hasSnapshotCue || /остатк/iu.test(normalized));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function hasInventoryMarginRankingSignal(text: string): boolean {
|
||||||
|
const normalized = String(text ?? "")
|
||||||
|
.trim()
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/ё/g, "е");
|
||||||
|
if (!normalized) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const hasNomenclatureCue =
|
||||||
|
/(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized);
|
||||||
|
const hasMarginCue =
|
||||||
|
/(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(
|
||||||
|
normalized
|
||||||
|
);
|
||||||
|
const hasRankingCue =
|
||||||
|
/(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(
|
||||||
|
normalized
|
||||||
|
);
|
||||||
|
return hasNomenclatureCue && hasMarginCue && hasRankingCue;
|
||||||
|
}
|
||||||
|
|
||||||
function hasInventoryOnHandSignal(text: string): boolean {
|
function hasInventoryOnHandSignal(text: string): boolean {
|
||||||
const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test(
|
const hasColloquialStockSnapshotCue = /(?:что|ч[еёо])\s+(?:у\s+нас\s+)?на\s+склад(?:е|у|ом|ах)(?=$|[\s,.;:!?])/iu.test(
|
||||||
text
|
text
|
||||||
|
|
@ -54,6 +75,7 @@ function hasInventoryOnHandSignal(text: string): boolean {
|
||||||
hasInventoryPurchaseDocumentsSignalV2(text) ||
|
hasInventoryPurchaseDocumentsSignalV2(text) ||
|
||||||
hasInventorySaleTraceSignalV2(text) ||
|
hasInventorySaleTraceSignalV2(text) ||
|
||||||
hasInventoryAgingSignal(text) ||
|
hasInventoryAgingSignal(text) ||
|
||||||
|
hasInventoryMarginRankingSignal(text) ||
|
||||||
hasInventoryPurchaseToSaleChainSignal(text)
|
hasInventoryPurchaseToSaleChainSignal(text)
|
||||||
) {
|
) {
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -304,6 +326,14 @@ export function resolveInventoryAddressIntent(text: string): AddressIntentResolu
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hasInventoryMarginRankingSignal(text)) {
|
||||||
|
return {
|
||||||
|
intent: "inventory_margin_ranking_for_nomenclature",
|
||||||
|
confidence: "high",
|
||||||
|
reasons: ["inventory_margin_ranking_signal_detected"]
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (hasInventoryAccount41Anchor(text) && hasInventoryAsOfCue(text)) {
|
if (hasInventoryAccount41Anchor(text) && hasInventoryAsOfCue(text)) {
|
||||||
return {
|
return {
|
||||||
intent: "inventory_on_hand_as_of_date",
|
intent: "inventory_on_hand_as_of_date",
|
||||||
|
|
|
||||||
|
|
@ -812,11 +812,18 @@ export function hasInventoryMarginRankingFollowupCue(text: string): boolean {
|
||||||
/(?:покажи|показать|выведи|дай|раскрой|show|list|покажи|показать|выведи|дай|раскрой)/iu.test(normalized) &&
|
/(?:покажи|показать|выведи|дай|раскрой|show|list|покажи|показать|выведи|дай|раскрой)/iu.test(normalized) &&
|
||||||
/(?:найденн|строк|реализац|себестоимостн|баз|найденн|строк|реализац|себестоимостн|баз)/iu.test(normalized) &&
|
/(?:найденн|строк|реализац|себестоимостн|баз|найденн|строк|реализац|себестоимостн|баз)/iu.test(normalized) &&
|
||||||
/(?:себестоимостн|реализац|марж|прибыл|номенклатур|себестоимостн|реализац|марж|прибыл|номенклат)/iu.test(normalized);
|
/(?:себестоимостн|реализац|марж|прибыл|номенклатур|себестоимостн|реализац|марж|прибыл|номенклат)/iu.test(normalized);
|
||||||
|
const asksMarginBasis =
|
||||||
|
/(?:из\s+чего|как\s+(?:ты\s+)?(?:это\s+)?посчитал|почему|какие\s+поля|чего\s+не\s+хватает|не\s+хватает|точн(?:ой|ая|ую)?\s+марж|basis|source|fields|calculated|missing)/iu.test(
|
||||||
|
normalized
|
||||||
|
) &&
|
||||||
|
/(?:марж|прибыл|рентаб|себестоимост|выручк|номенклатур|рейтинг|top|margin|profit|cogs|revenue)/iu.test(
|
||||||
|
normalized
|
||||||
|
);
|
||||||
const account41Not01 =
|
const account41Not01 =
|
||||||
/\b41(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
/\b41(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
||||||
/\b01(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
/\b01(?:[.,]\d{1,2})?\b/iu.test(normalized) &&
|
||||||
/(?:\bне\b|вместо|а\s+не|not|instead|РЅРµ|вместо|Р°\s+РЅРµ)/iu.test(normalized);
|
/(?:\bне\b|вместо|а\s+не|not|instead|РЅРµ|вместо|Р°\s+РЅРµ)/iu.test(normalized);
|
||||||
return wantsFoundRows || account41Not01;
|
return wantsFoundRows || asksMarginBasis || account41Not01;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function hasAddressFollowupContextSignal(text: string): boolean {
|
export function hasAddressFollowupContextSignal(text: string): boolean {
|
||||||
|
|
@ -1674,9 +1681,9 @@ function deriveIntentWithFollowupContext(
|
||||||
followupContext.root_anchor_type === "item" ||
|
followupContext.root_anchor_type === "item" ||
|
||||||
followupContext.current_frame_kind === "inventory_root" ||
|
followupContext.current_frame_kind === "inventory_root" ||
|
||||||
followupContext.current_frame_kind === "inventory_drilldown";
|
followupContext.current_frame_kind === "inventory_drilldown";
|
||||||
|
const hasSelectedObjectReference = hasSelectedObjectInventorySignal(normalizedMessage);
|
||||||
const inventorySelectedObjectFollowup =
|
const inventorySelectedObjectFollowup =
|
||||||
inventoryLineageActive &&
|
inventoryLineageActive && (hasSelectedObjectReference || (previousIsInventoryFamily && hasFollowupSignal));
|
||||||
(hasSelectedObjectInventorySignal(normalizedMessage) || (previousIsInventoryFamily && hasFollowupSignal));
|
|
||||||
const previousCounterpartyLaneActive =
|
const previousCounterpartyLaneActive =
|
||||||
hasPreviousCounterparty &&
|
hasPreviousCounterparty &&
|
||||||
(followupContext.previous_anchor_type === "counterparty" ||
|
(followupContext.previous_anchor_type === "counterparty" ||
|
||||||
|
|
@ -1707,6 +1714,7 @@ function deriveIntentWithFollowupContext(
|
||||||
detectedIntent.intent === "account_balance_snapshot" ||
|
detectedIntent.intent === "account_balance_snapshot" ||
|
||||||
detectedIntent.intent === "documents_forming_balance" ||
|
detectedIntent.intent === "documents_forming_balance" ||
|
||||||
detectedIntent.intent === "inventory_margin_ranking_for_nomenclature" ||
|
detectedIntent.intent === "inventory_margin_ranking_for_nomenclature" ||
|
||||||
|
(detectedIntent.intent === "inventory_profitability_for_item" && !hasSelectedObjectReference) ||
|
||||||
detectedIntent.intent === sourceIntent)
|
detectedIntent.intent === sourceIntent)
|
||||||
) {
|
) {
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,15 @@ function asksForInventoryCostBaseRows(userMessage: string | null | undefined): b
|
||||||
return /(?:себестоимостн|себестоимост|себестоим|закупочн|закупк|90\.02|\b41\b|баз)/iu.test(text);
|
return /(?:себестоимостн|себестоимост|себестоим|закупочн|закупк|90\.02|\b41\b|баз)/iu.test(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function asksForInventoryMarginBasis(userMessage: string | null | undefined): boolean {
|
||||||
|
const text = String(userMessage ?? "").toLowerCase();
|
||||||
|
return (
|
||||||
|
/(?:из\s+чего|как\s+(?:ты\s+)?(?:это\s+)?посчитал|какие\s+поля|чего\s+не\s+хватает|не\s+хватает|точн(?:ой|ая|ую)?\s+марж|basis|source|fields|calculated|missing)/iu.test(
|
||||||
|
text
|
||||||
|
) && /(?:марж|прибыл|себестоимост|выручк|margin|profit|cogs|revenue)/iu.test(text)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
interface InventoryMarginRankingEntry {
|
interface InventoryMarginRankingEntry {
|
||||||
item: string;
|
item: string;
|
||||||
revenue: number;
|
revenue: number;
|
||||||
|
|
@ -627,17 +636,19 @@ export function composeInventoryReply(
|
||||||
const entries = buildInventoryMarginRankingEntries(rows, deps);
|
const entries = buildInventoryMarginRankingEntries(rows, deps);
|
||||||
const confirmedEntries = entries.filter((entry) => entry.revenue > 0 && entry.costProxy > 0);
|
const confirmedEntries = entries.filter((entry) => entry.revenue > 0 && entry.costProxy > 0);
|
||||||
const highMargin = [...confirmedEntries]
|
const highMargin = [...confirmedEntries]
|
||||||
.sort((left, right) => right.spread - left.spread || (right.marginPct ?? -Infinity) - (left.marginPct ?? -Infinity))
|
.sort((left, right) => (right.marginPct ?? -Infinity) - (left.marginPct ?? -Infinity) || right.spread - left.spread)
|
||||||
.slice(0, 5);
|
.slice(0, 3);
|
||||||
const lowMargin = [...confirmedEntries]
|
const lowMargin = [...confirmedEntries]
|
||||||
.sort((left, right) => left.spread - right.spread || (left.marginPct ?? Infinity) - (right.marginPct ?? Infinity))
|
.sort((left, right) => (left.marginPct ?? Infinity) - (right.marginPct ?? Infinity) || left.spread - right.spread)
|
||||||
.slice(0, 5);
|
.slice(0, 3);
|
||||||
const salesWithoutCost = entries.filter((entry) => entry.revenue > 0 && entry.costProxy <= 0);
|
const salesWithoutCost = entries.filter((entry) => entry.revenue > 0 && entry.costProxy <= 0);
|
||||||
const purchasesWithoutSales = entries.filter((entry) => entry.costProxy > 0 && entry.revenue <= 0);
|
const purchasesWithoutSales = entries.filter((entry) => entry.costProxy > 0 && entry.revenue <= 0);
|
||||||
const periodLabel = inventoryProfitabilityPeriodLabel(options, deps);
|
const periodLabel = inventoryProfitabilityPeriodLabel(options, deps);
|
||||||
const totalRevenue = entries.reduce((sum, entry) => sum + entry.revenue, 0);
|
const totalRevenue = entries.reduce((sum, entry) => sum + entry.revenue, 0);
|
||||||
const totalCostProxy = entries.reduce((sum, entry) => sum + entry.costProxy, 0);
|
const totalCostProxy = entries.reduce((sum, entry) => sum + entry.costProxy, 0);
|
||||||
const totalSpread = totalRevenue - totalCostProxy;
|
const totalSpread = totalRevenue - totalCostProxy;
|
||||||
|
const topMarginEntry = highMargin[0] ?? null;
|
||||||
|
const marginBasisRequested = asksForInventoryMarginBasis(options.userMessage);
|
||||||
if (confirmedEntries.length === 0) {
|
if (confirmedEntries.length === 0) {
|
||||||
const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage);
|
const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage);
|
||||||
const lines: string[] = [
|
const lines: string[] = [
|
||||||
|
|
@ -700,19 +711,54 @@ export function composeInventoryReply(
|
||||||
return buildFactualSummaryReply(lines, buildConfirmedBalanceSemantics(entries.length > 0 ? "medium" : "weak", false));
|
return buildFactualSummaryReply(lines, buildConfirmedBalanceSemantics(entries.length > 0 ? "medium" : "weak", false));
|
||||||
}
|
}
|
||||||
const directAnswerLine =
|
const directAnswerLine =
|
||||||
confirmedEntries.length > 0
|
topMarginEntry && marginBasisRequested
|
||||||
? `За период ${periodLabel} собран рейтинг реализованной номенклатуры по валовой маржинальности: выручка ${deps.formatMoneyRub(
|
? `Считал маржу за период ${periodLabel} как выручку реализации минус доступную себестоимостную базу: выручка ${deps.formatMoneyRub(
|
||||||
totalRevenue
|
totalRevenue
|
||||||
)}, себестоимостная база ${deps.formatMoneyRub(totalCostProxy)}, расчетная валовая разница ${deps.formatMoneyRub(
|
)}, себестоимостная база ${deps.formatMoneyRub(totalCostProxy)}, валовая разница ${deps.formatMoneyRub(
|
||||||
totalSpread
|
totalSpread
|
||||||
)}.`
|
)}.`
|
||||||
: `За период ${periodLabel} не удалось подтвердить рейтинг прибыльности номенклатуры: нужны одновременно строки реализации и закупочного/себестоимостного следа по товарам.`;
|
: topMarginEntry
|
||||||
|
? `Самая маржинальная позиция за период ${periodLabel}: ${topMarginEntry.item} — маржа ${formatInventoryPercent(
|
||||||
|
topMarginEntry.marginPct,
|
||||||
|
deps.formatNumberWithDots
|
||||||
|
)}, выручка ${deps.formatMoneyRub(topMarginEntry.revenue)}, себестоимостная база ${deps.formatMoneyRub(
|
||||||
|
topMarginEntry.costProxy
|
||||||
|
)}, валовая разница ${deps.formatMoneyRub(topMarginEntry.spread)}.`
|
||||||
|
: `За период ${periodLabel} не удалось подтвердить рейтинг прибыльности номенклатуры: нужны одновременно строки реализации и закупочного/себестоимостного следа по товарам.`;
|
||||||
const lines: string[] = [directAnswerLine];
|
const lines: string[] = [directAnswerLine];
|
||||||
|
|
||||||
|
if (marginBasisRequested) {
|
||||||
|
appendInventoryBulletSection(lines, "База расчета:", [
|
||||||
|
"выручка: подтвержденные строки реализации по номенклатуре;",
|
||||||
|
"себестоимостная база: доступные строки закупочного/себестоимостного следа по той же номенклатуре;",
|
||||||
|
"валовая маржа: (выручка - себестоимостная база) / выручка."
|
||||||
|
]);
|
||||||
|
const basisLimitations = [
|
||||||
|
"это управленческий расчет валовой маржи, не показатель чистой прибыли;"
|
||||||
|
];
|
||||||
|
if (salesWithoutCost.length > 0) {
|
||||||
|
basisLimitations.push(
|
||||||
|
`по ${deps.formatNumberWithDots(salesWithoutCost.length)} позициям есть продажи без подтвержденной себестоимости реализации;`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (purchasesWithoutSales.length > 0) {
|
||||||
|
basisLimitations.push(
|
||||||
|
`по ${deps.formatNumberWithDots(purchasesWithoutSales.length)} позициям есть себестоимостная база без реализации в периоде;`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
basisLimitations.push("для строгого бухгалтерского расчета нужны проводки 90.01 / 90.02 и проверка закрытия себестоимости.");
|
||||||
|
appendInventoryBulletSection(lines, "Чего не хватает для точной маржи:", basisLimitations);
|
||||||
|
lines.push("", "Следующий шаг: могу раскрыть строки выручки и себестоимостной базы по любой позиции из рейтинга.");
|
||||||
|
return buildFactualSummaryReply(
|
||||||
|
lines,
|
||||||
|
buildConfirmedBalanceSemantics(confirmedEntries.length > 0 ? "strong" : entries.length > 0 ? "medium" : "weak", confirmedEntries.length > 0)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if (highMargin.length > 0) {
|
if (highMargin.length > 0) {
|
||||||
appendInventorySection(
|
appendInventorySection(
|
||||||
lines,
|
lines,
|
||||||
"Высокая валовая маржинальность:",
|
"Высокая валовая маржинальность (топ по проценту маржи):",
|
||||||
highMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps))
|
highMargin.map((entry, index) => formatInventoryMarginRankingLine(entry, index, deps))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -739,6 +785,7 @@ export function composeInventoryReply(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
appendInventoryBulletSection(lines, "Граница ответа:", boundaryLines);
|
appendInventoryBulletSection(lines, "Граница ответа:", boundaryLines);
|
||||||
|
lines.push("", "Следующий шаг: могу раскрыть строки выручки и себестоимостной базы по выбранной позиции из рейтинга.");
|
||||||
|
|
||||||
return buildFactualSummaryReply(
|
return buildFactualSummaryReply(
|
||||||
lines,
|
lines,
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,16 @@ describe("addressIntentResolver regression bridges", () => {
|
||||||
expect(result.reasons).toContain("unicode_nomenclature_margin_ranking_bridge_signal_detected");
|
expect(result.reasons).toContain("unicode_nomenclature_margin_ranking_bridge_signal_detected");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("routes bare highest-margin nomenclature wording away from inventory snapshot", () => {
|
||||||
|
const result = resolveAddressIntent(
|
||||||
|
"\u041a\u0430\u043a\u0430\u044f \u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440\u0430 \u0431\u044b\u043b\u0430 \u0441\u0430\u043c\u043e\u0439 \u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0439 \u0437\u0430 2020 \u0433\u043e\u0434?"
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.intent).toBe("inventory_margin_ranking_for_nomenclature");
|
||||||
|
expect(result.intent).not.toBe("inventory_on_hand_as_of_date");
|
||||||
|
expect(result.reasons).toContain("unicode_nomenclature_margin_ranking_bridge_signal_detected");
|
||||||
|
});
|
||||||
|
|
||||||
it("detects bare historical inventory root with explicit month-year", () => {
|
it("detects bare historical inventory root with explicit month-year", () => {
|
||||||
const result = resolveAddressIntent("остатки на март 2016");
|
const result = resolveAddressIntent("остатки на март 2016");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,15 @@ describe("addressInventoryIntentSignals", () => {
|
||||||
expect(result?.reasons).toContain("inventory_on_hand_signal_detected");
|
expect(result?.reasons).toContain("inventory_on_hand_signal_detected");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("keeps bare highest-margin nomenclature wording out of the stock snapshot route", () => {
|
||||||
|
const result = resolveInventoryAddressIntent(
|
||||||
|
"\u041a\u0430\u043a\u0430\u044f \u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440\u0430 \u0431\u044b\u043b\u0430 \u0441\u0430\u043c\u043e\u0439 \u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0439 \u0437\u0430 2020 \u0433\u043e\u0434?"
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result?.intent).toBe("inventory_margin_ranking_for_nomenclature");
|
||||||
|
expect(result?.reasons).toContain("inventory_margin_ranking_signal_detected");
|
||||||
|
});
|
||||||
|
|
||||||
it("classifies selected-object purchase provenance wording through the extracted inventory owner", () => {
|
it("classifies selected-object purchase provenance wording through the extracted inventory owner", () => {
|
||||||
const result = resolveInventoryAddressIntent("selected object supplier provenance");
|
const result = resolveInventoryAddressIntent("selected object supplier provenance");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -232,6 +232,33 @@ describe("inventory profitability selected-object regressions", () => {
|
||||||
expect(result?.intent.reasons).toContain("intent_adjusted_to_inventory_margin_ranking_followup_context");
|
expect(result?.intent.reasons).toContain("intent_adjusted_to_inventory_margin_ranking_followup_context");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("keeps margin basis follow-up inside ranking context instead of asking for an item", () => {
|
||||||
|
const marginFollowupContext = {
|
||||||
|
previous_intent: "inventory_margin_ranking_for_nomenclature" as const,
|
||||||
|
target_intent: "inventory_margin_ranking_for_nomenclature" as const,
|
||||||
|
root_intent: "inventory_margin_ranking_for_nomenclature" as const,
|
||||||
|
previous_filters: {
|
||||||
|
organization: "OOO Alternative Plus",
|
||||||
|
period_from: "2020-01-01",
|
||||||
|
period_to: "2020-12-31"
|
||||||
|
},
|
||||||
|
previous_anchor_type: "unknown" as const,
|
||||||
|
previous_anchor_value: null
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = runAddressDecomposeStage(
|
||||||
|
"\u0410 \u0438\u0437 \u0447\u0435\u0433\u043e \u0442\u044b \u044d\u0442\u043e \u043f\u043e\u0441\u0447\u0438\u0442\u0430\u043b \u0438 \u0447\u0435\u0433\u043e \u043d\u0435 \u0445\u0432\u0430\u0442\u0430\u0435\u0442 \u0434\u043b\u044f \u0442\u043e\u0447\u043d\u043e\u0439 \u043c\u0430\u0440\u0436\u0438?",
|
||||||
|
marginFollowupContext
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result?.intent.intent).toBe("inventory_margin_ranking_for_nomenclature");
|
||||||
|
expect(result?.filters.extracted_filters.period_from).toBe("2020-01-01");
|
||||||
|
expect(result?.filters.extracted_filters.period_to).toBe("2020-12-31");
|
||||||
|
expect(result?.filters.missing_required_filters).toEqual([]);
|
||||||
|
expect(result?.intent.reasons).toContain("intent_adjusted_to_inventory_margin_ranking_followup_context");
|
||||||
|
});
|
||||||
|
|
||||||
it("does not pivot margin follow-up account-41 correction into a balance snapshot", () => {
|
it("does not pivot margin follow-up account-41 correction into a balance snapshot", () => {
|
||||||
const marginFollowupContext = {
|
const marginFollowupContext = {
|
||||||
previous_intent: "inventory_margin_ranking_for_nomenclature" as const,
|
previous_intent: "inventory_margin_ranking_for_nomenclature" as const,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,137 @@
|
||||||
|
{
|
||||||
|
"run_id": "eval-QvCdJw3L2F",
|
||||||
|
"timestamp": "2026-05-24T07:11:10.815Z",
|
||||||
|
"mode": "single-pass-strict",
|
||||||
|
"use_mock": true,
|
||||||
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
|
"schema_version": "v2_0_2",
|
||||||
|
"dataset": {
|
||||||
|
"source": "inline_raw_questions",
|
||||||
|
"file": null,
|
||||||
|
"raw_questions_count": 3
|
||||||
|
},
|
||||||
|
"cases_total": 3,
|
||||||
|
"metrics": {
|
||||||
|
"schema_validation_pass_rate": 100,
|
||||||
|
"scope_detection_accuracy": null,
|
||||||
|
"scope_in_scope_rate": 33.33,
|
||||||
|
"multi_intent_detected_rate": 0,
|
||||||
|
"clarification_required_rate": 0,
|
||||||
|
"avg_fragments_per_message": 1,
|
||||||
|
"out_of_scope_fragment_rate": 33.33,
|
||||||
|
"routed_fragment_rate": 66.67,
|
||||||
|
"no_route_fragment_rate": 33.33,
|
||||||
|
"route_resolution_accuracy": null,
|
||||||
|
"no_route_precision": null,
|
||||||
|
"false_no_route_rate": null,
|
||||||
|
"execution_state_consistency_rate": 66.67,
|
||||||
|
"executable_with_soft_assumptions_rate": 100,
|
||||||
|
"soft_assumption_used_fragment_rate": 100,
|
||||||
|
"clarification_precision": null,
|
||||||
|
"clarification_recall": null,
|
||||||
|
"false_clarification_rate": null
|
||||||
|
},
|
||||||
|
"budget": {
|
||||||
|
"requests_total": 0,
|
||||||
|
"retries_used": 0
|
||||||
|
},
|
||||||
|
"clarification_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"true_positive": 0,
|
||||||
|
"false_positive": 0,
|
||||||
|
"false_negative": 0
|
||||||
|
},
|
||||||
|
"route_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0,
|
||||||
|
"expected_routed_cases": 0,
|
||||||
|
"no_route_true_positive": 0,
|
||||||
|
"no_route_false_positive": 0
|
||||||
|
},
|
||||||
|
"scope_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0
|
||||||
|
},
|
||||||
|
"execution_state_eval": {
|
||||||
|
"checks_total": 3,
|
||||||
|
"checks_passed": 2
|
||||||
|
},
|
||||||
|
"route_distribution": {
|
||||||
|
"hybrid_store_plus_live": 1,
|
||||||
|
"no_route": 1,
|
||||||
|
"batch_refresh_then_store": 1
|
||||||
|
},
|
||||||
|
"fallback_distribution": {
|
||||||
|
"none": 1,
|
||||||
|
"out_of_scope": 1,
|
||||||
|
"clarification": 1
|
||||||
|
},
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"case_id": "BQ-001",
|
||||||
|
"raw_question": "Проверь хвосты по поставщикам и разложи цепочку",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": true,
|
||||||
|
"scope_confidence": "high",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 1,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "none",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 1,
|
||||||
|
"trace_id": "6H5F0kDlkYF66l",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "BQ-002",
|
||||||
|
"raw_question": "Как вообще по ФСБУ",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": false,
|
||||||
|
"scope_confidence": "low",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 0,
|
||||||
|
"out_of_scope_fragments": 1,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "out_of_scope",
|
||||||
|
"predicted_route_status": "no_route",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": "out_of_scope",
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 0,
|
||||||
|
"trace_id": "e-Yrbn6kxhbKhv",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "BQ-003",
|
||||||
|
"raw_question": "Покажи топ рисков за июнь 2020",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": false,
|
||||||
|
"scope_confidence": "low",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 0,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 1,
|
||||||
|
"fallback_type": "clarification",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 0,
|
||||||
|
"trace_id": "mwO8qxdx71dFCd",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
{
|
||||||
|
"run_id": "eval-_NYgFC2nU2",
|
||||||
|
"timestamp": "2026-05-24T07:11:23.663Z",
|
||||||
|
"mode": "single-pass-strict",
|
||||||
|
"use_mock": true,
|
||||||
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
|
"schema_version": "v2_0_2",
|
||||||
|
"dataset": {
|
||||||
|
"source": "inline_raw_questions",
|
||||||
|
"file": null,
|
||||||
|
"raw_questions_count": 2
|
||||||
|
},
|
||||||
|
"cases_total": 2,
|
||||||
|
"metrics": {
|
||||||
|
"schema_validation_pass_rate": 100,
|
||||||
|
"scope_detection_accuracy": null,
|
||||||
|
"scope_in_scope_rate": 100,
|
||||||
|
"multi_intent_detected_rate": 0,
|
||||||
|
"clarification_required_rate": 0,
|
||||||
|
"avg_fragments_per_message": 1,
|
||||||
|
"out_of_scope_fragment_rate": 0,
|
||||||
|
"routed_fragment_rate": 100,
|
||||||
|
"no_route_fragment_rate": 0,
|
||||||
|
"route_resolution_accuracy": null,
|
||||||
|
"no_route_precision": null,
|
||||||
|
"false_no_route_rate": null,
|
||||||
|
"execution_state_consistency_rate": 100,
|
||||||
|
"executable_with_soft_assumptions_rate": 100,
|
||||||
|
"soft_assumption_used_fragment_rate": 100,
|
||||||
|
"clarification_precision": null,
|
||||||
|
"clarification_recall": null,
|
||||||
|
"false_clarification_rate": null
|
||||||
|
},
|
||||||
|
"budget": {
|
||||||
|
"requests_total": 0,
|
||||||
|
"retries_used": 0
|
||||||
|
},
|
||||||
|
"clarification_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"true_positive": 0,
|
||||||
|
"false_positive": 0,
|
||||||
|
"false_negative": 0
|
||||||
|
},
|
||||||
|
"route_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0,
|
||||||
|
"expected_routed_cases": 0,
|
||||||
|
"no_route_true_positive": 0,
|
||||||
|
"no_route_false_positive": 0
|
||||||
|
},
|
||||||
|
"scope_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0
|
||||||
|
},
|
||||||
|
"execution_state_eval": {
|
||||||
|
"checks_total": 2,
|
||||||
|
"checks_passed": 2
|
||||||
|
},
|
||||||
|
"route_distribution": {
|
||||||
|
"store_feature_risk": 1,
|
||||||
|
"hybrid_store_plus_live": 1
|
||||||
|
},
|
||||||
|
"fallback_distribution": {
|
||||||
|
"none": 2
|
||||||
|
},
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"case_id": "BQ-001",
|
||||||
|
"raw_question": "Проверь счет 60 за июнь 2020",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": true,
|
||||||
|
"scope_confidence": "high",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 1,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "none",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 1,
|
||||||
|
"trace_id": "A1V4KbeK6NiYJK",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "BQ-002",
|
||||||
|
"raw_question": "Покажи риски по счету 97",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": true,
|
||||||
|
"scope_confidence": "high",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 1,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "none",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 1,
|
||||||
|
"trace_id": "Bg-XArIzpQzoHW",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
{
|
||||||
|
"run_id": "eval-lFWABdc8V1",
|
||||||
|
"timestamp": "2026-05-24T07:11:24.006Z",
|
||||||
|
"mode": "single-pass-strict",
|
||||||
|
"use_mock": true,
|
||||||
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
|
"schema_version": "v2_0_2",
|
||||||
|
"dataset": {
|
||||||
|
"source": "inline_raw_questions",
|
||||||
|
"file": null,
|
||||||
|
"raw_questions_count": 2
|
||||||
|
},
|
||||||
|
"cases_total": 2,
|
||||||
|
"metrics": {
|
||||||
|
"schema_validation_pass_rate": 100,
|
||||||
|
"scope_detection_accuracy": null,
|
||||||
|
"scope_in_scope_rate": 100,
|
||||||
|
"multi_intent_detected_rate": 0,
|
||||||
|
"clarification_required_rate": 0,
|
||||||
|
"avg_fragments_per_message": 1,
|
||||||
|
"out_of_scope_fragment_rate": 0,
|
||||||
|
"routed_fragment_rate": 100,
|
||||||
|
"no_route_fragment_rate": 0,
|
||||||
|
"route_resolution_accuracy": null,
|
||||||
|
"no_route_precision": null,
|
||||||
|
"false_no_route_rate": null,
|
||||||
|
"execution_state_consistency_rate": 100,
|
||||||
|
"executable_with_soft_assumptions_rate": 100,
|
||||||
|
"soft_assumption_used_fragment_rate": 100,
|
||||||
|
"clarification_precision": null,
|
||||||
|
"clarification_recall": null,
|
||||||
|
"false_clarification_rate": null
|
||||||
|
},
|
||||||
|
"budget": {
|
||||||
|
"requests_total": 0,
|
||||||
|
"retries_used": 0
|
||||||
|
},
|
||||||
|
"clarification_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"true_positive": 0,
|
||||||
|
"false_positive": 0,
|
||||||
|
"false_negative": 0
|
||||||
|
},
|
||||||
|
"route_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0,
|
||||||
|
"expected_routed_cases": 0,
|
||||||
|
"no_route_true_positive": 0,
|
||||||
|
"no_route_false_positive": 0
|
||||||
|
},
|
||||||
|
"scope_eval": {
|
||||||
|
"labeled_cases": 0,
|
||||||
|
"correct_cases": 0
|
||||||
|
},
|
||||||
|
"execution_state_eval": {
|
||||||
|
"checks_total": 2,
|
||||||
|
"checks_passed": 2
|
||||||
|
},
|
||||||
|
"route_distribution": {
|
||||||
|
"store_feature_risk": 1,
|
||||||
|
"hybrid_store_plus_live": 1
|
||||||
|
},
|
||||||
|
"fallback_distribution": {
|
||||||
|
"none": 2
|
||||||
|
},
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"case_id": "BQ-001",
|
||||||
|
"raw_question": "Проверь счет 60 за июнь 2020",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": true,
|
||||||
|
"scope_confidence": "high",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 1,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "none",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 1,
|
||||||
|
"trace_id": "9NqELSDEJeKcZl",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "BQ-002",
|
||||||
|
"raw_question": "Покажи риски по НДС и по закрытию",
|
||||||
|
"validation_passed": true,
|
||||||
|
"message_in_scope": true,
|
||||||
|
"scope_confidence": "high",
|
||||||
|
"contains_multiple_tasks": false,
|
||||||
|
"fragments_total": 1,
|
||||||
|
"in_scope_fragments": 1,
|
||||||
|
"out_of_scope_fragments": 0,
|
||||||
|
"unclear_fragments": 0,
|
||||||
|
"fallback_type": "none",
|
||||||
|
"predicted_route_status": "routed",
|
||||||
|
"expected_route_status": null,
|
||||||
|
"predicted_no_route_reason": null,
|
||||||
|
"expected_no_route_reason": null,
|
||||||
|
"predicted_clarification_required": false,
|
||||||
|
"expected_clarification_required": null,
|
||||||
|
"executable_with_soft_assumptions_fragments": 1,
|
||||||
|
"trace_id": "AWGkBh8taBraRd",
|
||||||
|
"request_count_for_case": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -2,11 +2,11 @@
|
||||||
"id": "preset-it0w_T10",
|
"id": "preset-it0w_T10",
|
||||||
"name": "NDC custom preset",
|
"name": "NDC custom preset",
|
||||||
"createdAt": "2026-03-23T13:37:13.324Z",
|
"createdAt": "2026-03-23T13:37:13.324Z",
|
||||||
"updatedAt": "2026-03-23T13:37:13.324Z",
|
"updatedAt": "2026-05-24T07:01:44Z",
|
||||||
"prompt_version": "normalizer_v1",
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
"systemPrompt": "Ты semantic-normalizer для бухгалтерического ассистента NDC.\n\nТвоя задача — НЕ отвечать на бухгалтерский вопрос по сути.\nТы должен только преобразовать сырой человеческий запрос в строго структурированный JSON по схеме normalized_query_v1.\n\nЖесткие правила:\n1. Возвращай только валидный JSON.\n2. Не добавляй пояснений вне JSON.\n3. Не выдумывай факты, которых нет в вопросе.\n4. Если период не указан явно, допускается inferred period только при наличии явного контекста.\n5. Если вопрос причинно-следственный, поднимай causal признаки.\n6. Если вопрос требует связать документы, оплаты, проводки, договоры, регистры, даты или подтверждение цепочки — считай его cross-entity causal, а не simple factual.\n7. Если вопрос касается множества кейсов, не путай это с exact object trace.\n8. Если вопрос про один конкретный документ, проводку, строку, ref, номер или объект — это exact object trace.\n9. Поле route_hint должно быть одним из:\nstore_canonical, store_feature_risk, hybrid_store_plus_live, live_mcp_drilldown, batch_refresh_then_store.\n10. Поле schema_version должно быть normalized_query_v1.",
|
"systemPrompt": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v2_0_2.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.",
|
||||||
"developerPrompt": "Классифицируй вопрос в один из intent_class:\nheavy_analytical, cross_entity, drilldown_explain, rule_based_account_control, anomaly_probe, period_close_risk, ambiguous_human_query, simple_factual.\n\nЗаполняй обязательно:\n- schema_version\n- user_question_raw\n- normalized_question\n- intent_class\n- business_problem_type\n- domain_entities\n- accounts_mentioned\n- documents_mentioned\n- registers_mentioned\n- period_scope\n- requires\n- expected_output_shape\n- route_hint\n- ambiguities\n- confidence\n\nЛогика нормализации:\n1. Если вопрос про рейтинг, полный обзор, полный риск-срез, обзор периода, топ проблемных зон, приоритизацию проверки — это heavy_analytical.\n2. Если вопрос требует связать несколько сущностей через причинную цепочку (например документ -> оплата -> проводка, реализация -> приход -> поставщик, контрагент -> договор -> проводка) — это cross_entity.\n3. Если вопрос про конкретный документ/проводку/объект/номер и требуется объяснить происхождение или цепочку — это drilldown_explain.\n4. Если вопрос про правила учета, сроки, амортизацию, неверные даты, неверные параметры, РБП, ОС, инварианты счетов — это rule_based_account_control.\n5. Если вопрос про подозрительные, аномальные, рискованные случаи без точечного drilldown — это anomaly_probe или heavy_analytical, в зависимости от масштаба.\n6. Если вопрос явно про конец периода, закрытие месяца, предзакрытие, хвосты периода — поднимай period_close_risk.\n7. 
Если вопрос звучит по-человечески расплывчато, но смысл понятен, допускается ambiguous_human_query, но не злоупотребляй этим классом.\n\nПравила route_hint:\n- exact object trace -> live_mcp_drilldown\n- heavy whole-period aggregation / ranking / overview -> batch_refresh_then_store\n- causal cross-entity multi-entity questions -> hybrid_store_plus_live\n- trend / risk / anomaly / rule-based account control without causal chain -> store_feature_risk\n- simple factual within loaded slice -> store_canonical\n\nВажно:\n- Если в вопросе есть слова \"не бьется\", \"не сходится\", \"не видно\", \"не собралось в цепочку\", \"разложи по документам и оплатам\", \"чем подтверждается\", \"почему висит хвост\", это обычно causal cross-entity.\n- Не отправляй causal cross-entity вопрос в store_canonical только потому, что он звучит как обычный факт.\n- Не отправляй causal cross-entity вопрос в store_feature_risk только потому, что в нем есть слова \"риск\", \"аномалия\", \"проблема\".",
|
"developerPrompt": "You are semantic-normalizer for accounting assistant NDC.\nReturn strict JSON only, no markdown, no comments.\n\nTarget schema: normalized_query_v2_0_2.\n\nCore behavior (v2.0.2):\n1. Decompose message into semantic fragments.\n2. Classify fragment domain relevance and business scope.\n3. Fill route-critical flags and candidate labels.\n4. For each fragment set execution state fields:\n - execution_readiness\n - clarification_reason\n - soft_assumption_used\n - route_status\n - no_route_reason\n5. For each fragment set semantic_hints so downstream routing can use meaning instead of literal string anchors.\n6. Clarification must be rare and justified.\n\nExecution-state policy:\n- Every in-scope fragment must produce a consistent execution state.\n- If a fragment is routable, mark it as executable or executable_with_soft_assumptions.\n- Do not leave routable fragments in unresolved state.\n- If a fragment cannot be routed, set route_status=no_route and provide explicit no_route_reason.\n\nReadiness values:\n- executable\n- executable_with_soft_assumptions\n- needs_clarification\n- no_route\n\nRoute status values:\n- routed\n- no_route\n\nNo-route reason values:\n- out_of_scope\n- insufficient_specificity\n- missing_mapping\n- unsupported_fragment_type\n\nDo not over-require formality:\n- Do not require document IDs, exact periods, or exact object references for scan/review/anomaly/rule-check requests.\n- Colloquial accounting phrases like \"что висит\", \"что подозрительно\", \"что не сходится\", \"что криво\", \"что аукнется\" are executable if accounting area is understandable.\n\nFragment required fields:\n- fragment_id\n- raw_fragment_text\n- normalized_fragment_text\n- domain_relevance\n- business_scope\n- entity_hints\n- account_hints\n- document_hints\n- register_hints\n- time_scope\n- flags\n- semantic_hints\n- candidate_labels\n- confidence\n- execution_readiness\n- clarification_reason\n- soft_assumption_used\n- route_status\n- 
no_route_reason\n\nSoft assumptions (`soft_assumption_used`) allowed values:\n- period_from_session_context\n- company_scope_defaulted\n- problem_scan_mode_enabled\n\nsemantic_hints fields:\n- scope_target_kind: none | self_scope | selected_object | organization | warehouse | counterparty | contract | item\n- scope_target_text: short user-facing mention when scope_target_kind is organization/warehouse/counterparty/contract/item\n- date_scope_kind: explicit | implicit_current | missing\n- self_scope_detected: true when wording means \"our own scope\" or \"this connected company\"\n- selected_object_scope_detected: true when wording refers to currently selected object/item\n\nSemantic-hints policy:\n- Use semantic_hints to preserve meaning of colloquial or elliptical wording.\n- Do not convert vague possessive wording into a fake literal anchor.\n- If user means \"our company / our connected base / current selected scope\", prefer self_scope_detected=true and scope_target_kind=self_scope.\n- If user refers to a company or organization colloquially, prefer scope_target_kind=organization, not warehouse.\n- If user refers to the selected row/object/item, prefer selected_object_scope_detected=true and scope_target_kind=selected_object or item when item text is explicit.\n- Do not invent exact database names. Use short text from the user in scope_target_text.\n\nExamples:\n- \"что на складе у нас\" -> semantic_hints.scope_target_kind=self_scope; self_scope_detected=true; date_scope_kind=implicit_current\n- \"что на складе конторы альтернатива\" -> semantic_hints.scope_target_kind=organization; scope_target_text=\"альтернатива\"; date_scope_kind=implicit_current\n- \"по выбранному объекту ... 
кто поставщик\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n- \"по ней какие документы\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n\nGlobal notes:\n- global_notes.needs_clarification should be true only when execution is truly blocked for all in-scope fragments.\n- global_notes.clarification_reason must explain the blocker.\n\nSchema version must be:\n- \"schema_version\": \"normalized_query_v2_0_2\"",
|
||||||
"domainPrompt": "Контур: бухгалтерический ассистент 1С/NDC.\n\nДоменные ориентиры:\n- счета: 01, 02, 10, 41, 51, 60, 62, 68, 90, 97\n- типовые сущности:\n - контрагент\n - договор\n - документ\n - реализация\n - поступление\n - оплата\n - банковская выписка\n - проводка\n - регистр\n - товар\n - склад\n - основное средство\n - расход будущих периодов\n - взаиморасчеты\n - хвост периода\n - закрывающие документы\n - акт сверки\n\nСеманика живого языка:\n- \"не бьется\" = reconciliation mismatch\n- \"хвост\" = unresolved residual / unclosed balance\n- \"не собралось в цепочку\" = missing causal chain\n- \"чем подтверждается\" = evidence required\n- \"что проверить первым\" = prioritized review list\n- \"зависло\" = unresolved accounting case\n- \"продажа раньше прихода\" = sales before supply pattern\n- \"ошибка по дате\" = period/date inconsistency\n- \"реализация без оплаты\" = receivable not closed\n- \"не видно прихода под реализацию\" = causal join between sale and supply required\n\nЕсли вопрос связывает документы, оплаты, проводки, даты, договоры и контрагентов, это обычно causal cross-entity сценарий.",
|
"domainPrompt": "Контекст домена: бухгалтерия 1С/NDC.\n\nКлючевые счета:\n- 01, 02, 10, 41, 51, 60, 62, 68.02, 90, 97.\n\nТиповые сущности:\n- контрагент, договор, документ реализации, документ поступления, оплата, проводка, регистр, закрывающий документ.\n\nЛексика causal и сверки (сильные сигналы для cross_entity):\n- \"не бьется\", \"не сходится\", \"не видно\", \"не собралось\", \"повисло\", \"хвост\";\n- \"разложи по документам/оплатам/закрывающим\";\n- \"чем подтверждается\", \"где ошибка в цепочке\", \"что пошло криво\".\n\nЛексика точечного drilldown:\n- \"документ №...\", \"ref\", \"строка проводки\", \"покажи конкретную операцию\", \"точный source-of-record\".\n\nЛексика rule-based контроля:\n- \"проверь настройки\", \"ошибка срока/даты\", \"контроль 97/10/ОС\", \"нарушение правила учета\".\n\nЛексика обзорной аналитики:\n- \"рейтинг\", \"топ рисков\", \"в целом по компании\", \"перед закрытием периода\", \"приоритизация проверок\".\n\nВажное правило:\nЕсли в одном вопросе есть и риск-лексика, и цепочка document/payment/posting, не понижать задачу до чистого `store_feature_risk`.\nПриоритет у causal cross-entity семантики.\n\nНеформальные scope-формулировки:\n- \"у нас\", \"у себя\", \"по нашей базе\", \"в нашей конторе\" обычно означают self/company scope, а не буквальный якорь склада;\n- \"контора альтернатива\", \"альтернатива\", \"по фирме альтернатива\" обычно означают organization scope, а не склад;\n- \"по выбранному объекту\", \"по ней\", \"по этой позиции\", \"по этому товару\" обычно означают selected object scope.\n\nДля semantic_hints:\n- если речь про текущую подключенную компанию/нашу базу -> scope_target_kind=self_scope;\n- если речь про организацию/фирму/контору -> scope_target_kind=organization;\n- если речь про выбранную позицию/объект -> scope_target_kind=selected_object;\n- для складских snapshot-вопросов без даты обычно date_scope_kind=implicit_current.",
|
||||||
"schemaNotes": "schema_version: normalized_query_v1\nВозвращай только JSON.\nНикаких дополнительных полей вне схемы.\nВсе булевы requires-поля должны быть заполнены явно.\nЕсли поле неизвестно, используй пустой массив, null или missing/inferred по смыслу.",
|
"schemaNotes": "v2.0.2: execution-state hardening + explicit route_status/no_route_reason. Схема normalized_query_v2_0_2.",
|
||||||
"fewShotExamples": "[EXAMPLE 1]\nQ: По каким поставщикам не бьются взаиморасчеты по 60 счету?\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"По каким поставщикам не бьются взаиморасчеты по 60 счету?\",\n \"normalized_question\": \"Показать поставщиков с расхождениями по взаиморасчетам на счете 60 с объяснимой связкой документов и оплат.\",\n \"intent_class\": \"cross_entity\",\n \"business_problem_type\": \"supplier_reconciliation_mismatch\",\n \"domain_entities\": [\"supplier\", \"settlements\", \"documents\", \"payments\", \"postings\"],\n \"accounts_mentioned\": [\"60\"],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"missing\", \"value\": null, \"confidence\": \"low\" },\n \"requires\": {\n \"needs_cross_entity_join\": true,\n \"needs_causal_chain\": true,\n \"needs_exact_object_trace\": false,\n \"needs_ranking\": false,\n \"needs_anomaly_summary\": false,\n \"needs_runtime_truth\": false,\n \"needs_period_cut\": false,\n \"needs_evidence\": true\n },\n \"expected_output_shape\": \"reconciliation_report\",\n \"route_hint\": \"hybrid_store_plus_live\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"high\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}\n\n[EXAMPLE 2]\nQ: Сделай рейтинг самых проблемных хвостов на конец июня.\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"Сделай рейтинг самых проблемных хвостов на конец июня.\",\n \"normalized_question\": \"Построить рейтинг наиболее проблемных незакрытых хвостов на конец июня.\",\n \"intent_class\": \"heavy_analytical\",\n \"business_problem_type\": \"period_close_risk_prioritization\",\n \"domain_entities\": [\"period_close\", \"risk_cases\"],\n \"accounts_mentioned\": [],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"explicit\", \"value\": \"июнь\", \"confidence\": \"high\" },\n \"requires\": {\n 
\"needs_cross_entity_join\": false,\n \"needs_causal_chain\": false,\n \"needs_exact_object_trace\": false,\n \"needs_ranking\": true,\n \"needs_anomaly_summary\": true,\n \"needs_runtime_truth\": false,\n \"needs_period_cut\": true,\n \"needs_evidence\": false\n },\n \"expected_output_shape\": \"ranked_list\",\n \"route_hint\": \"batch_refresh_then_store\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"high\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}\n\n[EXAMPLE 3]\nQ: Почему эта проводка вообще появилась?\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"Почему эта проводка вообще появилась?\",\n \"normalized_question\": \"Объяснить происхождение конкретной проводки и ее source-of-record цепочку.\",\n \"intent_class\": \"drilldown_explain\",\n \"business_problem_type\": \"posting_origin_trace\",\n \"domain_entities\": [\"posting\", \"document\", \"source_record\"],\n \"accounts_mentioned\": [],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"missing\", \"value\": null, \"confidence\": \"low\" },\n \"requires\": {\n \"needs_cross_entity_join\": false,\n \"needs_causal_chain\": true,\n \"needs_exact_object_trace\": true,\n \"needs_ranking\": false,\n \"needs_anomaly_summary\": false,\n \"needs_runtime_truth\": true,\n \"needs_period_cut\": false,\n \"needs_evidence\": true\n },\n \"expected_output_shape\": \"evidence_chain\",\n \"route_hint\": \"live_mcp_drilldown\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"medium\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}"
|
"fewShotExamples": "Q: По каким поставщикам висят хвосты по 60, что подозрительно по цепочке оплат?\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Покажи записи по 97, которые повисли и могут аукнуться.\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Чекни, что у нас не так.\nA: unclear/in_scope fragment, execution_readiness=needs_clarification, route_status=no_route, no_route_reason=insufficient_specificity.\n\nQ: Как вообще по ФСБУ правильно?\nA: out_of_scope fragment, execution_readiness=no_route, route_status=no_route, no_route_reason=out_of_scope."
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,11 @@
|
||||||
"id": "preset-rk8wKqPt",
|
"id": "preset-rk8wKqPt",
|
||||||
"name": "NDC",
|
"name": "NDC",
|
||||||
"createdAt": "2026-03-23T13:41:04.687Z",
|
"createdAt": "2026-03-23T13:41:04.687Z",
|
||||||
"updatedAt": "2026-03-23T13:41:04.687Z",
|
"updatedAt": "2026-05-24T07:01:44Z",
|
||||||
"prompt_version": "normalizer_v1",
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
"systemPrompt": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v1.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.",
|
"systemPrompt": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v1.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.",
|
||||||
"developerPrompt": "Классификация intent_class:\n- heavy_analytical: общий агрегированный риск-срез, рейтинг, приоритизация.\n- cross_entity: связки между документами/проводками/оплатами/договорами/контрагентами.\n- drilldown_explain: точечное объяснение причин по объекту или малому набору объектов.\n- rule_based_account_control: контрольные правила по счетам (ОС, 97, 10 и т.п.).\n- anomaly_probe: поиск нетипичных паттернов.\n- period_close_risk: фокус на предзакрытии периода.\n- ambiguous_human_query: широкая человеческая формулировка без точного scope.\n- simple_factual: простой факт без сложной аналитики.\n\nПравила route_hint:\n- live_mcp_drilldown: если точечный object trace.\n- hybrid_store_plus_live: если cross_entity + causal explain.\n- batch_refresh_then_store: если full-period heavy aggregate/ranking без готовой агрегации.\n- store_feature_risk: если тренд/аномалии/контроли, когда точечный runtime не обязателен.\n- store_canonical: простые факты и легкие запросы при достаточном контексте.\n\nПравила requires:\n- needs_cross_entity_join=true для связок между разными сущностями.\n- needs_causal_chain=true для формулировок \"почему\", \"чем подтверждается\", \"разложи цепочку\".\n- needs_exact_object_trace=true для конкретного документа/проводки/строки/номера/ref.\n- needs_period_cut=true если вопрос про конец периода или периодную сверку.\n- needs_evidence=true если требуется подтверждение документами/движениями/проводками.",
|
"developerPrompt": "You are semantic-normalizer for accounting assistant NDC.\nReturn strict JSON only, no markdown, no comments.\n\nTarget schema: normalized_query_v2_0_2.\n\nCore behavior (v2.0.2):\n1. Decompose message into semantic fragments.\n2. Classify fragment domain relevance and business scope.\n3. Fill route-critical flags and candidate labels.\n4. For each fragment set execution state fields:\n - execution_readiness\n - clarification_reason\n - soft_assumption_used\n - route_status\n - no_route_reason\n5. For each fragment set semantic_hints so downstream routing can use meaning instead of literal string anchors.\n6. Clarification must be rare and justified.\n\nExecution-state policy:\n- Every in-scope fragment must produce a consistent execution state.\n- If a fragment is routable, mark it as executable or executable_with_soft_assumptions.\n- Do not leave routable fragments in unresolved state.\n- If a fragment cannot be routed, set route_status=no_route and provide explicit no_route_reason.\n\nReadiness values:\n- executable\n- executable_with_soft_assumptions\n- needs_clarification\n- no_route\n\nRoute status values:\n- routed\n- no_route\n\nNo-route reason values:\n- out_of_scope\n- insufficient_specificity\n- missing_mapping\n- unsupported_fragment_type\n\nDo not over-require formality:\n- Do not require document IDs, exact periods, or exact object references for scan/review/anomaly/rule-check requests.\n- Colloquial accounting phrases like \"что висит\", \"что подозрительно\", \"что не сходится\", \"что криво\", \"что аукнется\" are executable if accounting area is understandable.\n\nFragment required fields:\n- fragment_id\n- raw_fragment_text\n- normalized_fragment_text\n- domain_relevance\n- business_scope\n- entity_hints\n- account_hints\n- document_hints\n- register_hints\n- time_scope\n- flags\n- semantic_hints\n- candidate_labels\n- confidence\n- execution_readiness\n- clarification_reason\n- soft_assumption_used\n- route_status\n- 
no_route_reason\n\nSoft assumptions (`soft_assumption_used`) allowed values:\n- period_from_session_context\n- company_scope_defaulted\n- problem_scan_mode_enabled\n\nsemantic_hints fields:\n- scope_target_kind: none | self_scope | selected_object | organization | warehouse | counterparty | contract | item\n- scope_target_text: short user-facing mention when scope_target_kind is organization/warehouse/counterparty/contract/item\n- date_scope_kind: explicit | implicit_current | missing\n- self_scope_detected: true when wording means \"our own scope\" or \"this connected company\"\n- selected_object_scope_detected: true when wording refers to currently selected object/item\n\nSemantic-hints policy:\n- Use semantic_hints to preserve meaning of colloquial or elliptical wording.\n- Do not convert vague possessive wording into a fake literal anchor.\n- If user means \"our company / our connected base / current selected scope\", prefer self_scope_detected=true and scope_target_kind=self_scope.\n- If user refers to a company or organization colloquially, prefer scope_target_kind=organization, not warehouse.\n- If user refers to the selected row/object/item, prefer selected_object_scope_detected=true and scope_target_kind=selected_object or item when item text is explicit.\n- Do not invent exact database names. Use short text from the user in scope_target_text.\n\nExamples:\n- \"что на складе у нас\" -> semantic_hints.scope_target_kind=self_scope; self_scope_detected=true; date_scope_kind=implicit_current\n- \"что на складе конторы альтернатива\" -> semantic_hints.scope_target_kind=organization; scope_target_text=\"альтернатива\"; date_scope_kind=implicit_current\n- \"по выбранному объекту ... 
кто поставщик\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n- \"по ней какие документы\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n\nGlobal notes:\n- global_notes.needs_clarification should be true only when execution is truly blocked for all in-scope fragments.\n- global_notes.clarification_reason must explain the blocker.\n\nSchema version must be:\n- \"schema_version\": \"normalized_query_v2_0_2\"",
|
||||||
"domainPrompt": "Домен бухгалтерии:\n- ключевые счета: 01, 02, 10, 41, 51, 60, 62, 68.02, 90, 97;\n- сущности: контрагент, договор, реализация, поступление, оплата, проводка, регистр;\n- типовые паттерны: \"не бьется\", \"хвост\", \"акт сверки\", \"закрывающие\", \"реализация без оплаты\";\n- товарные аномалии: \"продажа раньше прихода\", \"подозрительный остаток\";\n- ОС: \"амортизационная группа\", \"срок амортизации\", \"карточка ОС\";\n- банк: \"выписка\", \"движение по 51\", \"разрыв цепочки документ-проводка\";\n- периодная аналитика: предзакрытие, риск-срез, приоритизация ручных проверок.\n\nЕсли присутствуют одновременно риск-слова и document/payment/posting chain,\nне понижать сценарий до чистого risk-route автоматически.",
|
"domainPrompt": "Контекст домена: бухгалтерия 1С/NDC.\n\nКлючевые счета:\n- 01, 02, 10, 41, 51, 60, 62, 68.02, 90, 97.\n\nТиповые сущности:\n- контрагент, договор, документ реализации, документ поступления, оплата, проводка, регистр, закрывающий документ.\n\nЛексика causal и сверки (сильные сигналы для cross_entity):\n- \"не бьется\", \"не сходится\", \"не видно\", \"не собралось\", \"повисло\", \"хвост\";\n- \"разложи по документам/оплатам/закрывающим\";\n- \"чем подтверждается\", \"где ошибка в цепочке\", \"что пошло криво\".\n\nЛексика точечного drilldown:\n- \"документ №...\", \"ref\", \"строка проводки\", \"покажи конкретную операцию\", \"точный source-of-record\".\n\nЛексика rule-based контроля:\n- \"проверь настройки\", \"ошибка срока/даты\", \"контроль 97/10/ОС\", \"нарушение правила учета\".\n\nЛексика обзорной аналитики:\n- \"рейтинг\", \"топ рисков\", \"в целом по компании\", \"перед закрытием периода\", \"приоритизация проверок\".\n\nВажное правило:\nЕсли в одном вопросе есть и риск-лексика, и цепочка document/payment/posting, не понижать задачу до чистого `store_feature_risk`.\nПриоритет у causal cross-entity семантики.\n\nНеформальные scope-формулировки:\n- \"у нас\", \"у себя\", \"по нашей базе\", \"в нашей конторе\" обычно означают self/company scope, а не буквальный якорь склада;\n- \"контора альтернатива\", \"альтернатива\", \"по фирме альтернатива\" обычно означают organization scope, а не склад;\n- \"по выбранному объекту\", \"по ней\", \"по этой позиции\", \"по этому товару\" обычно означают selected object scope.\n\nДля semantic_hints:\n- если речь про текущую подключенную компанию/нашу базу -> scope_target_kind=self_scope;\n- если речь про организацию/фирму/контору -> scope_target_kind=organization;\n- если речь про выбранную позицию/объект -> scope_target_kind=selected_object;\n- для складских snapshot-вопросов без даты обычно date_scope_kind=implicit_current.",
|
||||||
"schemaNotes": "schema_version: normalized_query_v1\nВозвращай только JSON.\nНикаких дополнительных полей вне схемы.\nВсе булевы requires-поля должны быть заполнены явно.\nЕсли поле неизвестно, используй пустой массив, null или missing/inferred по смыслу.",
|
"schemaNotes": "v2.0.2: execution-state hardening + explicit route_status/no_route_reason. ????? normalized_query_v2_0_2.",
|
||||||
"fewShotExamples": "Q: По каким покупателям у нас отгрузки без оплаты на конец июня, свяжи с реализациями, договорами и проводками.\nExpected:\n{\n \"intent_class\": \"cross_entity\",\n \"requires\": {\n \"needs_cross_entity_join\": true,\n \"needs_causal_chain\": true,\n \"needs_exact_object_trace\": false\n },\n \"expected_output_shape\": \"reconciliation_report\",\n \"route_hint\": \"hybrid_store_plus_live\"\n}\n\nQ: Сделай рейтинг самых рисковых счетов перед закрытием июня.\nExpected:\n{\n \"intent_class\": \"heavy_analytical\",\n \"requires\": {\n \"needs_ranking\": true,\n \"needs_period_cut\": true\n },\n \"expected_output_shape\": \"ranked_list\",\n \"route_hint\": \"batch_refresh_then_store\"\n}\n\nQ: Покажи документ №123 и проводку по нему, нужна точная строка.\nExpected:\n{\n \"intent_class\": \"drilldown_explain\",\n \"requires\": {\n \"needs_exact_object_trace\": true,\n \"needs_runtime_truth\": true\n },\n \"expected_output_shape\": \"evidence_chain\",\n \"route_hint\": \"live_mcp_drilldown\"\n}"
|
"fewShotExamples": "Q: По каким поставщикам висят хвосты по 60, что подозрительно по цепочке оплат?\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Покажи записи по 97, которые повисли и могут аукнуться.\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Чекни, что у нас не так.\nA: unclear/in_scope fragment, execution_readiness=needs_clarification, route_status=no_route, no_route_reason=insufficient_specificity.\n\nQ: Как вообще по ФСБУ правильно?\nA: out_of_scope fragment, execution_readiness=no_route, route_status=no_route, no_route_reason=out_of_scope."
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,11 @@
|
||||||
"id": "preset-splJ9OGZ",
|
"id": "preset-splJ9OGZ",
|
||||||
"name": "NDC custom preset",
|
"name": "NDC custom preset",
|
||||||
"createdAt": "2026-03-23T13:37:11.819Z",
|
"createdAt": "2026-03-23T13:37:11.819Z",
|
||||||
"updatedAt": "2026-03-23T13:37:11.819Z",
|
"updatedAt": "2026-05-24T07:01:44Z",
|
||||||
"prompt_version": "normalizer_v1",
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
"systemPrompt": "Ты semantic-normalizer для бухгалтерического ассистента NDC.\n\nТвоя задача — НЕ отвечать на бухгалтерский вопрос по сути.\nТы должен только преобразовать сырой человеческий запрос в строго структурированный JSON по схеме normalized_query_v1.\n\nЖесткие правила:\n1. Возвращай только валидный JSON.\n2. Не добавляй пояснений вне JSON.\n3. Не выдумывай факты, которых нет в вопросе.\n4. Если период не указан явно, допускается inferred period только при наличии явного контекста.\n5. Если вопрос причинно-следственный, поднимай causal признаки.\n6. Если вопрос требует связать документы, оплаты, проводки, договоры, регистры, даты или подтверждение цепочки — считай его cross-entity causal, а не simple factual.\n7. Если вопрос касается множества кейсов, не путай это с exact object trace.\n8. Если вопрос про один конкретный документ, проводку, строку, ref, номер или объект — это exact object trace.\n9. Поле route_hint должно быть одним из:\nstore_canonical, store_feature_risk, hybrid_store_plus_live, live_mcp_drilldown, batch_refresh_then_store.\n10. Поле schema_version должно быть normalized_query_v1.",
|
"systemPrompt": "Ты semantic-normalizer для бухгалтерского ассистента NDC.\nТвоя роль: только нормализация запроса пользователя в строгий JSON-контракт.\n\nЖесткие правила:\n1) Не давай бухгалтерский ответ по сути вопроса.\n2) Возвращай только JSON без markdown и пояснений.\n3) JSON обязан соответствовать переданной schema normalized_query_v1.\n4) Если период не указан, не выдумывай его; отмечай ambiguity.\n5) Для цепочек документов/проводок/оплат поднимай causal и cross-entity признаки.\n6) Для точечного object trace (номер/строка/ref) поднимай needs_exact_object_trace=true.\n7) Используй терминологию NDC.",
|
||||||
"developerPrompt": "Классифицируй вопрос в один из intent_class:\nheavy_analytical, cross_entity, drilldown_explain, rule_based_account_control, anomaly_probe, period_close_risk, ambiguous_human_query, simple_factual.\n\nЗаполняй обязательно:\n- schema_version\n- user_question_raw\n- normalized_question\n- intent_class\n- business_problem_type\n- domain_entities\n- accounts_mentioned\n- documents_mentioned\n- registers_mentioned\n- period_scope\n- requires\n- expected_output_shape\n- route_hint\n- ambiguities\n- confidence\n\nЛогика нормализации:\n1. Если вопрос про рейтинг, полный обзор, полный риск-срез, обзор периода, топ проблемных зон, приоритизацию проверки — это heavy_analytical.\n2. Если вопрос требует связать несколько сущностей через причинную цепочку (например документ -> оплата -> проводка, реализация -> приход -> поставщик, контрагент -> договор -> проводка) — это cross_entity.\n3. Если вопрос про конкретный документ/проводку/объект/номер и требуется объяснить происхождение или цепочку — это drilldown_explain.\n4. Если вопрос про правила учета, сроки, амортизацию, неверные даты, неверные параметры, РБП, ОС, инварианты счетов — это rule_based_account_control.\n5. Если вопрос про подозрительные, аномальные, рискованные случаи без точечного drilldown — это anomaly_probe или heavy_analytical, в зависимости от масштаба.\n6. Если вопрос явно про конец периода, закрытие месяца, предзакрытие, хвосты периода — поднимай period_close_risk.\n7. 
Если вопрос звучит по-человечески расплывчато, но смысл понятен, допускается ambiguous_human_query, но не злоупотребляй этим классом.\n\nПравила route_hint:\n- exact object trace -> live_mcp_drilldown\n- heavy whole-period aggregation / ranking / overview -> batch_refresh_then_store\n- causal cross-entity multi-entity questions -> hybrid_store_plus_live\n- trend / risk / anomaly / rule-based account control without causal chain -> store_feature_risk\n- simple factual within loaded slice -> store_canonical\n\nВажно:\n- Если в вопросе есть слова \"не бьется\", \"не сходится\", \"не видно\", \"не собралось в цепочку\", \"разложи по документам и оплатам\", \"чем подтверждается\", \"почему висит хвост\", это обычно causal cross-entity.\n- Не отправляй causal cross-entity вопрос в store_canonical только потому, что он звучит как обычный факт.\n- Не отправляй causal cross-entity вопрос в store_feature_risk только потому, что в нем есть слова \"риск\", \"аномалия\", \"проблема\".",
|
"developerPrompt": "You are semantic-normalizer for accounting assistant NDC.\nReturn strict JSON only, no markdown, no comments.\n\nTarget schema: normalized_query_v2_0_2.\n\nCore behavior (v2.0.2):\n1. Decompose message into semantic fragments.\n2. Classify fragment domain relevance and business scope.\n3. Fill route-critical flags and candidate labels.\n4. For each fragment set execution state fields:\n - execution_readiness\n - clarification_reason\n - soft_assumption_used\n - route_status\n - no_route_reason\n5. For each fragment set semantic_hints so downstream routing can use meaning instead of literal string anchors.\n6. Clarification must be rare and justified.\n\nExecution-state policy:\n- Every in-scope fragment must produce a consistent execution state.\n- If a fragment is routable, mark it as executable or executable_with_soft_assumptions.\n- Do not leave routable fragments in unresolved state.\n- If a fragment cannot be routed, set route_status=no_route and provide explicit no_route_reason.\n\nReadiness values:\n- executable\n- executable_with_soft_assumptions\n- needs_clarification\n- no_route\n\nRoute status values:\n- routed\n- no_route\n\nNo-route reason values:\n- out_of_scope\n- insufficient_specificity\n- missing_mapping\n- unsupported_fragment_type\n\nDo not over-require formality:\n- Do not require document IDs, exact periods, or exact object references for scan/review/anomaly/rule-check requests.\n- Colloquial accounting phrases like \"что висит\", \"что подозрительно\", \"что не сходится\", \"что криво\", \"что аукнется\" are executable if accounting area is understandable.\n\nFragment required fields:\n- fragment_id\n- raw_fragment_text\n- normalized_fragment_text\n- domain_relevance\n- business_scope\n- entity_hints\n- account_hints\n- document_hints\n- register_hints\n- time_scope\n- flags\n- semantic_hints\n- candidate_labels\n- confidence\n- execution_readiness\n- clarification_reason\n- soft_assumption_used\n- route_status\n- 
no_route_reason\n\nSoft assumptions (`soft_assumption_used`) allowed values:\n- period_from_session_context\n- company_scope_defaulted\n- problem_scan_mode_enabled\n\nsemantic_hints fields:\n- scope_target_kind: none | self_scope | selected_object | organization | warehouse | counterparty | contract | item\n- scope_target_text: short user-facing mention when scope_target_kind is organization/warehouse/counterparty/contract/item\n- date_scope_kind: explicit | implicit_current | missing\n- self_scope_detected: true when wording means \"our own scope\" or \"this connected company\"\n- selected_object_scope_detected: true when wording refers to currently selected object/item\n\nSemantic-hints policy:\n- Use semantic_hints to preserve meaning of colloquial or elliptical wording.\n- Do not convert vague possessive wording into a fake literal anchor.\n- If user means \"our company / our connected base / current selected scope\", prefer self_scope_detected=true and scope_target_kind=self_scope.\n- If user refers to a company or organization colloquially, prefer scope_target_kind=organization, not warehouse.\n- If user refers to the selected row/object/item, prefer selected_object_scope_detected=true and scope_target_kind=selected_object or item when item text is explicit.\n- Do not invent exact database names. Use short text from the user in scope_target_text.\n\nExamples:\n- \"что на складе у нас\" -> semantic_hints.scope_target_kind=self_scope; self_scope_detected=true; date_scope_kind=implicit_current\n- \"что на складе конторы альтернатива\" -> semantic_hints.scope_target_kind=organization; scope_target_text=\"альтернатива\"; date_scope_kind=implicit_current\n- \"по выбранному объекту ... 
кто поставщик\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n- \"по ней какие документы\" -> semantic_hints.scope_target_kind=selected_object; selected_object_scope_detected=true\n\nGlobal notes:\n- global_notes.needs_clarification should be true only when execution is truly blocked for all in-scope fragments.\n- global_notes.clarification_reason must explain the blocker.\n\nSchema version must be:\n- \"schema_version\": \"normalized_query_v2_0_2\"",
|
||||||
"domainPrompt": "Контур: бухгалтерический ассистент 1С/NDC.\n\nДоменные ориентиры:\n- счета: 01, 02, 10, 41, 51, 60, 62, 68, 90, 97\n- типовые сущности:\n - контрагент\n - договор\n - документ\n - реализация\n - поступление\n - оплата\n - банковская выписка\n - проводка\n - регистр\n - товар\n - склад\n - основное средство\n - расход будущих периодов\n - взаиморасчеты\n - хвост периода\n - закрывающие документы\n - акт сверки\n\nСеманика живого языка:\n- \"не бьется\" = reconciliation mismatch\n- \"хвост\" = unresolved residual / unclosed balance\n- \"не собралось в цепочку\" = missing causal chain\n- \"чем подтверждается\" = evidence required\n- \"что проверить первым\" = prioritized review list\n- \"зависло\" = unresolved accounting case\n- \"продажа раньше прихода\" = sales before supply pattern\n- \"ошибка по дате\" = period/date inconsistency\n- \"реализация без оплаты\" = receivable not closed\n- \"не видно прихода под реализацию\" = causal join between sale and supply required\n\nЕсли вопрос связывает документы, оплаты, проводки, даты, договоры и контрагентов, это обычно causal cross-entity сценарий.",
|
"domainPrompt": "Контекст домена: бухгалтерия 1С/NDC.\n\nКлючевые счета:\n- 01, 02, 10, 41, 51, 60, 62, 68.02, 90, 97.\n\nТиповые сущности:\n- контрагент, договор, документ реализации, документ поступления, оплата, проводка, регистр, закрывающий документ.\n\nЛексика causal и сверки (сильные сигналы для cross_entity):\n- \"не бьется\", \"не сходится\", \"не видно\", \"не собралось\", \"повисло\", \"хвост\";\n- \"разложи по документам/оплатам/закрывающим\";\n- \"чем подтверждается\", \"где ошибка в цепочке\", \"что пошло криво\".\n\nЛексика точечного drilldown:\n- \"документ №...\", \"ref\", \"строка проводки\", \"покажи конкретную операцию\", \"точный source-of-record\".\n\nЛексика rule-based контроля:\n- \"проверь настройки\", \"ошибка срока/даты\", \"контроль 97/10/ОС\", \"нарушение правила учета\".\n\nЛексика обзорной аналитики:\n- \"рейтинг\", \"топ рисков\", \"в целом по компании\", \"перед закрытием периода\", \"приоритизация проверок\".\n\nВажное правило:\nЕсли в одном вопросе есть и риск-лексика, и цепочка document/payment/posting, не понижать задачу до чистого `store_feature_risk`.\nПриоритет у causal cross-entity семантики.\n\nНеформальные scope-формулировки:\n- \"у нас\", \"у себя\", \"по нашей базе\", \"в нашей конторе\" обычно означают self/company scope, а не буквальный якорь склада;\n- \"контора альтернатива\", \"альтернатива\", \"по фирме альтернатива\" обычно означают organization scope, а не склад;\n- \"по выбранному объекту\", \"по ней\", \"по этой позиции\", \"по этому товару\" обычно означают selected object scope.\n\nДля semantic_hints:\n- если речь про текущую подключенную компанию/нашу базу -> scope_target_kind=self_scope;\n- если речь про организацию/фирму/контору -> scope_target_kind=organization;\n- если речь про выбранную позицию/объект -> scope_target_kind=selected_object;\n- для складских snapshot-вопросов без даты обычно date_scope_kind=implicit_current.",
|
||||||
"schemaNotes": "schema_version: normalized_query_v1\nВозвращай только JSON.\nНикаких дополнительных полей вне схемы.\nВсе булевы requires-поля должны быть заполнены явно.\nЕсли поле неизвестно, используй пустой массив, null или missing/inferred по смыслу.",
|
"schemaNotes": "v2.0.2: execution-state hardening + explicit route_status/no_route_reason. ????? normalized_query_v2_0_2.",
|
||||||
"fewShotExamples": "[EXAMPLE 1]\nQ: По каким поставщикам не бьются взаиморасчеты по 60 счету?\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"По каким поставщикам не бьются взаиморасчеты по 60 счету?\",\n \"normalized_question\": \"Показать поставщиков с расхождениями по взаиморасчетам на счете 60 с объяснимой связкой документов и оплат.\",\n \"intent_class\": \"cross_entity\",\n \"business_problem_type\": \"supplier_reconciliation_mismatch\",\n \"domain_entities\": [\"supplier\", \"settlements\", \"documents\", \"payments\", \"postings\"],\n \"accounts_mentioned\": [\"60\"],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"missing\", \"value\": null, \"confidence\": \"low\" },\n \"requires\": {\n \"needs_cross_entity_join\": true,\n \"needs_causal_chain\": true,\n \"needs_exact_object_trace\": false,\n \"needs_ranking\": false,\n \"needs_anomaly_summary\": false,\n \"needs_runtime_truth\": false,\n \"needs_period_cut\": false,\n \"needs_evidence\": true\n },\n \"expected_output_shape\": \"reconciliation_report\",\n \"route_hint\": \"hybrid_store_plus_live\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"high\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}\n\n[EXAMPLE 2]\nQ: Сделай рейтинг самых проблемных хвостов на конец июня.\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"Сделай рейтинг самых проблемных хвостов на конец июня.\",\n \"normalized_question\": \"Построить рейтинг наиболее проблемных незакрытых хвостов на конец июня.\",\n \"intent_class\": \"heavy_analytical\",\n \"business_problem_type\": \"period_close_risk_prioritization\",\n \"domain_entities\": [\"period_close\", \"risk_cases\"],\n \"accounts_mentioned\": [],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"explicit\", \"value\": \"июнь\", \"confidence\": \"high\" },\n \"requires\": {\n 
\"needs_cross_entity_join\": false,\n \"needs_causal_chain\": false,\n \"needs_exact_object_trace\": false,\n \"needs_ranking\": true,\n \"needs_anomaly_summary\": true,\n \"needs_runtime_truth\": false,\n \"needs_period_cut\": true,\n \"needs_evidence\": false\n },\n \"expected_output_shape\": \"ranked_list\",\n \"route_hint\": \"batch_refresh_then_store\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"high\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}\n\n[EXAMPLE 3]\nQ: Почему эта проводка вообще появилась?\nA:\n{\n \"schema_version\": \"normalized_query_v1\",\n \"user_question_raw\": \"Почему эта проводка вообще появилась?\",\n \"normalized_question\": \"Объяснить происхождение конкретной проводки и ее source-of-record цепочку.\",\n \"intent_class\": \"drilldown_explain\",\n \"business_problem_type\": \"posting_origin_trace\",\n \"domain_entities\": [\"posting\", \"document\", \"source_record\"],\n \"accounts_mentioned\": [],\n \"documents_mentioned\": [],\n \"registers_mentioned\": [],\n \"period_scope\": { \"type\": \"missing\", \"value\": null, \"confidence\": \"low\" },\n \"requires\": {\n \"needs_cross_entity_join\": false,\n \"needs_causal_chain\": true,\n \"needs_exact_object_trace\": true,\n \"needs_ranking\": false,\n \"needs_anomaly_summary\": false,\n \"needs_runtime_truth\": true,\n \"needs_period_cut\": false,\n \"needs_evidence\": true\n },\n \"expected_output_shape\": \"evidence_chain\",\n \"route_hint\": \"live_mcp_drilldown\",\n \"ambiguities\": [],\n \"confidence\": { \"overall\": \"medium\", \"intent_class\": \"high\", \"route_hint\": \"high\" }\n}"
|
"fewShotExamples": "Q: По каким поставщикам висят хвосты по 60, что подозрительно по цепочке оплат?\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Покажи записи по 97, которые повисли и могут аукнуться.\nA: in_scope fragment, execution_readiness=executable_with_soft_assumptions, route_status=routed, no_route_reason=null.\n\nQ: Чекни, что у нас не так.\nA: unclear/in_scope fragment, execution_readiness=needs_clarification, route_status=no_route, no_route_reason=insufficient_specificity.\n\nQ: Как вообще по ФСБУ правильно?\nA: out_of_scope fragment, execution_readiness=no_route, route_status=no_route, no_route_reason=out_of_scope."
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,467 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Directory two levels above this file (the parent of the folder holding this script).
REPO_ROOT = Path(__file__).resolve().parents[1]

# Schema identifiers and the artifact file name used by this module.
EFFECTIVE_RUNTIME_SCHEMA_VERSION = "agent_effective_runtime_v1"
PROMPT_REGISTRY_HEALTH_SCHEMA_VERSION = "prompt_registry_health_v1"
EFFECTIVE_RUNTIME_FILE_NAME = "effective_runtime.json"

# Locations inside the llm_normalizer subtree, all anchored at REPO_ROOT.
CONFIG_TS = REPO_ROOT.joinpath("llm_normalizer", "backend", "src", "config.ts")
PROMPT_BUILDER_TS = REPO_ROOT.joinpath("llm_normalizer", "backend", "src", "services", "promptBuilder.ts")
PROMPTS_DIR = REPO_ROOT.joinpath("llm_normalizer", "prompts")
PRESETS_DIR = REPO_ROOT.joinpath("llm_normalizer", "data", "presets")
SHARED_LLM_CONNECTION_CONFIG = REPO_ROOT.joinpath("llm_normalizer", "data", "shared_llm_connection.json")

# Loopback URL used as the default MCP proxy address.
DEFAULT_MCP_PROXY_URL = "http://127.0.0.1:6003"

# NOTE(review): presumably prompt versions owned by the assistant runtime rather than
# by BUILTIN_PROMPT_FILES — confirm against the code that consumes this set.
ASSISTANT_RUNTIME_PROMPT_VERSIONS = {"address_query_runtime_v1"}
|
||||||
|
|
||||||
|
# Prompt part files reused by several versions; named once to avoid repeating the literals.
_SYSTEM_DEFAULT = "system/default.txt"
_DOMAIN_V1_1 = "domain/normalizer_domain_v1_1.txt"

# For every built-in prompt version: the file backing each prompt part
# ("system", "developer", "domain", "fewshot").
# NOTE(review): the paths look relative to PROMPTS_DIR — confirm against the loader.
BUILTIN_PROMPT_FILES: dict[str, dict[str, str]] = {
    "normalizer_v1": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/default.txt",
        "domain": "domain/default.txt",
        "fewshot": "fewshot/default.txt",
    },
    "normalizer_v1_1": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v1_1.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_fewshot_v1_1.txt",
    },
    "normalizer_v1_1_1": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v1_1_1.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_fewshot_v1_1_1.txt",
    },
    "normalizer_v1_1_2": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v1_1_2.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_fewshot_v1_1_2.txt",
    },
    "normalizer_v1_1_2_1": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v1_1_2_1.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_fewshot_v1_1_2_1.txt",
    },
    "normalizer_v2": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v2.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_v2.txt",
    },
    "normalizer_v2_0_1": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v2_0_1.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_v2_0_1.txt",
    },
    "normalizer_v2_0_2": {
        "system": _SYSTEM_DEFAULT,
        "developer": "developer/normalizer_v2_0_2.txt",
        "domain": _DOMAIN_V1_1,
        "fewshot": "fewshot/normalizer_v2_0_2.txt",
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def now_utc_iso() -> str:
    """Return the current UTC time as an ISO-8601 string without microseconds."""
    current = datetime.now(timezone.utc)
    whole_seconds = current.replace(microsecond=0)
    return whole_seconds.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def repo_relative(path: Path, repo_root: Path = REPO_ROOT) -> str:
    """Render ``path`` relative to ``repo_root`` using forward slashes.

    Both paths are resolved first. When the resolved path is not located
    under the resolved repository root, the resolved absolute path is
    returned as-is instead.
    """
    absolute_path = path.resolve()
    try:
        inside_repo = absolute_path.relative_to(repo_root.resolve())
    except ValueError:
        # Not under the repository root: fall back to the absolute path.
        return str(absolute_path)
    return str(inside_repo).replace("\\", "/")
|
||||||
|
|
||||||
|
|
||||||
|
def read_json_object(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 JSON file at ``path`` and return it if it is an object.

    Any other top-level JSON value (list, string, number, ...) yields an
    empty dict. Read and decode errors propagate to the caller.
    """
    raw_text = path.read_text(encoding="utf-8")
    document = json.loads(raw_text)
    if not isinstance(document, dict):
        return {}
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def write_json(path: Path, payload: Any) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8", newline="\n")
|
||||||
|
|
||||||
|
|
||||||
|
def git_sha(repo_root: Path = REPO_ROOT) -> str:
    """Return the output of ``git rev-parse HEAD`` run in ``repo_root``.

    Returns the literal ``"unknown"`` when git cannot be started, the call
    fails or times out (10 seconds), exits non-zero, or prints nothing.
    """
    command = ["git", "rev-parse", "HEAD"]
    try:
        completed = subprocess.run(
            command,
            cwd=str(repo_root),
            text=True,
            encoding="utf-8",
            errors="replace",
            capture_output=True,
            check=False,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return "unknown"
    if completed.returncode == 0:
        sha = completed.stdout.strip()
        if sha:
            return sha
    return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def read_default_prompt_version(repo_root: Path = REPO_ROOT) -> str | None:
    """Extract the DEFAULT_PROMPT_VERSION fallback literal from the backend config.ts.

    Returns ``None`` when ``llm_normalizer/backend/src/config.ts`` does not
    exist or does not contain the expected
    ``DEFAULT_PROMPT_VERSION = process.env.DEFAULT_PROMPT_VERSION ?? "..."``
    assignment.
    """
    config_path = repo_root / "llm_normalizer" / "backend" / "src" / "config.ts"
    if not config_path.exists():
        return None
    source_text = config_path.read_text(encoding="utf-8", errors="replace")
    found = re.search(
        r"DEFAULT_PROMPT_VERSION\s*=\s*process\.env\.DEFAULT_PROMPT_VERSION\s*\?\?\s*\"([^\"]+)\"",
        source_text,
    )
    if found is None:
        return None
    return found.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
def load_shared_llm_connection(repo_root: Path = REPO_ROOT) -> dict[str, Any]:
    """Return a copy of the ``connection`` object from shared_llm_connection.json.

    An empty dict is returned when the file is missing, cannot be read or
    parsed as JSON, or its ``connection`` entry is not an object.
    """
    config_path = repo_root / "llm_normalizer" / "data" / "shared_llm_connection.json"
    if not config_path.exists():
        return {}
    try:
        document = read_json_object(config_path)
    except (OSError, json.JSONDecodeError):
        return {}
    connection = document.get("connection")
    if isinstance(connection, dict):
        # Shallow copy so callers cannot mutate the parsed document.
        return dict(connection)
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _env_bool(value: str | None, default_value: bool) -> bool:
|
||||||
|
if value is None or value.strip() == "":
|
||||||
|
return default_value
|
||||||
|
lowered = value.strip().lower()
|
||||||
|
return lowered not in {"0", "false", "off", "no"}
|
||||||
|
|
||||||
|
|
||||||
|
def collect_feature_flags(repo_root: Path = REPO_ROOT) -> dict[str, Any]:
    """Collect FEATURE_* flags declared in the backend config.ts.

    The config file is scanned for declarations of the form
    ``export const FEATURE_X = toBooleanFlag(process.env.FEATURE_X, true|false)``.
    For every match the result maps the flag name to a dict with:

    - ``value``: the flag value after applying the current environment;
    - ``source``: ``"env"`` when a non-blank environment variable decided the
      value, otherwise ``"default"``;
    - ``default``: the default literal parsed from config.ts.

    Returns an empty dict when config.ts does not exist.
    """
    config_path = repo_root / "llm_normalizer" / "backend" / "src" / "config.ts"
    if not config_path.exists():
        return {}
    text = config_path.read_text(encoding="utf-8", errors="replace")
    pattern = re.compile(
        r"export\s+const\s+(FEATURE_[A-Z0-9_]+)\s*=\s*toBooleanFlag\(\s*"
        r"process\.env\.\1\s*,\s*(true|false)\s*\)",
        re.DOTALL,
    )
    flags: dict[str, Any] = {}
    for name, default_raw in pattern.findall(text):
        default_value = default_raw == "true"
        env_value = os.environ.get(name)
        # _env_bool ignores a missing or blank variable and uses the default,
        # so only a non-blank variable counts as an environment override.
        overridden = env_value is not None and env_value.strip() != ""
        flags[name] = {
            "value": _env_bool(env_value, default_value),
            "source": "env" if overridden else "default",
            "default": default_value,
        }
    return flags
|
||||||
|
|
||||||
|
|
||||||
|
def _hash_prompt_files(files: list[dict[str, Any]], repo_root: Path) -> str | None:
|
||||||
|
present_files = [item for item in files if item.get("exists") is True]
|
||||||
|
if not present_files:
|
||||||
|
return None
|
||||||
|
digest = hashlib.sha256()
|
||||||
|
for item in sorted(present_files, key=lambda entry: str(entry.get("relative_path") or "")):
|
||||||
|
path = repo_root / str(item["relative_path"])
|
||||||
|
digest.update(str(item["relative_path"]).replace("\\", "/").encode("utf-8"))
|
||||||
|
digest.update(b"\0")
|
||||||
|
digest.update(path.read_bytes())
|
||||||
|
digest.update(b"\0")
|
||||||
|
return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _prompt_files_for_version(repo_root: Path, prompt_version: str) -> list[dict[str, Any]]:
    """Describe the prompt files registered for ``prompt_version``.

    Looks the version up in ``BUILTIN_PROMPT_FILES`` and returns one record
    per slot with the registry path, the repo-relative POSIX path under
    ``llm_normalizer/prompts``, whether the file exists, and its size in
    bytes (``None`` when missing). Unknown versions yield an empty list.
    """
    definitions = BUILTIN_PROMPT_FILES.get(prompt_version)
    if not definitions:
        return []

    prompts_root = Path("llm_normalizer") / "prompts"

    def describe(slot: str, relative_prompt_path: str) -> dict[str, Any]:
        relative_path = prompts_root / Path(relative_prompt_path)
        file_path = repo_root / relative_path
        present = file_path.exists()
        return {
            "slot": slot,
            "prompt_path": relative_prompt_path.replace("\\", "/"),
            "relative_path": relative_path.as_posix(),
            "exists": present,
            "size_bytes": file_path.stat().st_size if present else None,
        }

    return [describe(slot, prompt_path) for slot, prompt_path in definitions.items()]
|
||||||
|
|
||||||
|
|
||||||
|
def _preset_prompt_versions(repo_root: Path) -> list[dict[str, Any]]:
    """List the ``prompt_version`` declared by each preset JSON file.

    Scans ``llm_normalizer/data/presets/*.json`` in sorted order. Each record
    carries the repo-relative ``path``, the stripped ``prompt_version``
    (``None`` when absent or blank) and a ``status`` of ``"ok"`` or
    ``"invalid_json"`` (file unreadable or not valid JSON). A missing presets
    directory yields an empty list.
    """
    presets_dir = repo_root / "llm_normalizer" / "data" / "presets"
    if not presets_dir.exists():
        return []
    records: list[dict[str, Any]] = []
    for preset_path in sorted(presets_dir.glob("*.json")):
        try:
            payload = read_json_object(preset_path)
        except (OSError, json.JSONDecodeError):
            declared_version = None
            status = "invalid_json"
        else:
            declared_version = str(payload.get("prompt_version") or "").strip() or None
            status = "ok"
        records.append(
            {
                "path": repo_relative(preset_path, repo_root),
                "prompt_version": declared_version,
                "status": status,
            }
        )
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def build_prompt_registry_health(
    repo_root: Path = REPO_ROOT,
    *,
    prompt_version: str | None = None,
    strict_preset_match: bool = True,
) -> dict[str, Any]:
    """Build a health report for the prompt registry.

    Args:
        repo_root: Repository root used to locate config.ts, prompts and presets.
        prompt_version: Version to check; falls back to the default version
            parsed from config.ts, then to ``"unknown"``.
        strict_preset_match: When true, presets declaring a version other than
            the default are failures; otherwise they are only warnings.

    Returns:
        A dict with ``status`` (``"pass"`` when there are no failures, else
        ``"fail"``), the default and active prompt versions, the prompt file
        records and their combined hash, the preset records, and the
        ``failures`` / ``warnings`` lists.
    """
    # Read config.ts once so the default and the active version always agree.
    default_prompt_version = read_default_prompt_version(repo_root)
    active_prompt_version = prompt_version or default_prompt_version or "unknown"
    files = _prompt_files_for_version(repo_root, active_prompt_version)
    failures: list[str] = []
    warnings: list[str] = []

    if active_prompt_version not in BUILTIN_PROMPT_FILES:
        failures.append(f"unknown_prompt_version:{active_prompt_version}")

    missing_files = [
        str(item.get("relative_path"))
        for item in files
        if item.get("exists") is not True
    ]
    if missing_files:
        failures.append("prompt_files_missing:" + ",".join(missing_files))

    prompt_hash = _hash_prompt_files(files, repo_root)
    if not prompt_hash:
        failures.append("prompt_hash_unavailable")

    preset_versions = _preset_prompt_versions(repo_root)
    # Presets are compared against the default version, not the active one.
    mismatched_presets = [
        item
        for item in preset_versions
        if item.get("status") == "ok"
        and item.get("prompt_version")
        and default_prompt_version
        and item.get("prompt_version") != default_prompt_version
    ]
    if mismatched_presets:
        message = "preset_version_mismatch:" + ",".join(
            f"{item['path']}={item['prompt_version']}" for item in mismatched_presets
        )
        if strict_preset_match:
            failures.append(message)
        else:
            warnings.append(message)

    invalid_presets = [item for item in preset_versions if item.get("status") != "ok"]
    if invalid_presets:
        failures.append("preset_json_invalid:" + ",".join(str(item.get("path")) for item in invalid_presets))

    # "file": every registered file exists; "partial_file": some are missing;
    # "unknown": the version has no registered files at all.
    source = "file" if files and not missing_files else ("unknown" if not files else "partial_file")
    status = "pass" if not failures else "fail"
    return {
        "schema_version": PROMPT_REGISTRY_HEALTH_SCHEMA_VERSION,
        "status": status,
        "default_prompt_version": default_prompt_version,
        "active_prompt_version": active_prompt_version,
        "prompt_source": source,
        "prompt_hash": prompt_hash,
        "prompt_files": files,
        "prompt_builder": repo_relative(PROMPT_BUILDER_TS, repo_root),
        "config": repo_relative(CONFIG_TS, repo_root),
        "preset_versions": preset_versions,
        "failures": failures,
        "warnings": warnings,
        "checked_at": now_utc_iso(),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_effective_prompt_version(repo_root: Path, requested_prompt_version: str | None) -> tuple[str, dict[str, Any]]:
    """Resolve the prompt version to use and explain how it was chosen.

    Returns ``(resolved_version, resolution)`` where ``resolution`` records
    the ``mode``, the requested version and the resolved version:

    - nothing requested: the config.ts default (or ``"unknown"``);
    - a version present in ``BUILTIN_PROMPT_FILES``: used as requested;
    - a version present in ``ASSISTANT_RUNTIME_PROMPT_VERSIONS``: the
      config.ts default is used and the request is also recorded under
      ``assistant_runtime_prompt_version``;
    - anything else: passed through with mode ``unknown_prompt_version``.
    """
    requested = str(requested_prompt_version or "").strip()
    default_prompt_version = read_default_prompt_version(repo_root)
    fallback_version = default_prompt_version or "unknown"

    if requested == "":
        return fallback_version, {
            "mode": "default_prompt_version",
            "requested_prompt_version": None,
            "resolved_prompt_version": fallback_version,
        }

    uses_assistant_schema = False
    if requested in BUILTIN_PROMPT_FILES:
        mode, resolved = "requested_prompt_version", requested
    elif requested in ASSISTANT_RUNTIME_PROMPT_VERSIONS:
        mode, resolved = "assistant_runtime_schema_uses_default_normalizer_prompt", fallback_version
        uses_assistant_schema = True
    else:
        mode, resolved = "unknown_prompt_version", requested

    resolution: dict[str, Any] = {
        "mode": mode,
        "requested_prompt_version": requested,
        "resolved_prompt_version": resolved,
    }
    if uses_assistant_schema:
        resolution["assistant_runtime_prompt_version"] = requested
    return resolved, resolution
|
||||||
|
|
||||||
|
|
||||||
|
def _get_arg(args: argparse.Namespace | None, name: str, default: Any = None) -> Any:
|
||||||
|
if args is None:
|
||||||
|
return default
|
||||||
|
return getattr(args, name, default)
|
||||||
|
|
||||||
|
|
||||||
|
def build_effective_runtime_manifest(
    *,
    runner: str,
    args: argparse.Namespace | None = None,
    repo_root: Path = REPO_ROOT,
    spec_path: Path | None = None,
    output_dir: Path | None = None,
    run_id: str | None = None,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the effective-runtime manifest describing one run.

    Values come from ``args`` first and fall back to the shared LLM
    connection file, environment variables or module defaults. A fallback
    is used both when an attribute is absent from ``args`` and when it is
    present but ``None``.

    Args:
        runner: Name of the runner producing the manifest.
        args: Parsed CLI arguments, if any.
        repo_root: Repository root used to locate config and prompt files.
        spec_path: Optional spec file, recorded repo-relative.
        output_dir: Optional output directory, recorded repo-relative.
        run_id: Optional run identifier.
        extra: Optional runner-specific data stored under ``"extra"``.

    Returns:
        The manifest dict: git SHA, LLM settings, prompt resolution and
        registry health, feature flags and the shared connection snapshot.
    """

    def _arg_or(name: str, fallback: Any) -> Any:
        # getattr() only uses its default for absent attributes, so an
        # attribute present as None must be sent to the fallback explicitly.
        value = _get_arg(args, name)
        return fallback if value is None else value

    requested_prompt_version = str(_get_arg(args, "prompt_version", "") or "").strip() or None
    prompt_version, prompt_resolution = resolve_effective_prompt_version(repo_root, requested_prompt_version)
    # Preset mismatches are only warnings here (strict_preset_match=False).
    prompt_health = build_prompt_registry_health(repo_root, prompt_version=prompt_version, strict_preset_match=False)
    shared_llm = load_shared_llm_connection(repo_root)
    shared_llm_path = repo_root / "llm_normalizer" / "data" / "shared_llm_connection.json"
    llm_provider = str(_get_arg(args, "llm_provider", "") or shared_llm.get("llmProvider") or "unknown")
    llm_model = str(_get_arg(args, "llm_model", "") or shared_llm.get("model") or "unknown")
    llm_base_url = str(_get_arg(args, "llm_base_url", "") or shared_llm.get("baseUrl") or "")
    # "is None" rather than "or": 0 is a legitimate temperature.
    temperature = _arg_or("temperature", shared_llm.get("temperature"))
    max_output_tokens = _arg_or("max_output_tokens", shared_llm.get("maxOutputTokens"))

    manifest: dict[str, Any] = {
        "schema_version": EFFECTIVE_RUNTIME_SCHEMA_VERSION,
        "git_sha": git_sha(repo_root),
        "runner": runner,
        "run_id": run_id,
        "spec_path": repo_relative(spec_path, repo_root) if spec_path else None,
        "output_dir": repo_relative(output_dir, repo_root) if output_dir else None,
        "backend_url": _get_arg(args, "backend_url"),
        "mcp_proxy_url": _arg_or("mcp_proxy_url", os.environ.get("MCP_PROXY_URL") or DEFAULT_MCP_PROXY_URL),
        "mcp_channel": _arg_or("mcp_channel", os.environ.get("MCP_CHANNEL")),
        "llm_provider": llm_provider,
        "llm_model": llm_model,
        "llm_base_url": llm_base_url or None,
        "temperature": temperature,
        "max_output_tokens": max_output_tokens,
        "requested_prompt_version": prompt_resolution.get("requested_prompt_version"),
        "prompt_version": prompt_version,
        "prompt_resolution": prompt_resolution,
        "assistant_runtime_prompt_version": prompt_resolution.get("assistant_runtime_prompt_version"),
        "prompt_source": prompt_health.get("prompt_source"),
        "prompt_hash": prompt_health.get("prompt_hash"),
        "prompt_registry_status": prompt_health.get("status"),
        "prompt_registry_failures": prompt_health.get("failures") or [],
        "prompt_registry_warnings": prompt_health.get("warnings") or [],
        "prompt_files": prompt_health.get("prompt_files") or [],
        "feature_flags": collect_feature_flags(repo_root),
        "shared_llm_connection": {
            "path": repo_relative(shared_llm_path, repo_root),
            "exists": shared_llm_path.exists(),
            # NOTE(review): the whole connection object is copied into the
            # manifest; confirm it can never carry secrets such as API keys.
            "connection": shared_llm,
        },
        "use_mock": bool(_get_arg(args, "use_mock", False)),
        "created_at": now_utc_iso(),
    }
    if extra:
        manifest["extra"] = extra
    return manifest
|
||||||
|
|
||||||
|
|
||||||
|
def write_effective_runtime_manifest(output_dir: Path, manifest: dict[str, Any]) -> Path:
    """Write ``manifest`` as JSON into ``output_dir`` and return the file path.

    The file is named by ``EFFECTIVE_RUNTIME_FILE_NAME``.
    """
    target = output_dir / EFFECTIVE_RUNTIME_FILE_NAME
    write_json(target, manifest)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def write_effective_runtime(
    output_dir: Path,
    *,
    runner: str,
    args: argparse.Namespace | None = None,
    repo_root: Path = REPO_ROOT,
    spec_path: Path | None = None,
    run_id: str | None = None,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the effective-runtime manifest, write it to ``output_dir``, return it.

    All keyword arguments are forwarded unchanged to
    ``build_effective_runtime_manifest``; ``output_dir`` is both recorded in
    the manifest and used as the write location.
    """
    build_options: dict[str, Any] = {
        "runner": runner,
        "args": args,
        "repo_root": repo_root,
        "spec_path": spec_path,
        "output_dir": output_dir,
        "run_id": run_id,
        "extra": extra,
    }
    manifest = build_effective_runtime_manifest(**build_options)
    write_effective_runtime_manifest(output_dir, manifest)
    return manifest
|
||||||
|
|
||||||
|
|
||||||
|
def validate_effective_runtime_manifest(manifest: dict[str, Any], *, manifest_path: Path | None = None) -> None:
|
||||||
|
location = f": {manifest_path}" if manifest_path else ""
|
||||||
|
required_fields = (
|
||||||
|
"git_sha",
|
||||||
|
"runner",
|
||||||
|
"llm_model",
|
||||||
|
"temperature",
|
||||||
|
"max_output_tokens",
|
||||||
|
"prompt_version",
|
||||||
|
"prompt_source",
|
||||||
|
"prompt_hash",
|
||||||
|
)
|
||||||
|
missing_fields = [
|
||||||
|
field_name
|
||||||
|
for field_name in required_fields
|
||||||
|
if manifest.get(field_name) is None or str(manifest.get(field_name)).strip() == ""
|
||||||
|
]
|
||||||
|
if missing_fields:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"{EFFECTIVE_RUNTIME_FILE_NAME} is incomplete{location}: missing "
|
||||||
|
+ ", ".join(missing_fields)
|
||||||
|
)
|
||||||
|
if manifest.get("prompt_registry_status") != "pass":
|
||||||
|
failures = manifest.get("prompt_registry_failures")
|
||||||
|
failure_text = ",".join(str(item) for item in failures) if isinstance(failures, list) else str(failures or "")
|
||||||
|
raise RuntimeError(
|
||||||
|
f"{EFFECTIVE_RUNTIME_FILE_NAME} has failing prompt registry status{location}: "
|
||||||
|
f"{manifest.get('prompt_registry_status')}; {failure_text}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_effective_runtime_manifest(run_dir: Path) -> dict[str, Any]:
    """Load and validate the effective-runtime manifest stored in ``run_dir``.

    Raises:
        RuntimeError: the manifest file is missing, is not valid JSON, has a
            schema version other than ``EFFECTIVE_RUNTIME_SCHEMA_VERSION``,
            or fails ``validate_effective_runtime_manifest``.
    """
    manifest_path = run_dir / EFFECTIVE_RUNTIME_FILE_NAME
    if not manifest_path.exists():
        raise RuntimeError(f"{EFFECTIVE_RUNTIME_FILE_NAME} not found: {manifest_path}")

    try:
        manifest = read_json_object(manifest_path)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"{EFFECTIVE_RUNTIME_FILE_NAME} is invalid JSON: {manifest_path}") from exc

    schema_matches = manifest.get("schema_version") == EFFECTIVE_RUNTIME_SCHEMA_VERSION
    if not schema_matches:
        raise RuntimeError(
            f"{EFFECTIVE_RUNTIME_FILE_NAME} has unsupported schema_version={manifest.get('schema_version')!r}"
        )

    validate_effective_runtime_manifest(manifest, manifest_path=manifest_path)
    return manifest
|
||||||
|
|
@ -13,12 +13,14 @@ from typing import Any
|
||||||
from urllib.error import HTTPError, URLError
|
from urllib.error import HTTPError, URLError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
|
DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
|
||||||
DEFAULT_SESSIONS_DIR = REPO_ROOT / "llm_normalizer" / "data" / "assistant_sessions"
|
DEFAULT_SESSIONS_DIR = REPO_ROOT / "llm_normalizer" / "data" / "assistant_sessions"
|
||||||
DEFAULT_REPORTS_DIR = REPO_ROOT / "llm_normalizer" / "reports"
|
DEFAULT_REPORTS_DIR = REPO_ROOT / "llm_normalizer" / "reports"
|
||||||
DEFAULT_LOOP_SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas"
|
DEFAULT_LOOP_SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas"
|
||||||
|
ISSUE_CATALOG_PATH = REPO_ROOT / "docs" / "orchestration" / "issue_catalog.json"
|
||||||
SHARED_LLM_CONNECTION_CONFIG = REPO_ROOT / "llm_normalizer" / "data" / "shared_llm_connection.json"
|
SHARED_LLM_CONNECTION_CONFIG = REPO_ROOT / "llm_normalizer" / "data" / "shared_llm_connection.json"
|
||||||
DEFAULT_BACKEND_URL = "http://127.0.0.1:8787"
|
DEFAULT_BACKEND_URL = "http://127.0.0.1:8787"
|
||||||
DEFAULT_PROMPT_VERSION = "address_query_runtime_v1"
|
DEFAULT_PROMPT_VERSION = "address_query_runtime_v1"
|
||||||
|
|
@ -37,6 +39,11 @@ ACTIVE_DOMAIN_CONTRACT_SCHEMA_VERSION = "active_domain_contract_v1"
|
||||||
AUTONOMOUS_LOOP_SCHEMA_VERSION = "domain_autonomous_loop_v1"
|
AUTONOMOUS_LOOP_SCHEMA_VERSION = "domain_autonomous_loop_v1"
|
||||||
REPAIR_MODE_LEAD_HANDOFF = "lead-handoff"
|
REPAIR_MODE_LEAD_HANDOFF = "lead-handoff"
|
||||||
REPAIR_MODE_AUTO_CODER = "auto-coder"
|
REPAIR_MODE_AUTO_CODER = "auto-coder"
|
||||||
|
# NOTE(review): presumably the issue codes eligible for the auto-coder repair
# mode; the code that consumes this set is not visible here, so confirm.
AUTO_CODER_ALLOWED_ISSUE_CODES = {
    "technical_garbage_in_answer",
    "business_next_step_missing",
    "business_direct_answer_missing",
}
|
||||||
|
|
||||||
|
|
||||||
def load_shared_local_llm_defaults(config_path: Path | None = None) -> dict[str, Any]:
|
def load_shared_local_llm_defaults(config_path: Path | None = None) -> dict[str, Any]:
|
||||||
|
|
@ -217,6 +224,13 @@ GUARDED_INSUFFICIENCY_LIMITATION_MARKERS = (
|
||||||
GUARDED_INSUFFICIENCY_RESULT_MODES = {"heuristic_candidates"}
|
GUARDED_INSUFFICIENCY_RESULT_MODES = {"heuristic_candidates"}
|
||||||
GUARDED_INSUFFICIENCY_TRUTH_MODES = {"limited"}
|
GUARDED_INSUFFICIENCY_TRUTH_MODES = {"limited"}
|
||||||
GUARDED_INSUFFICIENCY_ANSWER_SHAPES = {"limited_with_reason"}
|
GUARDED_INSUFFICIENCY_ANSWER_SHAPES = {"limited_with_reason"}
|
||||||
|
# Expected result modes that validate_step_contract checks through
# business_expected_result_mode_matches() instead of comparing them directly
# against the step's actual result_mode.
BUSINESS_EXPECTED_RESULT_MODES = {
    "same_inventory_margin_context_or_clarification",
    "ranking_or_limited_accounting_answer",
    "evidence_or_honest_boundary",
    "limited_accounting_answer",
    "clarification_required",
}
|
||||||
|
|
||||||
MCP_DISCOVERY_CHAIN_INTENT_ALIASES: dict[str, tuple[str, ...]] = {
|
MCP_DISCOVERY_CHAIN_INTENT_ALIASES: dict[str, tuple[str, ...]] = {
|
||||||
"business_overview": ("business_overview",),
|
"business_overview": ("business_overview",),
|
||||||
|
|
@ -860,6 +874,90 @@ def read_json_file(file_path: Path) -> dict[str, Any]:
|
||||||
return json.loads(read_text_file(file_path))
|
return json.loads(read_text_file(file_path))
|
||||||
|
|
||||||
|
|
||||||
|
def load_issue_catalog(path: Path = ISSUE_CATALOG_PATH) -> dict[str, Any]:
    """Load the issue catalog JSON from ``path``.

    An empty catalog (schema ``agent_issue_catalog_v1`` with no issues) is
    returned when the file is missing, cannot be read or parsed, or does not
    contain a JSON object at the top level.
    """

    def _empty_catalog() -> dict[str, Any]:
        # Built fresh on every call so callers never share the same dict.
        return {"schema_version": "agent_issue_catalog_v1", "issues": {}}

    if not path.exists():
        return _empty_catalog()
    try:
        payload = read_json_file(path)
    except (OSError, json.JSONDecodeError):
        return _empty_catalog()
    if isinstance(payload, dict):
        return payload
    return _empty_catalog()
|
||||||
|
|
||||||
|
|
||||||
|
def issue_catalog_entry(issue_code: str, catalog: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||||
|
source = catalog if isinstance(catalog, dict) else load_issue_catalog()
|
||||||
|
issues = source.get("issues") if isinstance(source.get("issues"), dict) else {}
|
||||||
|
entry = issues.get(issue_code)
|
||||||
|
return dict(entry) if isinstance(entry, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def default_rerun_matrix_for_problem(problem_type: str) -> list[str]:
    """Return the default rerun packs for a problem type.

    The matrix always starts with ``failed_scenario`` and ends with
    ``accepted_smoke_pack``. Route/capability, answer-surface and
    field-truth problem types get one focused pack in between.
    """
    focused_pack_groups = (
        (("route_gap", "capability_gap", "route_candidate_enablement_gap"), "route_neighbor_pack"),
        (("answer_shape_mismatch", "presentation_gap", "business_utility_gap"), "answer_surface_pack"),
        (("field_mapping_gap", "evidence_gap", "domain_anchor_gap"), "field_truth_pack"),
    )
    matrix = ["failed_scenario"]
    for problem_types, focused_pack in focused_pack_groups:
        if problem_type in problem_types:
            matrix.append(focused_pack)
            break
    matrix.append("accepted_smoke_pack")
    return matrix
|
||||||
|
|
||||||
|
|
||||||
|
def is_margin_profitability_step(step_output: dict[str, Any]) -> bool:
    """Tell whether a step output concerns margin / profitability.

    True when ``is_nomenclature_margin_context`` accepts the step's question
    (``question_resolved``, else ``question_template``), or when the expected
    or required answer contract or any semantic tag contains ``margin`` or
    ``марж``. Both markers are matched case-insensitively.
    """
    question = str(step_output.get("question_resolved") or step_output.get("question_template") or "")
    if is_nomenclature_margin_context(step_output, question):
        return True
    tokens = [
        str(step_output.get("expected_business_answer_contract") or ""),
        str(step_output.get("required_answer_contract") or ""),
        *normalize_string_list(step_output.get("semantic_tags")),
    ]
    # Casefold once so "Margin" / "MARGIN" match just like "Маржа" does.
    folded_tokens = (token.casefold() for token in tokens)
    return any("margin" in folded or "марж" in folded for folded in folded_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def derive_repair_issue_code(step_output: dict[str, Any], problem_type: str) -> str:
    """Pick the issue code that labels a repair target.

    Precedence:
    1. ``margin_domain_leak_accounting_route`` when the step violated
       ``domain_leak_accounting_route`` and is a margin/profitability step;
    2. the first violated invariant among the prioritized answer-quality codes;
    3. ``problem_type`` itself, or ``"other"`` when it is empty.
    """
    violated = normalize_string_list(step_output.get("violated_invariants"))
    if "domain_leak_accounting_route" in violated and is_margin_profitability_step(step_output):
        return "margin_domain_leak_accounting_route"

    prioritized_codes = (
        "technical_garbage_in_answer",
        "business_direct_answer_missing",
        "accounting_contract_missing",
        "business_next_step_missing",
    )
    matched_code = next((code for code in prioritized_codes if code in violated), None)
    if matched_code is not None:
        return matched_code

    if problem_type == "route_candidate_enablement_gap":
        return "route_candidate_enablement_gap"
    if problem_type == "capability_gap":
        return "capability_gap"
    return problem_type or "other"
|
||||||
|
|
||||||
|
|
||||||
|
def expected_answer_contract_for_issue(issue_code: str, step_output: dict[str, Any], catalog_entry: dict[str, Any]) -> str | None:
|
||||||
|
explicit_contract = (
|
||||||
|
str(step_output.get("expected_business_answer_contract") or step_output.get("required_answer_contract") or "").strip()
|
||||||
|
or None
|
||||||
|
)
|
||||||
|
if explicit_contract:
|
||||||
|
return explicit_contract
|
||||||
|
catalog_contract = str(catalog_entry.get("expected_answer_contract") or "").strip()
|
||||||
|
if catalog_contract:
|
||||||
|
return catalog_contract
|
||||||
|
if issue_code.startswith("margin_") or is_margin_profitability_step(step_output):
|
||||||
|
return "margin_profitability_v1"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def evidence_paths_for_step(scenario_dir: Path, step_id: str) -> list[str]:
    """List the repo-relative evidence file paths for one scenario step.

    The paths point at ``output.md``, ``turn.json``, ``step_state.json`` and
    ``debug.json`` under ``<scenario_dir>/steps/<step_id>``. The files are
    not checked for existence.
    """
    step_dir = scenario_dir / "steps" / step_id
    artifact_names = ("output.md", "turn.json", "step_state.json", "debug.json")
    return [repo_relative(step_dir / artifact_name) for artifact_name in artifact_names]
|
||||||
|
|
||||||
|
|
||||||
def extract_conversation_from_session(session_record: dict[str, Any]) -> list[dict[str, Any]]:
|
def extract_conversation_from_session(session_record: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
items = session_record.get("items")
|
items = session_record.get("items")
|
||||||
if isinstance(items, list) and items:
|
if isinstance(items, list) and items:
|
||||||
|
|
@ -2188,6 +2286,75 @@ def is_validated_guarded_insufficiency_answer(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _business_review_is_clean(step_state: dict[str, Any]) -> bool:
|
||||||
|
business_review = step_state.get("business_first_review")
|
||||||
|
if not isinstance(business_review, dict):
|
||||||
|
return True
|
||||||
|
return len(normalize_string_list(business_review.get("issue_codes"))) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def business_expected_result_mode_matches(expected_result_mode: str, step_state: dict[str, Any]) -> bool:
    """Check a business-level expected result mode against the step state.

    Every supported mode requires a clean business review. On top of that:

    - ``clarification_required``: the truth mode or answer shape asks for
      clarification, or the reply is ``partial_coverage`` with response type
      ``LIMITED_WITH_REASON``;
    - ``limited_accounting_answer`` / ``ranking_or_limited_accounting_answer``:
      a non-empty answer with an accepted reply type in the inventory margin
      ranking context;
    - ``evidence_or_honest_boundary``: a non-empty answer with an accepted
      reply type;
    - ``same_inventory_margin_context_or_clarification``: a non-empty answer
      with an accepted reply type that is either in the margin context or
      asks for clarification.

    Any other mode returns False.
    """

    def _field(name: str) -> str:
        return str(step_state.get(name) or "").strip()

    reply_type = _field("reply_type")
    response_type = _field("response_type")
    truth_mode = _field("truth_mode")
    answer_shape = _field("answer_shape")
    detected_intent = _field("detected_intent")
    capability_id = _field("capability_id")
    assistant_text = _field("assistant_text")
    clean_business_review = _business_review_is_clean(step_state)

    in_margin_context = (
        detected_intent == "inventory_margin_ranking_for_nomenclature"
        or capability_id == "inventory_inventory_margin_ranking_for_nomenclature"
    )
    asks_clarification = (
        truth_mode == "clarification_required"
        or answer_shape == "clarification_required"
    )
    has_accepted_answer = bool(assistant_text) and reply_type in {
        "partial_coverage",
        "factual",
        "factual_with_explanation",
    }

    if expected_result_mode == "clarification_required":
        limited_with_reason = reply_type == "partial_coverage" and response_type == "LIMITED_WITH_REASON"
        return clean_business_review and (asks_clarification or limited_with_reason)

    if expected_result_mode in ("limited_accounting_answer", "ranking_or_limited_accounting_answer"):
        return clean_business_review and in_margin_context and has_accepted_answer

    if expected_result_mode == "evidence_or_honest_boundary":
        return clean_business_review and has_accepted_answer

    if expected_result_mode == "same_inventory_margin_context_or_clarification":
        return (
            clean_business_review
            and has_accepted_answer
            and (in_margin_context or asks_clarification)
        )

    return False
|
||||||
|
|
||||||
|
|
||||||
def acceptance_status_from_execution(execution_status: str, hard_fail: bool, semantic_validated: bool = False) -> str:
|
def acceptance_status_from_execution(execution_status: str, hard_fail: bool, semantic_validated: bool = False) -> str:
|
||||||
if execution_status == "blocked":
|
if execution_status == "blocked":
|
||||||
return "blocked"
|
return "blocked"
|
||||||
|
|
@ -2232,7 +2399,11 @@ def validate_step_contract(step_state: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
|
||||||
expected_result_mode = str(state.get("expected_result_mode") or "").strip()
|
expected_result_mode = str(state.get("expected_result_mode") or "").strip()
|
||||||
actual_result_mode = str(state.get("result_mode") or "").strip()
|
actual_result_mode = str(state.get("result_mode") or "").strip()
|
||||||
if expected_result_mode and actual_result_mode and normalize_identifier(actual_result_mode) != normalize_identifier(expected_result_mode):
|
normalized_expected_result_mode = normalize_identifier(expected_result_mode)
|
||||||
|
if normalized_expected_result_mode in BUSINESS_EXPECTED_RESULT_MODES:
|
||||||
|
if not business_expected_result_mode_matches(normalized_expected_result_mode, state):
|
||||||
|
violated_invariants.append("wrong_result_mode")
|
||||||
|
elif expected_result_mode and actual_result_mode and normalize_identifier(actual_result_mode) != normalize_identifier(expected_result_mode):
|
||||||
violated_invariants.append("wrong_result_mode")
|
violated_invariants.append("wrong_result_mode")
|
||||||
|
|
||||||
for forbidden_capability in normalize_string_list(state.get("forbidden_capabilities")):
|
for forbidden_capability in normalize_string_list(state.get("forbidden_capabilities")):
|
||||||
|
|
@ -2716,6 +2887,23 @@ def execute_scenario_manifest(
|
||||||
write_json(scenario_dir / "scenario_manifest.json", manifest)
|
write_json(scenario_dir / "scenario_manifest.json", manifest)
|
||||||
if manifest_source_label:
|
if manifest_source_label:
|
||||||
write_text(scenario_dir / "manifest_source.txt", f"{manifest_source_label}\n")
|
write_text(scenario_dir / "manifest_source.txt", f"{manifest_source_label}\n")
|
||||||
|
source_path = None
|
||||||
|
if manifest_source_label:
|
||||||
|
raw_source_path = str(manifest_source_label).split("#", 1)[0].strip()
|
||||||
|
if raw_source_path:
|
||||||
|
source_path = Path(raw_source_path).resolve()
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
scenario_dir,
|
||||||
|
runner="domain_case_loop.run-scenario",
|
||||||
|
args=args,
|
||||||
|
spec_path=source_path,
|
||||||
|
run_id=manifest["scenario_id"],
|
||||||
|
extra={
|
||||||
|
"domain": manifest["domain"],
|
||||||
|
"title": manifest["title"],
|
||||||
|
"manifest_source_label": manifest_source_label,
|
||||||
|
},
|
||||||
|
)
|
||||||
ensure_scenario_brief(scenario_dir, manifest)
|
ensure_scenario_brief(scenario_dir, manifest)
|
||||||
|
|
||||||
scenario_state: dict[str, Any] = {
|
scenario_state: dict[str, Any] = {
|
||||||
|
|
@ -2845,6 +3033,19 @@ def handle_run_case(args: argparse.Namespace) -> int:
|
||||||
case_id = slugify_case_id(args.domain, args.case_id)
|
case_id = slugify_case_id(args.domain, args.case_id)
|
||||||
case_dir = Path(args.output_root).resolve() / case_id
|
case_dir = Path(args.output_root).resolve() / case_id
|
||||||
case_dir.mkdir(parents=True, exist_ok=True)
|
case_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
case_dir,
|
||||||
|
runner="domain_case_loop.run-case",
|
||||||
|
args=args,
|
||||||
|
run_id=case_id,
|
||||||
|
extra={
|
||||||
|
"domain": args.domain,
|
||||||
|
"case_id": case_id,
|
||||||
|
"slot": args.slot,
|
||||||
|
"expected_capability": args.expected_capability,
|
||||||
|
"expected_result_mode": args.expected_result_mode,
|
||||||
|
},
|
||||||
|
)
|
||||||
ensure_case_brief(
|
ensure_case_brief(
|
||||||
case_dir,
|
case_dir,
|
||||||
domain=args.domain,
|
domain=args.domain,
|
||||||
|
|
@ -3809,6 +4010,7 @@ def build_step_repair_target(
|
||||||
signals.append(f"route_candidate_missing_axes={','.join(missing_axes)}")
|
signals.append(f"route_candidate_missing_axes={','.join(missing_axes)}")
|
||||||
|
|
||||||
target = {
|
target = {
|
||||||
|
"issue_code": derive_repair_issue_code(step_output, problem_type),
|
||||||
"target_id": f"{scenario_id}:{step_id}",
|
"target_id": f"{scenario_id}:{step_id}",
|
||||||
"scenario_id": scenario_id,
|
"scenario_id": scenario_id,
|
||||||
"scenario_title": scenario_title,
|
"scenario_title": scenario_title,
|
||||||
|
|
@ -3829,6 +4031,17 @@ def build_step_repair_target(
|
||||||
"step_state_json": str(step_state_path),
|
"step_state_json": str(step_state_path),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
catalog_entry = issue_catalog_entry(str(target["issue_code"]))
|
||||||
|
target["expected_business_answer_contract"] = expected_answer_contract_for_issue(
|
||||||
|
str(target["issue_code"]),
|
||||||
|
step_output,
|
||||||
|
catalog_entry,
|
||||||
|
)
|
||||||
|
target["evidence_paths"] = evidence_paths_for_step(scenario_dir, step_id)
|
||||||
|
target["allowed_patch_targets"] = normalize_string_list(catalog_entry.get("allowed_patch_targets")) or target["candidate_files"]
|
||||||
|
target["forbidden_patch_targets"] = normalize_string_list(catalog_entry.get("forbidden_patch_targets"))
|
||||||
|
target["rerun_matrix"] = normalize_string_list(catalog_entry.get("rerun_matrix")) or default_rerun_matrix_for_problem(problem_type)
|
||||||
|
target["minimal_patch_direction"] = target["fix_goal"]
|
||||||
route_candidate = compact_route_candidate_handoff(scenario_id=scenario_id, step_id=step_id, step_output=step_output)
|
route_candidate = compact_route_candidate_handoff(scenario_id=scenario_id, step_id=step_id, step_output=step_output)
|
||||||
if route_candidate:
|
if route_candidate:
|
||||||
target["route_candidate"] = route_candidate
|
target["route_candidate"] = route_candidate
|
||||||
|
|
@ -3856,10 +4069,21 @@ def build_priority_repair_foci(targets: list[dict[str, Any]]) -> list[dict[str,
|
||||||
"problem_type": str(target.get("problem_type") or "other"),
|
"problem_type": str(target.get("problem_type") or "other"),
|
||||||
"root_cause_layers": normalize_string_list(target.get("root_cause_layers")),
|
"root_cause_layers": normalize_string_list(target.get("root_cause_layers")),
|
||||||
"candidate_files": normalize_string_list(target.get("candidate_files")),
|
"candidate_files": normalize_string_list(target.get("candidate_files")),
|
||||||
|
"issue_codes": [],
|
||||||
|
"allowed_patch_targets": [],
|
||||||
|
"forbidden_patch_targets": [],
|
||||||
|
"rerun_matrix": [],
|
||||||
"target_ids": [],
|
"target_ids": [],
|
||||||
"scenario_ids": set(),
|
"scenario_ids": set(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
issue_code = str(target.get("issue_code") or "").strip()
|
||||||
|
if issue_code and issue_code not in focus["issue_codes"]:
|
||||||
|
focus["issue_codes"].append(issue_code)
|
||||||
|
for field_name in ("allowed_patch_targets", "forbidden_patch_targets", "rerun_matrix"):
|
||||||
|
for item in normalize_string_list(target.get(field_name)):
|
||||||
|
if item not in focus[field_name]:
|
||||||
|
focus[field_name].append(item)
|
||||||
focus["target_ids"].append(str(target.get("target_id") or ""))
|
focus["target_ids"].append(str(target.get("target_id") or ""))
|
||||||
scenario_id = str(target.get("scenario_id") or "").strip()
|
scenario_id = str(target.get("scenario_id") or "").strip()
|
||||||
if scenario_id:
|
if scenario_id:
|
||||||
|
|
@ -4024,7 +4248,10 @@ def normalize_analyst_priority_repair_target(raw_target: dict[str, Any], index:
|
||||||
fix_goal = f"Resolve the analyst-identified `{problem_type}` on `{scenario_id}:{step_id}` without masking partial evidence as accepted."
|
fix_goal = f"Resolve the analyst-identified `{problem_type}` on `{scenario_id}:{step_id}` without masking partial evidence as accepted."
|
||||||
if not root_cause_layers:
|
if not root_cause_layers:
|
||||||
root_cause_layers = [problem_type]
|
root_cause_layers = [problem_type]
|
||||||
|
issue_code = str(raw_target.get("issue_code") or problem_type or "other").strip()
|
||||||
|
catalog_entry = issue_catalog_entry(issue_code)
|
||||||
return {
|
return {
|
||||||
|
"issue_code": issue_code,
|
||||||
"target_id": f"{scenario_id}:{step_id}",
|
"target_id": f"{scenario_id}:{step_id}",
|
||||||
"scenario_id": scenario_id,
|
"scenario_id": scenario_id,
|
||||||
"scenario_title": str(raw_target.get("scenario_title") or "").strip() or None,
|
"scenario_title": str(raw_target.get("scenario_title") or "").strip() or None,
|
||||||
|
|
@ -4039,6 +4266,23 @@ def normalize_analyst_priority_repair_target(raw_target: dict[str, Any], index:
|
||||||
"violated_invariants": normalize_string_list(raw_target.get("violated_invariants")),
|
"violated_invariants": normalize_string_list(raw_target.get("violated_invariants")),
|
||||||
"fix_goal": fix_goal,
|
"fix_goal": fix_goal,
|
||||||
"candidate_files": candidate_files,
|
"candidate_files": candidate_files,
|
||||||
|
"expected_business_answer_contract": str(
|
||||||
|
raw_target.get("expected_business_answer_contract")
|
||||||
|
or raw_target.get("expected_answer_contract")
|
||||||
|
or catalog_entry.get("expected_answer_contract")
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
or None,
|
||||||
|
"evidence_paths": normalize_string_list(raw_target.get("evidence_paths")),
|
||||||
|
"allowed_patch_targets": normalize_string_list(raw_target.get("allowed_patch_targets"))
|
||||||
|
or normalize_string_list(catalog_entry.get("allowed_patch_targets"))
|
||||||
|
or candidate_files,
|
||||||
|
"forbidden_patch_targets": normalize_string_list(raw_target.get("forbidden_patch_targets"))
|
||||||
|
or normalize_string_list(catalog_entry.get("forbidden_patch_targets")),
|
||||||
|
"rerun_matrix": normalize_string_list(raw_target.get("rerun_matrix"))
|
||||||
|
or normalize_string_list(catalog_entry.get("rerun_matrix"))
|
||||||
|
or default_rerun_matrix_for_problem(problem_type),
|
||||||
|
"minimal_patch_direction": str(raw_target.get("minimal_patch_direction") or fix_goal).strip() or fix_goal,
|
||||||
"signals": ["analyst_priority_target"],
|
"signals": ["analyst_priority_target"],
|
||||||
"target_source": "analyst_verdict.priority_targets",
|
"target_source": "analyst_verdict.priority_targets",
|
||||||
}
|
}
|
||||||
|
|
@ -4253,6 +4497,80 @@ def select_primary_repair_focus(repair_targets: dict[str, Any]) -> dict[str, Any
|
||||||
return primary_focus if isinstance(primary_focus, dict) else None
|
return primary_focus if isinstance(primary_focus, dict) else None
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_auto_coder_gate(
    repair_targets: dict[str, Any],
    assigned_focus: dict[str, Any] | None,
) -> dict[str, Any]:
    """Decide whether the assigned repair focus may run in auto-coder mode.

    The gate passes only when no blocking reason is collected. Reasons are
    recorded when the focus is missing, when it lacks issue codes, root
    layers, allowed/forbidden patch targets or a rerun matrix, when an issue
    code is not in ``AUTO_CODER_ALLOWED_ISSUE_CODES``, when the rerun matrix
    omits ``accepted_smoke_pack``, and when any target referenced by the
    focus is missing its own issue code, patch targets or rerun matrix.

    Args:
        repair_targets: Repair-targets payload; its ``targets`` list is
            searched for the target ids named by the focus.
        assigned_focus: Focus selected for this iteration, or ``None``.

    Returns:
        An ``auto_coder_gate_v1`` dict carrying ``allowed``, the normalized
        focus fields, the ordered ``blocking_reasons`` and a ``reason``
        string (``auto_coder_gate_passed`` or the reasons joined by ``;``).
    """
    focus = assigned_focus or {}
    issue_codes = normalize_string_list(focus.get("issue_codes"))
    root_layers = normalize_string_list(focus.get("root_cause_layers"))
    allowed_patch_targets = normalize_string_list(focus.get("allowed_patch_targets"))
    forbidden_patch_targets = normalize_string_list(focus.get("forbidden_patch_targets"))
    rerun_matrix = normalize_string_list(focus.get("rerun_matrix"))
    focus_id = str(focus.get("focus_id") or "").strip() or None

    # Focus-level checks; the order of the reasons is part of the output.
    blocking_reasons: list[str] = []
    if not assigned_focus:
        blocking_reasons.append("missing_assigned_focus")
    if not issue_codes:
        blocking_reasons.append("missing_issue_code")
    for code in issue_codes:
        if code not in AUTO_CODER_ALLOWED_ISSUE_CODES:
            blocking_reasons.append(f"issue_code_not_allowlisted:{code}")
    focus_requirements = (
        (root_layers, "missing_root_layers"),
        (allowed_patch_targets, "missing_allowed_patch_targets"),
        (forbidden_patch_targets, "missing_forbidden_patch_targets"),
        (rerun_matrix, "missing_rerun_matrix"),
    )
    for value, reason in focus_requirements:
        if not value:
            blocking_reasons.append(reason)
    # Only reported for a non-empty matrix; an empty one is already blocked above.
    if rerun_matrix and "accepted_smoke_pack" not in rerun_matrix:
        blocking_reasons.append("missing_accepted_smoke_pack")

    # Target-level checks for every target the focus refers to.
    raw_targets = repair_targets.get("targets")
    if not isinstance(raw_targets, list):
        raw_targets = []
    wanted_ids = set(normalize_string_list(focus.get("target_ids")))
    focus_targets = [
        candidate
        for candidate in raw_targets
        if isinstance(candidate, dict)
        and str(candidate.get("target_id") or "").strip() in wanted_ids
    ]
    if assigned_focus and not focus_targets:
        blocking_reasons.append("missing_focus_targets")

    target_requirements = (
        ("allowed_patch_targets", "target_missing_allowed_patch_targets"),
        ("forbidden_patch_targets", "target_missing_forbidden_patch_targets"),
        ("rerun_matrix", "target_missing_rerun_matrix"),
    )
    for focus_target in focus_targets:
        target_id = str(focus_target.get("target_id") or "").strip() or "unknown_target"
        if not str(focus_target.get("issue_code") or "").strip():
            blocking_reasons.append(f"target_missing_issue_code:{target_id}")
        for field_name, reason_prefix in target_requirements:
            if not normalize_string_list(focus_target.get(field_name)):
                blocking_reasons.append(f"{reason_prefix}:{target_id}")

    allowed = len(blocking_reasons) == 0
    reason = "auto_coder_gate_passed" if allowed else ";".join(blocking_reasons)
    return {
        "schema_version": "auto_coder_gate_v1",
        "allowed": allowed,
        "mode": REPAIR_MODE_AUTO_CODER,
        "focus_id": focus_id,
        "issue_codes": issue_codes,
        "root_layers": root_layers,
        "allowed_patch_targets": allowed_patch_targets,
        "forbidden_patch_targets": forbidden_patch_targets,
        "rerun_matrix": rerun_matrix,
        "allowlisted_issue_codes": sorted(AUTO_CODER_ALLOWED_ISSUE_CODES),
        "blocking_reasons": blocking_reasons,
        "reason": reason,
        "policy": {
            "auto_coder_default": False,
            "requires_issue_catalog_contract": True,
            "requires_accepted_smoke_pack": True,
            "lead_owns_merge_and_acceptance": True,
        },
    }
|
||||||
|
|
||||||
|
|
||||||
def build_repair_targets_summary(repair_targets: dict[str, Any]) -> str:
|
def build_repair_targets_summary(repair_targets: dict[str, Any]) -> str:
|
||||||
lines = [
|
lines = [
|
||||||
"# Repair targets",
|
"# Repair targets",
|
||||||
|
|
@ -4535,6 +4853,7 @@ def build_coder_loop_prompt(
|
||||||
assigned_focus: dict[str, Any] | None,
|
assigned_focus: dict[str, Any] | None,
|
||||||
analyst_verdict_path: Path,
|
analyst_verdict_path: Path,
|
||||||
analyst_verdict_json: str,
|
analyst_verdict_json: str,
|
||||||
|
auto_coder_gate_json: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
assigned_focus_block = (
|
assigned_focus_block = (
|
||||||
textwrap.dedent(
|
textwrap.dedent(
|
||||||
|
|
@ -4548,6 +4867,17 @@ def build_coder_loop_prompt(
|
||||||
if assigned_focus
|
if assigned_focus
|
||||||
else "Assigned deterministic repair focus for this iteration: none"
|
else "Assigned deterministic repair focus for this iteration: none"
|
||||||
)
|
)
|
||||||
|
auto_coder_gate_block = ""
|
||||||
|
if auto_coder_gate_json:
|
||||||
|
auto_coder_gate_block = textwrap.dedent(
|
||||||
|
f"""\
|
||||||
|
|
||||||
|
Auto-coder gate:
|
||||||
|
```json
|
||||||
|
{auto_coder_gate_json}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
)
|
||||||
return textwrap.dedent(
|
return textwrap.dedent(
|
||||||
f"""\
|
f"""\
|
||||||
You are the `domain_coder` for NDC_1C.
|
You are the `domain_coder` for NDC_1C.
|
||||||
|
|
@ -4576,6 +4906,8 @@ def build_coder_loop_prompt(
|
||||||
- use `root_cause_layers`, `broken_edge_ids`, `violated_invariants`, and business-utility scores from the analyst verdict to choose the smallest fix;
|
- use `root_cause_layers`, `broken_edge_ids`, `violated_invariants`, and business-utility scores from the analyst verdict to choose the smallest fix;
|
||||||
- use the deterministic repair targets to choose the highest-leverage repair focus first; within that focus, patch the narrowest shared layer that can clear the most `P0`/`P1` targets without architecture drift;
|
- use the deterministic repair targets to choose the highest-leverage repair focus first; within that focus, patch the narrowest shared layer that can clear the most `P0`/`P1` targets without architecture drift;
|
||||||
- the assigned deterministic repair focus below is mandatory for this iteration; do not switch to a lower-priority focus unless you are blocked from making a safe patch for the assigned focus;
|
- the assigned deterministic repair focus below is mandatory for this iteration; do not switch to a lower-priority focus unless you are blocked from making a safe patch for the assigned focus;
|
||||||
|
- auto-coder mode is allowed only for the issue codes and patch targets passed by the auto-coder gate; do not edit outside `allowed_patch_targets`;
|
||||||
|
- do not touch any `forbidden_patch_targets`; if the fix needs one, return `blocked` instead of patching;
|
||||||
- if the analyst verdict is optimistic but deterministic repair targets still contain `P0` or `P1`, trust the deterministic repair targets and keep fixing the pack;
|
- if the analyst verdict is optimistic but deterministic repair targets still contain `P0` or `P1`, trust the deterministic repair targets and keep fixing the pack;
|
||||||
- prioritize state continuity, selected-object persistence, stable `focus_object`, stable `answer_object`, reusable `provenance_bundle` / `sale_trace_bundle`, action-first answer behavior, compact micro-action answers, answer layering, temporal honesty, and field-truth mapping when those are the blocking layers;
|
- prioritize state continuity, selected-object persistence, stable `focus_object`, stable `answer_object`, reusable `provenance_bundle` / `sale_trace_bundle`, action-first answer behavior, compact micro-action answers, answer layering, temporal honesty, and field-truth mapping when those are the blocking layers;
|
||||||
- do not broaden scope when the analyst says the defect is mainly `object_memory_gap`, `followup_action_resolution_gap`, `bundle_reuse_gap`, `field_mapping_gap`, `temporal_honesty_gap`, `answer_shape_mismatch`, or `business_utility_gap`;
|
- do not broaden scope when the analyst says the defect is mainly `object_memory_gap`, `followup_action_resolution_gap`, `bundle_reuse_gap`, `field_mapping_gap`, `temporal_honesty_gap`, `answer_shape_mismatch`, or `business_utility_gap`;
|
||||||
|
|
@ -4596,6 +4928,7 @@ def build_coder_loop_prompt(
|
||||||
```
|
```
|
||||||
|
|
||||||
{assigned_focus_block}
|
{assigned_focus_block}
|
||||||
|
{auto_coder_gate_block}
|
||||||
|
|
||||||
- then return JSON only and follow the schema exactly.
|
- then return JSON only and follow the schema exactly.
|
||||||
"""
|
"""
|
||||||
|
|
@ -4638,6 +4971,191 @@ def _limited_dict_items(raw_items: Any, limit: int = 5) -> list[dict[str, Any]]:
|
||||||
return [item for item in raw_items[:limit] if isinstance(item, dict)]
|
return [item for item in raw_items[:limit] if isinstance(item, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_business_audit_status(loop_decision: str) -> str:
    """Map a loop decision onto a business-audit status.

    The four recognised statuses pass through unchanged after whitespace
    trimming. Every other value, including ``"continue"``, an empty string
    and ``None``, is reported as ``"partial"``. Matching is case-sensitive.
    """
    passthrough_statuses = ("accepted", "partial", "blocked", "needs_exact_capability")
    status = str(loop_decision or "").strip()
    # "continue" and any unrecognised decision both collapse to "partial".
    return status if status in passthrough_statuses else "partial"
|
||||||
|
|
||||||
|
|
||||||
|
def collect_rerun_matrix(repair_targets: dict[str, Any]) -> list[str]:
    """Merge the rerun matrices of all repair targets into one ordered list.

    Each dict target contributes its own ``rerun_matrix``; when that is
    empty, the matrix of the issue-catalog entry for its ``issue_code`` is
    used instead. Items keep first-seen order and are not repeated.

    Args:
        repair_targets: Repair-targets payload with a ``targets`` list.

    Returns:
        The merged matrix, or ``["failed_scenario", "accepted_smoke_pack"]``
        when nothing was collected.
    """
    catalog = load_issue_catalog()
    raw_targets = repair_targets.get("targets")
    if not isinstance(raw_targets, list):
        raw_targets = []

    merged: list[str] = []
    for candidate in raw_targets:
        if not isinstance(candidate, dict):
            continue
        code = str(candidate.get("issue_code") or "").strip()
        entry = issue_catalog_entry(code, catalog) if code else {}
        own_matrix = normalize_string_list(candidate.get("rerun_matrix"))
        effective_matrix = own_matrix or normalize_string_list(entry.get("rerun_matrix"))
        for step in effective_matrix:
            if step in merged:
                continue
            merged.append(step)

    if merged:
        return merged
    return ["failed_scenario", "accepted_smoke_pack"]
|
||||||
|
|
||||||
|
|
||||||
|
def build_issue_catalog_snapshot(repair_targets: dict[str, Any], catalog: dict[str, Any] | None = None) -> dict[str, Any]:
    """Snapshot the issue-catalog entries for the issue codes seen in the targets.

    Args:
        repair_targets: Repair-targets payload; ``targets`` is expected to be
            a list of dicts. Any other value (including ``None``) is treated
            as "no targets", matching the sibling builders in this module.
        catalog: Pre-loaded issue catalog. When it is not a dict the catalog
            is loaded via ``load_issue_catalog()``.

    Returns:
        An ``issue_catalog_snapshot_v1`` dict with the sorted observed issue
        codes and, for each code that has a dict entry in the catalog, that
        entry.
    """
    source = catalog if isinstance(catalog, dict) else load_issue_catalog()
    issues = source.get("issues")
    if not isinstance(issues, dict):
        issues = {}

    # `.get("targets", [])` only falls back when the key is absent; an explicit
    # null or malformed value would make the iteration below raise TypeError.
    raw_targets = repair_targets.get("targets")
    if not isinstance(raw_targets, list):
        raw_targets = []

    seen_codes: set[str] = set()
    for target in raw_targets:
        if not isinstance(target, dict):
            continue
        code = str(target.get("issue_code") or "").strip()
        if code:
            seen_codes.add(code)
    observed_codes = sorted(seen_codes)

    return {
        "schema_version": "issue_catalog_snapshot_v1",
        "source_catalog": repo_relative(ISSUE_CATALOG_PATH),
        "observed_issue_codes": observed_codes,
        "issues": {
            issue_code: issues.get(issue_code)
            for issue_code in observed_codes
            if isinstance(issues.get(issue_code), dict)
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str, Any] | None = None) -> dict[str, Any]:
    """List one detector candidate per distinct (issue code, detector) pair.

    Detectors come from the catalog entry of each target's ``issue_code``.
    When the entry names none and the code is non-empty, a single
    ``<issue_code>_detector`` placeholder is used. The first target that
    produces a pair supplies its severity, target id and evidence paths.

    Args:
        repair_targets: Repair-targets payload with a ``targets`` list.
        catalog: Pre-loaded issue catalog. When it is not a dict the catalog
            is loaded via ``load_issue_catalog()``.

    Returns:
        A ``detector_candidates_v1`` dict with ``candidate_count`` and the
        ordered ``candidates`` list.
    """
    source = catalog if isinstance(catalog, dict) else load_issue_catalog()
    issues = source.get("issues")
    if not isinstance(issues, dict):
        issues = {}
    raw_targets = repair_targets.get("targets")
    if not isinstance(raw_targets, list):
        raw_targets = []

    emitted_pairs: set[tuple[str, str]] = set()
    candidates: list[dict[str, Any]] = []
    for candidate_target in raw_targets:
        if not isinstance(candidate_target, dict):
            continue
        code = str(candidate_target.get("issue_code") or "").strip()
        entry = issues.get(code)
        if not isinstance(entry, dict):
            entry = {}
        detector_names = normalize_string_list(entry.get("detectors"))
        if code and not detector_names:
            # No detector registered for this code yet: propose a placeholder.
            detector_names = [f"{code}_detector"]
        for detector_name in detector_names:
            pair = (code, detector_name)
            if pair not in emitted_pairs:
                emitted_pairs.add(pair)
                candidates.append(
                    {
                        "issue_code": code,
                        "detector": detector_name,
                        "severity": candidate_target.get("severity"),
                        "sample_target_id": candidate_target.get("target_id"),
                        "evidence_paths": candidate_target.get("evidence_paths") or [],
                    }
                )

    return {
        "schema_version": "detector_candidates_v1",
        "candidate_count": len(candidates),
        "candidates": candidates,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_blocking_issue_contract(target: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]:
    """Build the blocking-issue contract for one repair target.

    Target fields take precedence; the issue-catalog entry for the target's
    issue code supplies fallbacks for the expected answer contract, root
    layers, business meaning, patch targets and rerun matrix.

    Args:
        target: A repair target dict.
        catalog: Loaded issue catalog, passed to ``issue_catalog_entry``.

    Returns:
        A dict describing the issue, its evidence and its patch boundaries.
    """
    issue_code = str(target.get("issue_code") or target.get("problem_type") or "other").strip()
    entry = issue_catalog_entry(issue_code, catalog)

    evidence_paths = target.get("evidence_paths") or []
    # Indexing a bare string would yield its first character, not a path.
    if isinstance(evidence_paths, str):
        actual_answer_path = evidence_paths
    elif isinstance(evidence_paths, (list, tuple)):
        actual_answer_path = evidence_paths[0]
    else:
        actual_answer_path = None

    return {
        "issue_code": issue_code,
        "severity": target.get("severity"),
        # Prefer an explicit domain; scenario_id is only a stand-in when the
        # target carries none (it is reported separately just below).
        "domain": target.get("domain") or target.get("scenario_id"),
        "scenario_id": target.get("scenario_id"),
        "step_id": target.get("step_id"),
        "user_question": target.get("question_resolved"),
        "expected_business_answer_contract": target.get("expected_business_answer_contract")
        or entry.get("expected_answer_contract"),
        "actual_answer_path": actual_answer_path,
        "evidence_paths": evidence_paths,
        "root_layers": target.get("root_cause_layers") or entry.get("root_layers") or [],
        "business_mismatch": target.get("fix_goal") or entry.get("business_meaning"),
        "minimal_patch_direction": target.get("minimal_patch_direction") or target.get("fix_goal"),
        "allowed_patch_targets": target.get("allowed_patch_targets")
        or entry.get("allowed_patch_targets")
        or target.get("candidate_files")
        or [],
        "forbidden_patch_targets": target.get("forbidden_patch_targets") or entry.get("forbidden_patch_targets") or [],
        "rerun_matrix": target.get("rerun_matrix") or entry.get("rerun_matrix") or [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_business_audit_contract(
    *,
    analyst_verdict: dict[str, Any],
    repair_targets: dict[str, Any],
    target_score: int,
    loop_decision: str,
    analyst_accepted_gate: bool,
    accepted_gate: bool,
    deterministic_gate_ok: bool,
    deterministic_gate_reason: str,
    business_audit_markdown_path: Path,
    analyst_verdict_path: Path,
    repair_targets_path: Path,
    business_audit_json_path: Path | None = None,
    issue_catalog_snapshot_path: Path | None = None,
    rerun_matrix_path: Path | None = None,
    detector_candidates_path: Path | None = None,
) -> dict[str, Any]:
    """Assemble the machine-readable business-audit contract for one loop pass.

    Combines the analyst verdict, the gate outcomes and the repair targets
    into a ``business_audit_contract_v1`` dict. Only targets with severity
    ``P0`` or ``P1`` become ``blocking_issues``. Quality flags other than
    ``regression_detected`` default to ``True`` when the verdict omits them.

    Args:
        analyst_verdict: Analyst verdict payload.
        repair_targets: Repair-targets payload with a ``targets`` list.
        target_score: Score threshold, echoed into the contract.
        loop_decision: Raw loop decision; also normalized into
            ``overall_status``.
        analyst_accepted_gate: Echoed into the contract.
        accepted_gate: Echoed into the contract.
        deterministic_gate_ok: Echoed into the contract.
        deterministic_gate_reason: Echoed into the contract.
        business_audit_markdown_path: Always referenced in ``artifact_refs``.
        analyst_verdict_path: Always referenced in ``artifact_refs``.
        repair_targets_path: Always referenced in ``artifact_refs``.
        business_audit_json_path: Referenced only when not ``None``.
        issue_catalog_snapshot_path: Referenced only when not ``None``.
        rerun_matrix_path: Referenced only when not ``None``.
        detector_candidates_path: Referenced only when not ``None``.

    Returns:
        The contract dict.
    """
    catalog = load_issue_catalog()
    raw_targets = repair_targets.get("targets")
    if not isinstance(raw_targets, list):
        raw_targets = []

    blocking_severities = {"P0", "P1"}
    blocking_issues = []
    for candidate in raw_targets:
        if not isinstance(candidate, dict):
            continue
        if str(candidate.get("severity") or "").upper() in blocking_severities:
            blocking_issues.append(build_blocking_issue_contract(candidate, catalog))

    rerun_matrix = collect_rerun_matrix(repair_targets)

    created_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
    overall_status = normalize_business_audit_status(loop_decision)
    quality_score = int(analyst_verdict.get("quality_score") or 0)

    # Flags the verdict does not report are treated as OK.
    quality_flags = {
        flag_name: bool(analyst_verdict.get(flag_name, True))
        for flag_name in (
            "direct_answer_ok",
            "business_usefulness_ok",
            "temporal_honesty_ok",
            "field_truth_ok",
            "answer_layering_ok",
        )
    }
    quality_flags["regression_detected"] = bool(analyst_verdict.get("regression_detected"))

    contract = {
        "schema_version": "business_audit_contract_v1",
        "created_at": created_at,
        "overall_status": overall_status,
        "quality_score": quality_score,
        "target_score": target_score,
        "loop_decision": loop_decision,
        "analyst_accepted_gate": analyst_accepted_gate,
        "accepted_gate": accepted_gate,
        "deterministic_gate_ok": deterministic_gate_ok,
        "deterministic_gate_reason": deterministic_gate_reason,
        "human_meaning": {
            "user_intent_summary": analyst_verdict.get("user_intent_summary"),
            "expected_direct_answer": analyst_verdict.get("expected_direct_answer"),
            "actual_direct_answer": analyst_verdict.get("actual_direct_answer"),
        },
        "quality_flags": quality_flags,
        "root_layers": normalize_string_list(analyst_verdict.get("root_cause_layers")),
        "violated_invariants": normalize_string_list(analyst_verdict.get("violated_invariants")),
        "blocking_issues": blocking_issues,
        "repair_targets_summary": {
            "target_count": repair_targets.get("target_count"),
            "severity_counts": repair_targets.get("severity_counts") or {},
            "priority_foci": _limited_dict_items(repair_targets.get("priority_foci"), limit=8),
        },
        "rerun_matrix": rerun_matrix,
        "artifact_refs": {
            "business_audit_md": repo_relative(business_audit_markdown_path),
            "analyst_verdict_json": repo_relative(analyst_verdict_path),
            "repair_targets_json": repo_relative(repair_targets_path),
        },
    }

    # Optional artifacts are appended after the mandatory ones, in this order.
    optional_refs = (
        ("business_audit_json", business_audit_json_path),
        ("issue_catalog_snapshot_json", issue_catalog_snapshot_path),
        ("rerun_matrix_json", rerun_matrix_path),
        ("detector_candidates_json", detector_candidates_path),
    )
    for ref_key, ref_path in optional_refs:
        if ref_path is not None:
            contract["artifact_refs"][ref_key] = repo_relative(ref_path)
    return contract
|
||||||
|
|
||||||
|
|
||||||
def build_business_audit_markdown(
|
def build_business_audit_markdown(
|
||||||
*,
|
*,
|
||||||
analyst_verdict: dict[str, Any],
|
analyst_verdict: dict[str, Any],
|
||||||
|
|
@ -4724,6 +5242,10 @@ def build_lead_coder_handoff(
|
||||||
analyst_verdict_path: Path,
|
analyst_verdict_path: Path,
|
||||||
repair_targets_path: Path,
|
repair_targets_path: Path,
|
||||||
business_audit_path: Path,
|
business_audit_path: Path,
|
||||||
|
business_audit_json_path: Path | None = None,
|
||||||
|
issue_catalog_snapshot_path: Path | None = None,
|
||||||
|
rerun_matrix_path: Path | None = None,
|
||||||
|
detector_candidates_path: Path | None = None,
|
||||||
analyst_verdict: dict[str, Any],
|
analyst_verdict: dict[str, Any],
|
||||||
repair_targets: dict[str, Any],
|
repair_targets: dict[str, Any],
|
||||||
target_score: int,
|
target_score: int,
|
||||||
|
|
@ -4738,7 +5260,16 @@ def build_lead_coder_handoff(
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
assigned_focus = select_primary_repair_focus(repair_targets)
|
assigned_focus = select_primary_repair_focus(repair_targets)
|
||||||
priority_foci = _limited_dict_items(repair_targets.get("priority_foci") if isinstance(repair_targets, dict) else [])
|
priority_foci = _limited_dict_items(repair_targets.get("priority_foci") if isinstance(repair_targets, dict) else [])
|
||||||
repair_items = _limited_dict_items(repair_targets.get("targets") if isinstance(repair_targets, dict) else [], limit=8)
|
repair_target_items = repair_targets.get("targets") if isinstance(repair_targets.get("targets"), list) else []
|
||||||
|
repair_items = _limited_dict_items(repair_target_items, limit=8)
|
||||||
|
issue_codes = sorted(
|
||||||
|
{
|
||||||
|
str(target.get("issue_code") or "").strip()
|
||||||
|
for target in repair_target_items
|
||||||
|
if isinstance(target, dict) and str(target.get("issue_code") or "").strip()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
rerun_matrix = collect_rerun_matrix(repair_targets)
|
||||||
route_candidate_groups = _limited_dict_items(
|
route_candidate_groups = _limited_dict_items(
|
||||||
repair_targets.get("route_candidate_groups") if isinstance(repair_targets, dict) else [],
|
repair_targets.get("route_candidate_groups") if isinstance(repair_targets, dict) else [],
|
||||||
limit=8,
|
limit=8,
|
||||||
|
|
@ -4749,6 +5280,24 @@ def build_lead_coder_handoff(
|
||||||
if isinstance(item, dict) and str(item.get("target_source") or "") == "route_candidate_enablement"
|
if isinstance(item, dict) and str(item.get("target_source") or "") == "route_candidate_enablement"
|
||||||
]
|
]
|
||||||
candidate_files = [repo_relative(path) for path in build_coder_snapshot_paths(repair_targets)]
|
candidate_files = [repo_relative(path) for path in build_coder_snapshot_paths(repair_targets)]
|
||||||
|
artifact_refs = {
|
||||||
|
"pack_dir": repo_relative(pack_dir),
|
||||||
|
"business_audit": repo_relative(business_audit_path),
|
||||||
|
"analyst_verdict": repo_relative(analyst_verdict_path),
|
||||||
|
"repair_targets": repo_relative(repair_targets_path),
|
||||||
|
"pack_summary": repo_relative(pack_dir / "pack_summary.md"),
|
||||||
|
"pack_state": repo_relative(pack_dir / "pack_state.json"),
|
||||||
|
"scenario_acceptance_matrix": repo_relative(pack_dir / "scenario_acceptance_matrix.md"),
|
||||||
|
}
|
||||||
|
if business_audit_json_path is not None:
|
||||||
|
artifact_refs["business_audit_json"] = repo_relative(business_audit_json_path)
|
||||||
|
if issue_catalog_snapshot_path is not None:
|
||||||
|
artifact_refs["issue_catalog_snapshot"] = repo_relative(issue_catalog_snapshot_path)
|
||||||
|
if rerun_matrix_path is not None:
|
||||||
|
artifact_refs["rerun_matrix"] = repo_relative(rerun_matrix_path)
|
||||||
|
if detector_candidates_path is not None:
|
||||||
|
artifact_refs["detector_candidates"] = repo_relative(detector_candidates_path)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"schema_version": "domain_loop_lead_coder_handoff_v1",
|
"schema_version": "domain_loop_lead_coder_handoff_v1",
|
||||||
"repair_mode": REPAIR_MODE_LEAD_HANDOFF,
|
"repair_mode": REPAIR_MODE_LEAD_HANDOFF,
|
||||||
|
|
@ -4767,15 +5316,9 @@ def build_lead_coder_handoff(
|
||||||
"requires_user_decision": requires_user_decision,
|
"requires_user_decision": requires_user_decision,
|
||||||
"user_decision_type": user_decision_type,
|
"user_decision_type": user_decision_type,
|
||||||
"user_decision_prompt": user_decision_prompt,
|
"user_decision_prompt": user_decision_prompt,
|
||||||
"artifact_refs": {
|
"artifact_refs": artifact_refs,
|
||||||
"pack_dir": repo_relative(pack_dir),
|
"issue_codes": issue_codes,
|
||||||
"business_audit": repo_relative(business_audit_path),
|
"rerun_matrix": rerun_matrix,
|
||||||
"analyst_verdict": repo_relative(analyst_verdict_path),
|
|
||||||
"repair_targets": repo_relative(repair_targets_path),
|
|
||||||
"pack_summary": repo_relative(pack_dir / "pack_summary.md"),
|
|
||||||
"pack_state": repo_relative(pack_dir / "pack_state.json"),
|
|
||||||
"scenario_acceptance_matrix": repo_relative(pack_dir / "scenario_acceptance_matrix.md"),
|
|
||||||
},
|
|
||||||
"human_meaning": {
|
"human_meaning": {
|
||||||
"user_intent_summary": analyst_verdict.get("user_intent_summary"),
|
"user_intent_summary": analyst_verdict.get("user_intent_summary"),
|
||||||
"expected_direct_answer": analyst_verdict.get("expected_direct_answer"),
|
"expected_direct_answer": analyst_verdict.get("expected_direct_answer"),
|
||||||
|
|
@ -4792,7 +5335,10 @@ def build_lead_coder_handoff(
|
||||||
"candidate_files": candidate_files,
|
"candidate_files": candidate_files,
|
||||||
"lead_instructions": [
|
"lead_instructions": [
|
||||||
"Read business_audit.md first and judge the user-facing answer before debug metadata.",
|
"Read business_audit.md first and judge the user-facing answer before debug metadata.",
|
||||||
|
"Use business_audit.json, issue_catalog_snapshot.json, rerun_matrix.json, and detector_candidates.json as the repair contract.",
|
||||||
"Inspect analyst_verdict.json and repair_targets.json only after the semantic defect is clear.",
|
"Inspect analyst_verdict.json and repair_targets.json only after the semantic defect is clear.",
|
||||||
|
"Patch only inside allowed_patch_targets for the issue_code unless Lead Codex explicitly expands scope.",
|
||||||
|
"Do not touch forbidden_patch_targets and do not repair by masking detector symptoms.",
|
||||||
"Use route_candidate_groups to distinguish missing user scope from a reviewed-route enablement gap before patching.",
|
"Use route_candidate_groups to distinguish missing user scope from a reviewed-route enablement gap before patching.",
|
||||||
"Patch code manually in the main Codex context; do not launch a weak autonomous coder by default.",
|
"Patch code manually in the main Codex context; do not launch a weak autonomous coder by default.",
|
||||||
"Keep the patch narrow, preserve UTF-8 without BOM, run targeted tests/build, rebuild graphify after code edits, then rerun the same semantic pack.",
|
"Keep the patch narrow, preserve UTF-8 without BOM, run targeted tests/build, rebuild graphify after code edits, then rerun the same semantic pack.",
|
||||||
|
|
@ -4816,10 +5362,19 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
|
||||||
"",
|
"",
|
||||||
"## Read First",
|
"## Read First",
|
||||||
f"- business_audit: `{artifact_refs.get('business_audit')}`",
|
f"- business_audit: `{artifact_refs.get('business_audit')}`",
|
||||||
|
f"- business_audit_json: `{artifact_refs.get('business_audit_json') or 'n/a'}`",
|
||||||
f"- analyst_verdict: `{artifact_refs.get('analyst_verdict')}`",
|
f"- analyst_verdict: `{artifact_refs.get('analyst_verdict')}`",
|
||||||
f"- repair_targets: `{artifact_refs.get('repair_targets')}`",
|
f"- repair_targets: `{artifact_refs.get('repair_targets')}`",
|
||||||
|
f"- issue_catalog_snapshot: `{artifact_refs.get('issue_catalog_snapshot') or 'n/a'}`",
|
||||||
|
f"- rerun_matrix: `{artifact_refs.get('rerun_matrix') or 'n/a'}`",
|
||||||
|
f"- detector_candidates: `{artifact_refs.get('detector_candidates') or 'n/a'}`",
|
||||||
|
f"- auto_coder_gate: `{artifact_refs.get('auto_coder_gate') or 'n/a'}`",
|
||||||
f"- pack_dir: `{artifact_refs.get('pack_dir')}`",
|
f"- pack_dir: `{artifact_refs.get('pack_dir')}`",
|
||||||
"",
|
"",
|
||||||
|
"## Repair Contract",
|
||||||
|
f"- issue_codes: `{', '.join(normalize_string_list(handoff.get('issue_codes'))) or 'n/a'}`",
|
||||||
|
f"- rerun_matrix: `{', '.join(normalize_string_list(handoff.get('rerun_matrix'))) or 'n/a'}`",
|
||||||
|
"",
|
||||||
"## Human Meaning",
|
"## Human Meaning",
|
||||||
f"- user_intent_summary: {human_meaning.get('user_intent_summary') or 'n/a'}",
|
f"- user_intent_summary: {human_meaning.get('user_intent_summary') or 'n/a'}",
|
||||||
f"- expected_direct_answer: {human_meaning.get('expected_direct_answer') or 'n/a'}",
|
f"- expected_direct_answer: {human_meaning.get('expected_direct_answer') or 'n/a'}",
|
||||||
|
|
@ -4853,10 +5408,15 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
|
||||||
lines.extend(
|
lines.extend(
|
||||||
[
|
[
|
||||||
f"- `{target.get('target_id') or 'n/a'}`",
|
f"- `{target.get('target_id') or 'n/a'}`",
|
||||||
|
f" issue_code: `{target.get('issue_code') or 'n/a'}`",
|
||||||
f" severity: `{target.get('severity') or 'n/a'}`",
|
f" severity: `{target.get('severity') or 'n/a'}`",
|
||||||
f" problem_type: `{target.get('problem_type') or 'n/a'}`",
|
f" problem_type: `{target.get('problem_type') or 'n/a'}`",
|
||||||
|
f" expected_contract: `{target.get('expected_business_answer_contract') or 'n/a'}`",
|
||||||
f" source: `{target.get('target_source') or 'n/a'}`",
|
f" source: `{target.get('target_source') or 'n/a'}`",
|
||||||
f" fix_goal: {target.get('fix_goal') or 'n/a'}",
|
f" fix_goal: {target.get('fix_goal') or 'n/a'}",
|
||||||
|
f" allowed_patch_targets: `{', '.join(normalize_string_list(target.get('allowed_patch_targets'))) or 'n/a'}`",
|
||||||
|
f" forbidden_patch_targets: `{', '.join(normalize_string_list(target.get('forbidden_patch_targets'))) or 'n/a'}`",
|
||||||
|
f" rerun_matrix: `{', '.join(normalize_string_list(target.get('rerun_matrix'))) or 'n/a'}`",
|
||||||
f" candidate_files: `{', '.join(candidate_files) if candidate_files else 'n/a'}`",
|
f" candidate_files: `{', '.join(candidate_files) if candidate_files else 'n/a'}`",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -4948,6 +5508,19 @@ def handle_run_pack(args: argparse.Namespace) -> int:
|
||||||
scenarios_dir.mkdir(parents=True, exist_ok=True)
|
scenarios_dir.mkdir(parents=True, exist_ok=True)
|
||||||
write_json(pack_dir / "pack_manifest.json", pack)
|
write_json(pack_dir / "pack_manifest.json", pack)
|
||||||
write_text(pack_dir / "manifest_source.txt", f"{pack_path}\n")
|
write_text(pack_dir / "manifest_source.txt", f"{pack_path}\n")
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
pack_dir,
|
||||||
|
runner="domain_case_loop.run-pack",
|
||||||
|
args=args,
|
||||||
|
spec_path=pack_path,
|
||||||
|
run_id=pack["pack_id"],
|
||||||
|
extra={
|
||||||
|
"domain": pack["domain"],
|
||||||
|
"title": pack["title"],
|
||||||
|
"scenario_count": len(pack.get("scenarios") or []),
|
||||||
|
"max_scenarios": getattr(args, "max_scenarios", None),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
scenario_results: list[dict[str, Any]] = []
|
scenario_results: list[dict[str, Any]] = []
|
||||||
max_scenarios = max(0, int(args.max_scenarios)) if args.max_scenarios is not None else None
|
max_scenarios = max(0, int(args.max_scenarios)) if args.max_scenarios is not None else None
|
||||||
|
|
@ -5037,7 +5610,11 @@ def build_loop_summary(loop_state: dict[str, Any]) -> str:
|
||||||
f" coder_workspace_hygiene_restored_files: `{', '.join(item.get('coder_workspace_hygiene_restored_files') or []) or 'none'}`",
|
f" coder_workspace_hygiene_restored_files: `{', '.join(item.get('coder_workspace_hygiene_restored_files') or []) or 'none'}`",
|
||||||
f" analyst_verdict: `{item.get('analyst_verdict_path') or 'n/a'}`",
|
f" analyst_verdict: `{item.get('analyst_verdict_path') or 'n/a'}`",
|
||||||
f" business_audit: `{item.get('business_audit_path') or 'n/a'}`",
|
f" business_audit: `{item.get('business_audit_path') or 'n/a'}`",
|
||||||
|
f" business_audit_json: `{item.get('business_audit_json_path') or 'n/a'}`",
|
||||||
f" repair_targets: `{item.get('repair_targets_path') or 'n/a'}`",
|
f" repair_targets: `{item.get('repair_targets_path') or 'n/a'}`",
|
||||||
|
f" rerun_matrix: `{item.get('rerun_matrix_path') or 'n/a'}`",
|
||||||
|
f" detector_candidates: `{item.get('detector_candidates_path') or 'n/a'}`",
|
||||||
|
f" auto_coder_gate: `{item.get('auto_coder_gate_path') or 'n/a'}`",
|
||||||
f" lead_coder_handoff: `{item.get('lead_coder_handoff_path') or 'n/a'}`",
|
f" lead_coder_handoff: `{item.get('lead_coder_handoff_path') or 'n/a'}`",
|
||||||
f" repair_target_count: `{item.get('repair_target_count')}`",
|
f" repair_target_count: `{item.get('repair_target_count')}`",
|
||||||
f" repair_target_severity_counts: `{dump_json(item.get('repair_target_severity_counts') or {})}`",
|
f" repair_target_severity_counts: `{dump_json(item.get('repair_target_severity_counts') or {})}`",
|
||||||
|
|
@ -5057,6 +5634,8 @@ def build_loop_final_status(loop_state: dict[str, Any]) -> str:
|
||||||
- target_score: `{loop_state['target_score']}`
|
- target_score: `{loop_state['target_score']}`
|
||||||
- iterations_ran: `{len(loop_state.get('iterations', []))}`
|
- iterations_ran: `{len(loop_state.get('iterations', []))}`
|
||||||
- last_analyst_decision: `{loop_state.get('last_analyst_decision') or 'n/a'}`
|
- last_analyst_decision: `{loop_state.get('last_analyst_decision') or 'n/a'}`
|
||||||
|
- latest_business_audit_json: `{loop_state.get('latest_business_audit_json_path') or 'n/a'}`
|
||||||
|
- latest_rerun_matrix: `{loop_state.get('latest_rerun_matrix_path') or 'n/a'}`
|
||||||
- latest_lead_coder_handoff: `{loop_state.get('latest_lead_coder_handoff_path') or 'n/a'}`
|
- latest_lead_coder_handoff: `{loop_state.get('latest_lead_coder_handoff_path') or 'n/a'}`
|
||||||
- stop_reason: {loop_state.get('stop_reason') or 'n/a'}
|
- stop_reason: {loop_state.get('stop_reason') or 'n/a'}
|
||||||
"""
|
"""
|
||||||
|
|
@ -5070,6 +5649,18 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
iterations_dir = loop_dir / "iterations"
|
iterations_dir = loop_dir / "iterations"
|
||||||
iterations_dir.mkdir(parents=True, exist_ok=True)
|
iterations_dir.mkdir(parents=True, exist_ok=True)
|
||||||
write_text(loop_dir / "manifest_source.txt", f"{manifest_path}\n")
|
write_text(loop_dir / "manifest_source.txt", f"{manifest_path}\n")
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
loop_dir,
|
||||||
|
runner="domain_case_loop.run-pack-loop",
|
||||||
|
args=args,
|
||||||
|
spec_path=manifest_path,
|
||||||
|
run_id=loop_id,
|
||||||
|
extra={
|
||||||
|
"target_score": args.target_score,
|
||||||
|
"max_iterations": args.max_iterations,
|
||||||
|
"repair_mode": getattr(args, "repair_mode", REPAIR_MODE_LEAD_HANDOFF),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
target_score = int(args.target_score)
|
target_score = int(args.target_score)
|
||||||
max_iterations = int(args.max_iterations)
|
max_iterations = int(args.max_iterations)
|
||||||
|
|
@ -5166,6 +5757,10 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
)
|
)
|
||||||
deterministic_gate_ok, deterministic_gate_reason = evaluate_deterministic_loop_gate(pack_state, repair_targets)
|
deterministic_gate_ok, deterministic_gate_reason = evaluate_deterministic_loop_gate(pack_state, repair_targets)
|
||||||
business_audit_path = iteration_dir / "business_audit.md"
|
business_audit_path = iteration_dir / "business_audit.md"
|
||||||
|
business_audit_json_path = iteration_dir / "business_audit.json"
|
||||||
|
issue_catalog_snapshot_path = iteration_dir / "issue_catalog_snapshot.json"
|
||||||
|
rerun_matrix_path = iteration_dir / "rerun_matrix.json"
|
||||||
|
detector_candidates_path = iteration_dir / "detector_candidates.json"
|
||||||
write_text(
|
write_text(
|
||||||
business_audit_path,
|
business_audit_path,
|
||||||
build_business_audit_markdown(
|
build_business_audit_markdown(
|
||||||
|
|
@ -5175,6 +5770,34 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
accepted_gate = analyst_accepted_gate and deterministic_gate_ok
|
accepted_gate = analyst_accepted_gate and deterministic_gate_ok
|
||||||
|
issue_catalog_snapshot = build_issue_catalog_snapshot(repair_targets)
|
||||||
|
rerun_matrix_contract = {
|
||||||
|
"schema_version": "rerun_matrix_v1",
|
||||||
|
"source_repair_targets": repo_relative(repair_targets_path),
|
||||||
|
"items": collect_rerun_matrix(repair_targets),
|
||||||
|
}
|
||||||
|
detector_candidates = build_detector_candidates(repair_targets)
|
||||||
|
business_audit_contract = build_business_audit_contract(
|
||||||
|
analyst_verdict=analyst_verdict,
|
||||||
|
repair_targets=repair_targets,
|
||||||
|
target_score=target_score,
|
||||||
|
loop_decision=loop_decision,
|
||||||
|
analyst_accepted_gate=analyst_accepted_gate,
|
||||||
|
accepted_gate=accepted_gate,
|
||||||
|
deterministic_gate_ok=deterministic_gate_ok,
|
||||||
|
deterministic_gate_reason=deterministic_gate_reason,
|
||||||
|
business_audit_markdown_path=business_audit_path,
|
||||||
|
analyst_verdict_path=analyst_verdict_path,
|
||||||
|
repair_targets_path=repair_targets_path,
|
||||||
|
business_audit_json_path=business_audit_json_path,
|
||||||
|
issue_catalog_snapshot_path=issue_catalog_snapshot_path,
|
||||||
|
rerun_matrix_path=rerun_matrix_path,
|
||||||
|
detector_candidates_path=detector_candidates_path,
|
||||||
|
)
|
||||||
|
write_json(business_audit_json_path, business_audit_contract)
|
||||||
|
write_json(issue_catalog_snapshot_path, issue_catalog_snapshot)
|
||||||
|
write_json(rerun_matrix_path, rerun_matrix_contract)
|
||||||
|
write_json(detector_candidates_path, detector_candidates)
|
||||||
repair_target_count = int(repair_targets.get("target_count") or 0) if isinstance(repair_targets, dict) else 0
|
repair_target_count = int(repair_targets.get("target_count") or 0) if isinstance(repair_targets, dict) else 0
|
||||||
repair_target_severity_counts = (
|
repair_target_severity_counts = (
|
||||||
repair_targets.get("severity_counts")
|
repair_targets.get("severity_counts")
|
||||||
|
|
@ -5184,6 +5807,11 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
loop_state["last_analyst_decision"] = loop_decision
|
loop_state["last_analyst_decision"] = loop_decision
|
||||||
loop_state["last_user_decision_type"] = user_decision_type
|
loop_state["last_user_decision_type"] = user_decision_type
|
||||||
loop_state["last_user_decision_prompt"] = user_decision_prompt
|
loop_state["last_user_decision_prompt"] = user_decision_prompt
|
||||||
|
loop_state["latest_business_audit_path"] = str(business_audit_path)
|
||||||
|
loop_state["latest_business_audit_json_path"] = str(business_audit_json_path)
|
||||||
|
loop_state["latest_issue_catalog_snapshot_path"] = str(issue_catalog_snapshot_path)
|
||||||
|
loop_state["latest_rerun_matrix_path"] = str(rerun_matrix_path)
|
||||||
|
loop_state["latest_detector_candidates_path"] = str(detector_candidates_path)
|
||||||
|
|
||||||
iteration_record: dict[str, Any] = {
|
iteration_record: dict[str, Any] = {
|
||||||
"iteration_id": iteration_id,
|
"iteration_id": iteration_id,
|
||||||
|
|
@ -5199,7 +5827,11 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
"user_decision_prompt": user_decision_prompt,
|
"user_decision_prompt": user_decision_prompt,
|
||||||
"analyst_verdict_path": str(analyst_verdict_path),
|
"analyst_verdict_path": str(analyst_verdict_path),
|
||||||
"business_audit_path": str(business_audit_path),
|
"business_audit_path": str(business_audit_path),
|
||||||
|
"business_audit_json_path": str(business_audit_json_path),
|
||||||
"repair_targets_path": str(repair_targets_path),
|
"repair_targets_path": str(repair_targets_path),
|
||||||
|
"issue_catalog_snapshot_path": str(issue_catalog_snapshot_path),
|
||||||
|
"rerun_matrix_path": str(rerun_matrix_path),
|
||||||
|
"detector_candidates_path": str(detector_candidates_path),
|
||||||
"repair_target_count": repair_target_count,
|
"repair_target_count": repair_target_count,
|
||||||
"repair_target_severity_counts": repair_target_severity_counts,
|
"repair_target_severity_counts": repair_target_severity_counts,
|
||||||
"coder_status": None,
|
"coder_status": None,
|
||||||
|
|
@ -5241,6 +5873,10 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
analyst_verdict_path=analyst_verdict_path,
|
analyst_verdict_path=analyst_verdict_path,
|
||||||
repair_targets_path=repair_targets_path,
|
repair_targets_path=repair_targets_path,
|
||||||
business_audit_path=business_audit_path,
|
business_audit_path=business_audit_path,
|
||||||
|
business_audit_json_path=business_audit_json_path,
|
||||||
|
issue_catalog_snapshot_path=issue_catalog_snapshot_path,
|
||||||
|
rerun_matrix_path=rerun_matrix_path,
|
||||||
|
detector_candidates_path=detector_candidates_path,
|
||||||
analyst_verdict=analyst_verdict,
|
analyst_verdict=analyst_verdict,
|
||||||
repair_targets=repair_targets,
|
repair_targets=repair_targets,
|
||||||
target_score=target_score,
|
target_score=target_score,
|
||||||
|
|
@ -5278,8 +5914,61 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
write_json(loop_dir / "loop_state.json", loop_state)
|
write_json(loop_dir / "loop_state.json", loop_state)
|
||||||
break
|
break
|
||||||
|
|
||||||
coder_result_path = iteration_dir / "coder_result.json"
|
|
||||||
assigned_focus = select_primary_repair_focus(repair_targets)
|
assigned_focus = select_primary_repair_focus(repair_targets)
|
||||||
|
auto_coder_gate_path = iteration_dir / "auto_coder_gate.json"
|
||||||
|
auto_coder_gate = evaluate_auto_coder_gate(repair_targets, assigned_focus)
|
||||||
|
write_json(auto_coder_gate_path, auto_coder_gate)
|
||||||
|
iteration_record["auto_coder_gate_path"] = str(auto_coder_gate_path)
|
||||||
|
if not bool(auto_coder_gate.get("allowed")):
|
||||||
|
handoff = build_lead_coder_handoff(
|
||||||
|
loop_state=loop_state,
|
||||||
|
iteration_id=iteration_id,
|
||||||
|
pack_dir=pack_dir,
|
||||||
|
analyst_verdict_path=analyst_verdict_path,
|
||||||
|
repair_targets_path=repair_targets_path,
|
||||||
|
business_audit_path=business_audit_path,
|
||||||
|
business_audit_json_path=business_audit_json_path,
|
||||||
|
issue_catalog_snapshot_path=issue_catalog_snapshot_path,
|
||||||
|
rerun_matrix_path=rerun_matrix_path,
|
||||||
|
detector_candidates_path=detector_candidates_path,
|
||||||
|
analyst_verdict=analyst_verdict,
|
||||||
|
repair_targets=repair_targets,
|
||||||
|
target_score=target_score,
|
||||||
|
loop_decision=loop_decision,
|
||||||
|
analyst_accepted_gate=analyst_accepted_gate,
|
||||||
|
accepted_gate=accepted_gate,
|
||||||
|
deterministic_gate_ok=deterministic_gate_ok,
|
||||||
|
deterministic_gate_reason=deterministic_gate_reason,
|
||||||
|
requires_user_decision=requires_user_decision,
|
||||||
|
user_decision_type=user_decision_type,
|
||||||
|
user_decision_prompt=user_decision_prompt,
|
||||||
|
)
|
||||||
|
handoff["status"] = "auto_coder_gate_blocked_lead_handoff_required"
|
||||||
|
handoff["reason"] = "auto-coder was explicitly requested, but the issue catalog repair contract did not pass the auto-coder gate"
|
||||||
|
handoff["auto_coder_gate"] = auto_coder_gate
|
||||||
|
if isinstance(handoff.get("artifact_refs"), dict):
|
||||||
|
handoff["artifact_refs"]["auto_coder_gate"] = repo_relative(auto_coder_gate_path)
|
||||||
|
handoff_paths = save_lead_coder_handoff(
|
||||||
|
loop_dir=loop_dir,
|
||||||
|
iteration_dir=iteration_dir,
|
||||||
|
handoff=handoff,
|
||||||
|
)
|
||||||
|
iteration_record["coder_status"] = "auto_coder_gate_blocked"
|
||||||
|
iteration_record.update(handoff_paths)
|
||||||
|
if assigned_focus:
|
||||||
|
iteration_record["assigned_repair_focus_id"] = str(assigned_focus.get("focus_id") or "")
|
||||||
|
loop_state["iterations"].append(iteration_record)
|
||||||
|
loop_state["latest_lead_coder_handoff_path"] = handoff_paths["latest_lead_coder_handoff_path"]
|
||||||
|
loop_state["latest_lead_coder_handoff_markdown_path"] = handoff_paths[
|
||||||
|
"latest_lead_coder_handoff_markdown_path"
|
||||||
|
]
|
||||||
|
loop_state["final_status"] = loop_decision if loop_decision in {"needs_exact_capability", "partial"} else "partial"
|
||||||
|
loop_state["stop_reason"] = f"auto_coder_gate_blocked at {iteration_id}: {auto_coder_gate.get('reason')}"
|
||||||
|
loop_state["updated_at"] = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
|
||||||
|
write_json(loop_dir / "loop_state.json", loop_state)
|
||||||
|
break
|
||||||
|
|
||||||
|
coder_result_path = iteration_dir / "coder_result.json"
|
||||||
coder_prompt = build_coder_loop_prompt(
|
coder_prompt = build_coder_loop_prompt(
|
||||||
loop_dir=loop_dir,
|
loop_dir=loop_dir,
|
||||||
iteration_dir=iteration_dir,
|
iteration_dir=iteration_dir,
|
||||||
|
|
@ -5289,6 +5978,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
||||||
assigned_focus=assigned_focus,
|
assigned_focus=assigned_focus,
|
||||||
analyst_verdict_path=analyst_verdict_path,
|
analyst_verdict_path=analyst_verdict_path,
|
||||||
analyst_verdict_json=dump_json(analyst_verdict),
|
analyst_verdict_json=dump_json(analyst_verdict),
|
||||||
|
auto_coder_gate_json=dump_json(auto_coder_gate),
|
||||||
)
|
)
|
||||||
write_text(iteration_dir / "coder_prompt.md", coder_prompt + "\n")
|
write_text(iteration_dir / "coder_prompt.md", coder_prompt + "\n")
|
||||||
coder_snapshot_paths = build_coder_snapshot_paths(repair_targets)
|
coder_snapshot_paths = build_coder_snapshot_paths(repair_targets)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from types import SimpleNamespace
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import domain_case_loop as dcl
|
import domain_case_loop as dcl
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
import check_mcp_live_readiness as mcp_readiness
|
import check_mcp_live_readiness as mcp_readiness
|
||||||
import scenario_acceptance_policy as sap
|
import scenario_acceptance_policy as sap
|
||||||
|
|
||||||
|
|
@ -1522,6 +1523,18 @@ def run_live(spec: dict[str, Any], output_dir: Path, args: argparse.Namespace) -
|
||||||
dcl.ensure_backend_health(runner_args.backend_url, runner_args.timeout_seconds)
|
dcl.ensure_backend_health(runner_args.backend_url, runner_args.timeout_seconds)
|
||||||
|
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
output_dir,
|
||||||
|
runner="domain_truth_harness.run-live",
|
||||||
|
args=args,
|
||||||
|
spec_path=Path(args.spec).resolve() if getattr(args, "spec", None) else None,
|
||||||
|
run_id=spec["scenario_id"],
|
||||||
|
extra={
|
||||||
|
"domain": spec["domain"],
|
||||||
|
"title": spec["title"],
|
||||||
|
"require_mcp_live_readiness": bool(getattr(args, "require_mcp_live_readiness", False)),
|
||||||
|
},
|
||||||
|
)
|
||||||
manifest = build_generated_manifest(spec)
|
manifest = build_generated_manifest(spec)
|
||||||
write_json(output_dir / "truth_harness_spec.json", spec)
|
write_json(output_dir / "truth_harness_spec.json", spec)
|
||||||
write_json(output_dir / "scenario_manifest.json", manifest)
|
write_json(output_dir / "scenario_manifest.json", manifest)
|
||||||
|
|
@ -1671,6 +1684,18 @@ def handle_review_export(args: argparse.Namespace) -> int:
|
||||||
output_dir = Path(args.output_dir).resolve() if args.output_dir else default_output_dir(
|
output_dir = Path(args.output_dir).resolve() if args.output_dir else default_output_dir(
|
||||||
f"{spec['scenario_id']}_review"
|
f"{spec['scenario_id']}_review"
|
||||||
)
|
)
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
output_dir,
|
||||||
|
runner="domain_truth_harness.review-export",
|
||||||
|
args=args,
|
||||||
|
spec_path=spec_path,
|
||||||
|
run_id=spec["scenario_id"],
|
||||||
|
extra={
|
||||||
|
"domain": spec["domain"],
|
||||||
|
"title": spec["title"],
|
||||||
|
"source_export": str(export_path),
|
||||||
|
},
|
||||||
|
)
|
||||||
result = review_export(spec, export_path, output_dir)
|
result = review_export(spec, export_path, output_dir)
|
||||||
print(f"[truth-harness] review-export overall_status={result['review_summary']['overall_status']}")
|
print(f"[truth-harness] review-export overall_status={result['review_summary']['overall_status']}")
|
||||||
print(f"[truth-harness] review-export final_status={result['pack_state']['final_status']}")
|
print(f"[truth-harness] review-export final_status={result['pack_state']['final_status']}")
|
||||||
|
|
@ -1684,6 +1709,19 @@ def handle_run_live(args: argparse.Namespace) -> int:
|
||||||
output_dir = Path(args.output_dir).resolve() if args.output_dir else default_output_dir(
|
output_dir = Path(args.output_dir).resolve() if args.output_dir else default_output_dir(
|
||||||
f"{spec['scenario_id']}_live"
|
f"{spec['scenario_id']}_live"
|
||||||
)
|
)
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
output_dir,
|
||||||
|
runner="domain_truth_harness.run-live",
|
||||||
|
args=args,
|
||||||
|
spec_path=spec_path,
|
||||||
|
run_id=spec["scenario_id"],
|
||||||
|
extra={
|
||||||
|
"domain": spec["domain"],
|
||||||
|
"title": spec["title"],
|
||||||
|
"require_mcp_live_readiness": bool(getattr(args, "require_mcp_live_readiness", False)),
|
||||||
|
"preflight_manifest": True,
|
||||||
|
},
|
||||||
|
)
|
||||||
if args.require_mcp_live_readiness:
|
if args.require_mcp_live_readiness:
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
readiness = mcp_readiness.check_readiness(
|
readiness = mcp_readiness.check_readiness(
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
|
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
|
||||||
|
|
||||||
|
def build_markdown_summary(health: dict[str, object]) -> str:
    """Render a prompt-registry health payload as a markdown report.

    Args:
        health: Health mapping. The keys read here are ``status``,
            ``default_prompt_version``, ``active_prompt_version``,
            ``prompt_source``, ``prompt_hash``, ``prompt_files``,
            ``failures`` and ``warnings``.

    Returns:
        The markdown text, stripped and terminated by exactly one newline.
    """

    def _list_field(key: str) -> list[object]:
        # Anything that is not a list (missing key, None, wrong type) is
        # treated as "no entries" so the report never crashes on bad input.
        value = health.get(key)
        return value if isinstance(value, list) else []

    lines = [
        "# Prompt registry healthcheck",
        "",
        # Same 'n/a' fallback as the sibling fields, so a missing status
        # does not render as the literal text `None`.
        f"- status: `{health.get('status') or 'n/a'}`",
        f"- default_prompt_version: `{health.get('default_prompt_version') or 'n/a'}`",
        f"- active_prompt_version: `{health.get('active_prompt_version') or 'n/a'}`",
        f"- prompt_source: `{health.get('prompt_source') or 'n/a'}`",
        f"- prompt_hash: `{health.get('prompt_hash') or 'n/a'}`",
        "",
        "## Prompt files",
    ]

    file_lines: list[str] = []
    for item in _list_field("prompt_files"):
        if not isinstance(item, dict):
            continue
        # Only a literal True counts as "exists"; truthy non-bools do not.
        exists = "yes" if item.get("exists") is True else "no"
        file_lines.append(f"- `{item.get('slot')}` `{item.get('relative_path')}` exists=`{exists}`")
    # Emit the placeholder whenever nothing was rendered, including the case
    # where the list held only malformed (non-dict) entries.
    lines.extend(file_lines or ["- no prompt files resolved"])

    for heading, key in (("## Failures", "failures"), ("## Warnings", "warnings")):
        entries = _list_field(key)
        lines.extend(["", heading])
        lines.extend([f"- {entry}" for entry in entries] or ["- none"])

    return "\n".join(lines).strip() + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the prompt registry healthcheck.

    Returns:
        A parser exposing ``--prompt-version`` (string, optional) and the
        boolean switches ``--json`` and ``--allow-preset-mismatch``.
    """
    parser = argparse.ArgumentParser(
        description="Fail-loud prompt registry healthcheck for AGENT semantic runs.",
    )
    parser.add_argument(
        "--prompt-version",
        help="Override active prompt version. Defaults to backend DEFAULT_PROMPT_VERSION.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print machine-readable JSON instead of markdown.",
    )
    parser.add_argument(
        "--allow-preset-mismatch",
        action="store_true",
        help="Downgrade saved preset prompt-version mismatch to warning for exploratory local runs.",
    )
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the prompt registry healthcheck and print the report.

    Args:
        argv: Optional argument list to parse instead of the process
            arguments. ``None`` (the default) keeps the original behaviour
            of reading ``sys.argv``.

    Returns:
        ``0`` when the health payload reports ``status == "pass"``,
        otherwise ``1``.
    """
    args = build_parser().parse_args(argv)
    health = runtime_manifest.build_prompt_registry_health(
        REPO_ROOT,
        prompt_version=args.prompt_version,
        # Strict matching is the default; the flag opts out of it.
        strict_preset_match=not bool(args.allow_preset_mismatch),
    )
    if args.json:
        print(json.dumps(health, ensure_ascii=False, indent=2))
    else:
        # The markdown summary already ends with a newline.
        print(build_markdown_summary(health), end="")
    return 0 if health.get("status") == "pass" else 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
|
|
@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
HISTORY_FILE = REPO_ROOT / "llm_normalizer" / "data" / "autorun_generators" / "history.json"
|
HISTORY_FILE = REPO_ROOT / "llm_normalizer" / "data" / "autorun_generators" / "history.json"
|
||||||
|
|
@ -113,8 +114,37 @@ def assert_status(value: Any, expected: str, label: str, problems: list[str]) ->
|
||||||
problems.append(f"{label}={actual or 'missing'}")
|
problems.append(f"{label}={actual or 'missing'}")
|
||||||
|
|
||||||
|
|
||||||
|
def require_effective_runtime_manifest(run_dir: Path) -> dict[str, Any]:
    """Load the effective runtime manifest for ``run_dir`` or refuse the save.

    Args:
        run_dir: Directory of the replay that is about to be saved.

    Returns:
        The manifest returned by
        ``runtime_manifest.load_effective_runtime_manifest``.

    Raises:
        RuntimeError: When the loader raises ``RuntimeError``; the original
            error is chained and its text appended to the refusal message.
    """
    try:
        return runtime_manifest.load_effective_runtime_manifest(run_dir)
    except RuntimeError as exc:
        raise RuntimeError(
            "Refusing to save AGENT autorun because the accepted replay has no reproducibility manifest: "
            f"{exc}"
        ) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def build_effective_runtime_save_summary(manifest: dict[str, Any], run_dir: Path) -> dict[str, Any]:
    """Condense a runtime manifest into the summary stored with a saved run.

    Args:
        manifest: Loaded effective runtime manifest.
        run_dir: Directory that holds the manifest file.

    Returns:
        A dict with ``manifest_path`` (the manifest file path passed through
        ``repo_relative``) followed by the selected manifest fields. Fields
        absent from ``manifest`` are included with value ``None``.
    """
    summary: dict[str, Any] = {
        "manifest_path": repo_relative(run_dir / runtime_manifest.EFFECTIVE_RUNTIME_FILE_NAME),
    }
    # Tuple order defines the key order of the saved summary.
    copied_fields = (
        "runner",
        "git_sha",
        "backend_url",
        "mcp_proxy_url",
        "llm_provider",
        "llm_model",
        "temperature",
        "max_output_tokens",
        "prompt_version",
        "prompt_source",
        "prompt_hash",
        "prompt_registry_status",
    )
    for field_name in copied_fields:
        summary[field_name] = manifest.get(field_name)
    return summary
|
||||||
|
|
||||||
|
|
||||||
def validate_truth_harness_run_dir(run_dir: Path) -> dict[str, Any]:
|
def validate_truth_harness_run_dir(run_dir: Path) -> dict[str, Any]:
|
||||||
run_dir = run_dir.resolve()
|
run_dir = run_dir.resolve()
|
||||||
|
effective_runtime = require_effective_runtime_manifest(run_dir)
|
||||||
pack_state = load_json_object(run_dir / "pack_state.json", "Validated run pack_state.json")
|
pack_state = load_json_object(run_dir / "pack_state.json", "Validated run pack_state.json")
|
||||||
truth_review = load_json_object(run_dir / "truth_review.json", "Validated run truth_review.json")
|
truth_review = load_json_object(run_dir / "truth_review.json", "Validated run truth_review.json")
|
||||||
business_review = load_json_object(run_dir / "business_review.json", "Validated run business_review.json")
|
business_review = load_json_object(run_dir / "business_review.json", "Validated run business_review.json")
|
||||||
|
|
@ -153,12 +183,14 @@ def validate_truth_harness_run_dir(run_dir: Path) -> dict[str, Any]:
|
||||||
"steps_with_business_failures": business_review.get("steps_with_business_failures"),
|
"steps_with_business_failures": business_review.get("steps_with_business_failures"),
|
||||||
"steps_with_business_warnings": business_review.get("steps_with_business_warnings"),
|
"steps_with_business_warnings": business_review.get("steps_with_business_warnings"),
|
||||||
"acceptance_gate_passed": pack_state.get("acceptance_gate_passed"),
|
"acceptance_gate_passed": pack_state.get("acceptance_gate_passed"),
|
||||||
|
"effective_runtime": build_effective_runtime_save_summary(effective_runtime, run_dir),
|
||||||
"saved_after_validated_replay": True,
|
"saved_after_validated_replay": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def validate_domain_pack_loop_dir(loop_dir: Path) -> dict[str, Any]:
|
def validate_domain_pack_loop_dir(loop_dir: Path) -> dict[str, Any]:
|
||||||
loop_dir = loop_dir.resolve()
|
loop_dir = loop_dir.resolve()
|
||||||
|
effective_runtime = require_effective_runtime_manifest(loop_dir)
|
||||||
loop_state = load_json_object(loop_dir / "loop_state.json", "Validated loop_state.json")
|
loop_state = load_json_object(loop_dir / "loop_state.json", "Validated loop_state.json")
|
||||||
iterations = loop_state.get("iterations")
|
iterations = loop_state.get("iterations")
|
||||||
if not isinstance(iterations, list) or not iterations:
|
if not isinstance(iterations, list) or not iterations:
|
||||||
|
|
@ -225,6 +257,7 @@ def validate_domain_pack_loop_dir(loop_dir: Path) -> dict[str, Any]:
|
||||||
"repair_target_count": last_iteration.get("repair_target_count"),
|
"repair_target_count": last_iteration.get("repair_target_count"),
|
||||||
"repair_target_severity_counts": last_iteration.get("repair_target_severity_counts"),
|
"repair_target_severity_counts": last_iteration.get("repair_target_severity_counts"),
|
||||||
"accepted_gate": last_iteration.get("accepted_gate"),
|
"accepted_gate": last_iteration.get("accepted_gate"),
|
||||||
|
"effective_runtime": build_effective_runtime_save_summary(effective_runtime, loop_dir),
|
||||||
"saved_after_validated_replay": True,
|
"saved_after_validated_replay": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import domain_case_loop as dcl
|
import domain_case_loop as dcl
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
import review_assistant_stage1_run as gui_review
|
import review_assistant_stage1_run as gui_review
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2008,6 +2009,19 @@ def handle_run(args: argparse.Namespace) -> int:
|
||||||
stage_dir.mkdir(parents=True, exist_ok=True)
|
stage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
write_json(stage_dir / "stage_manifest.json", stage_manifest)
|
write_json(stage_dir / "stage_manifest.json", stage_manifest)
|
||||||
write_text(stage_dir / "stage_manifest_source.txt", repo_relative(stage_manifest_path) + "\n")
|
write_text(stage_dir / "stage_manifest_source.txt", repo_relative(stage_manifest_path) + "\n")
|
||||||
|
runtime_manifest.write_effective_runtime(
|
||||||
|
stage_dir,
|
||||||
|
runner="stage_agent_loop.run",
|
||||||
|
args=args,
|
||||||
|
spec_path=stage_manifest_path,
|
||||||
|
run_id=stage_manifest["stage_id"],
|
||||||
|
extra={
|
||||||
|
"module_name": stage_manifest.get("module_name"),
|
||||||
|
"title": stage_manifest.get("title"),
|
||||||
|
"pack_manifest": stage_manifest.get("pack_manifest"),
|
||||||
|
"repair_mode": dcl.normalize_repair_mode(getattr(args, "repair_mode", None) or stage_manifest.get("repair_mode")),
|
||||||
|
},
|
||||||
|
)
|
||||||
save_stage_context_capsule(stage_manifest, stage_dir)
|
save_stage_context_capsule(stage_manifest, stage_dir)
|
||||||
|
|
||||||
command = build_domain_pack_loop_command(args, stage_manifest, stage_dir)
|
command = build_domain_pack_loop_command(args, stage_manifest, stage_dir)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,158 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||||
|
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
|
|
||||||
|
|
||||||
|
def write_text(path: Path, text: str) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def write_json(path: Path, payload: object) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def create_prompt_registry(repo_root: Path, *, preset_version: str = "normalizer_v2_0_2") -> None:
|
||||||
|
write_text(
|
||||||
|
repo_root / "llm_normalizer" / "backend" / "src" / "config.ts",
|
||||||
|
'export const DEFAULT_PROMPT_VERSION = process.env.DEFAULT_PROMPT_VERSION ?? "normalizer_v2_0_2";\n'
|
||||||
|
"export const FEATURE_ASSISTANT_ADDRESS_QUERY_V1 = toBooleanFlag(\n"
|
||||||
|
" process.env.FEATURE_ASSISTANT_ADDRESS_QUERY_V1,\n"
|
||||||
|
" true\n"
|
||||||
|
");\n",
|
||||||
|
)
|
||||||
|
for relative_path in runtime_manifest.BUILTIN_PROMPT_FILES["normalizer_v2_0_2"].values():
|
||||||
|
write_text(repo_root / "llm_normalizer" / "prompts" / relative_path, f"{relative_path}\n")
|
||||||
|
write_json(
|
||||||
|
repo_root / "llm_normalizer" / "data" / "presets" / "preset-current.json",
|
||||||
|
{"prompt_version": preset_version},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentRuntimeManifestTests(unittest.TestCase):
|
||||||
|
def test_prompt_registry_health_passes_for_complete_matching_registry(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
repo_root = Path(tmp)
|
||||||
|
create_prompt_registry(repo_root)
|
||||||
|
|
||||||
|
health = runtime_manifest.build_prompt_registry_health(repo_root)
|
||||||
|
|
||||||
|
self.assertEqual(health["status"], "pass")
|
||||||
|
self.assertEqual(health["prompt_source"], "file")
|
||||||
|
self.assertEqual(health["active_prompt_version"], "normalizer_v2_0_2")
|
||||||
|
self.assertTrue(health["prompt_hash"])
|
||||||
|
self.assertFalse(health["failures"])
|
||||||
|
|
||||||
|
def test_prompt_registry_health_fails_on_preset_mismatch_when_strict(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
repo_root = Path(tmp)
|
||||||
|
create_prompt_registry(repo_root, preset_version="normalizer_v1")
|
||||||
|
|
||||||
|
health = runtime_manifest.build_prompt_registry_health(repo_root)
|
||||||
|
|
||||||
|
self.assertEqual(health["status"], "fail")
|
||||||
|
self.assertTrue(any(str(item).startswith("preset_version_mismatch:") for item in health["failures"]))
|
||||||
|
|
||||||
|
def test_effective_runtime_manifest_records_runner_and_llm_settings(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
repo_root = Path(tmp)
|
||||||
|
create_prompt_registry(repo_root)
|
||||||
|
args = argparse.Namespace(
|
||||||
|
backend_url="http://127.0.0.1:8787",
|
||||||
|
mcp_proxy_url="http://127.0.0.1:6003",
|
||||||
|
mcp_channel="default",
|
||||||
|
llm_provider="local",
|
||||||
|
llm_model="test-model",
|
||||||
|
llm_base_url="http://127.0.0.1:1234/v1",
|
||||||
|
temperature=0.0,
|
||||||
|
max_output_tokens=2048,
|
||||||
|
prompt_version="normalizer_v2_0_2",
|
||||||
|
use_mock=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
manifest = runtime_manifest.build_effective_runtime_manifest(
|
||||||
|
runner="domain_truth_harness.run-live",
|
||||||
|
args=args,
|
||||||
|
repo_root=repo_root,
|
||||||
|
spec_path=repo_root / "docs" / "orchestration" / "spec.json",
|
||||||
|
output_dir=repo_root / "artifacts" / "domain_runs" / "run",
|
||||||
|
run_id="run",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(manifest["runner"], "domain_truth_harness.run-live")
|
||||||
|
self.assertEqual(manifest["llm_model"], "test-model")
|
||||||
|
self.assertEqual(manifest["temperature"], 0.0)
|
||||||
|
self.assertEqual(manifest["max_output_tokens"], 2048)
|
||||||
|
self.assertEqual(manifest["prompt_registry_status"], "pass")
|
||||||
|
|
||||||
|
def test_effective_runtime_manifest_resolves_address_runtime_prompt_to_default_registry(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
repo_root = Path(tmp)
|
||||||
|
create_prompt_registry(repo_root)
|
||||||
|
args = argparse.Namespace(
|
||||||
|
backend_url="http://127.0.0.1:8787",
|
||||||
|
mcp_proxy_url="http://127.0.0.1:6003",
|
||||||
|
mcp_channel="default",
|
||||||
|
llm_provider="local",
|
||||||
|
llm_model="test-model",
|
||||||
|
llm_base_url="http://127.0.0.1:1234/v1",
|
||||||
|
temperature=0.0,
|
||||||
|
max_output_tokens=2048,
|
||||||
|
prompt_version="address_query_runtime_v1",
|
||||||
|
use_mock=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
manifest = runtime_manifest.build_effective_runtime_manifest(
|
||||||
|
runner="domain_case_loop.run-pack",
|
||||||
|
args=args,
|
||||||
|
repo_root=repo_root,
|
||||||
|
spec_path=repo_root / "docs" / "orchestration" / "spec.json",
|
||||||
|
output_dir=repo_root / "artifacts" / "domain_runs" / "run",
|
||||||
|
run_id="run",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(manifest["requested_prompt_version"], "address_query_runtime_v1")
|
||||||
|
self.assertEqual(manifest["assistant_runtime_prompt_version"], "address_query_runtime_v1")
|
||||||
|
self.assertEqual(manifest["prompt_version"], "normalizer_v2_0_2")
|
||||||
|
self.assertEqual(manifest["prompt_resolution"]["mode"], "assistant_runtime_schema_uses_default_normalizer_prompt")
|
||||||
|
self.assertEqual(manifest["prompt_source"], "file")
|
||||||
|
self.assertTrue(manifest["prompt_hash"])
|
||||||
|
self.assertEqual(manifest["prompt_registry_status"], "pass")
|
||||||
|
|
||||||
|
def test_load_effective_runtime_manifest_refuses_failing_prompt_registry(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
run_dir = Path(tmp)
|
||||||
|
runtime_manifest.write_json(
|
||||||
|
run_dir / runtime_manifest.EFFECTIVE_RUNTIME_FILE_NAME,
|
||||||
|
{
|
||||||
|
"schema_version": runtime_manifest.EFFECTIVE_RUNTIME_SCHEMA_VERSION,
|
||||||
|
"git_sha": "test-sha",
|
||||||
|
"runner": "domain_case_loop.run-pack",
|
||||||
|
"llm_model": "test-model",
|
||||||
|
"temperature": 0.0,
|
||||||
|
"max_output_tokens": 2048,
|
||||||
|
"prompt_version": "address_query_runtime_v1",
|
||||||
|
"prompt_source": "unknown",
|
||||||
|
"prompt_hash": None,
|
||||||
|
"prompt_registry_status": "fail",
|
||||||
|
"prompt_registry_failures": ["prompt_hash_unavailable"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaisesRegex(RuntimeError, "failing prompt registry status|missing prompt_hash"):
|
||||||
|
runtime_manifest.load_effective_runtime_manifest(run_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
|
|
@ -65,6 +65,10 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
||||||
analyst_verdict_path=analyst_verdict_path,
|
analyst_verdict_path=analyst_verdict_path,
|
||||||
repair_targets_path=repair_targets_path,
|
repair_targets_path=repair_targets_path,
|
||||||
business_audit_path=business_audit_path,
|
business_audit_path=business_audit_path,
|
||||||
|
business_audit_json_path=iteration_dir / "business_audit.json",
|
||||||
|
issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
|
||||||
|
rerun_matrix_path=iteration_dir / "rerun_matrix.json",
|
||||||
|
detector_candidates_path=iteration_dir / "detector_candidates.json",
|
||||||
analyst_verdict=analyst_verdict,
|
analyst_verdict=analyst_verdict,
|
||||||
repair_targets=repair_targets,
|
repair_targets=repair_targets,
|
||||||
target_score=88,
|
target_score=88,
|
||||||
|
|
@ -90,8 +94,114 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
||||||
self.assertEqual(saved["status"], "lead_coder_repair_required")
|
self.assertEqual(saved["status"], "lead_coder_repair_required")
|
||||||
self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
|
self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
|
||||||
self.assertIn("business_audit", saved["artifact_refs"])
|
self.assertIn("business_audit", saved["artifact_refs"])
|
||||||
|
self.assertIn("business_audit_json", saved["artifact_refs"])
|
||||||
|
self.assertIn("issue_catalog_snapshot", saved["artifact_refs"])
|
||||||
|
self.assertIn("business_direct_answer_missing", saved["issue_codes"])
|
||||||
|
self.assertIn("failed_scenario", saved["rerun_matrix"])
|
||||||
self.assertTrue(latest_handoff_exists)
|
self.assertTrue(latest_handoff_exists)
|
||||||
|
|
||||||
|
def test_business_audit_contract_exposes_repair_issue_contract(self) -> None:
|
||||||
|
repair_targets = {
|
||||||
|
"target_count": 1,
|
||||||
|
"severity_counts": {"P0": 1},
|
||||||
|
"priority_foci": [],
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"target_id": "margin_pack:s01",
|
||||||
|
"scenario_id": "margin_pack",
|
||||||
|
"step_id": "s01",
|
||||||
|
"severity": "P0",
|
||||||
|
"issue_code": "margin_domain_leak_accounting_route",
|
||||||
|
"question_resolved": "Which item had the best margin?",
|
||||||
|
"fix_goal": "Route the question to margin profitability instead of accounting noise.",
|
||||||
|
"evidence_paths": ["artifacts/domain_runs/margin_pack/steps/s01/output.md"],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
contract = dcl.build_business_audit_contract(
|
||||||
|
analyst_verdict={
|
||||||
|
"quality_score": 31,
|
||||||
|
"loop_decision": "partial",
|
||||||
|
"user_intent_summary": "User needs item margin ranking.",
|
||||||
|
"expected_direct_answer": "Best item by gross margin.",
|
||||||
|
"actual_direct_answer": "Accounting route answer.",
|
||||||
|
},
|
||||||
|
repair_targets=repair_targets,
|
||||||
|
target_score=88,
|
||||||
|
loop_decision="partial",
|
||||||
|
analyst_accepted_gate=False,
|
||||||
|
accepted_gate=False,
|
||||||
|
deterministic_gate_ok=False,
|
||||||
|
deterministic_gate_reason="P0 repair target remains",
|
||||||
|
business_audit_markdown_path=Path("business_audit.md"),
|
||||||
|
analyst_verdict_path=Path("analyst_verdict.json"),
|
||||||
|
repair_targets_path=Path("repair_targets.json"),
|
||||||
|
business_audit_json_path=Path("business_audit.json"),
|
||||||
|
issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
|
||||||
|
rerun_matrix_path=Path("rerun_matrix.json"),
|
||||||
|
detector_candidates_path=Path("detector_candidates.json"),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(contract["overall_status"], "partial")
|
||||||
|
self.assertEqual(contract["blocking_issues"][0]["issue_code"], "margin_domain_leak_accounting_route")
|
||||||
|
self.assertEqual(contract["blocking_issues"][0]["expected_business_answer_contract"], "margin_profitability_v1")
|
||||||
|
self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
|
||||||
|
self.assertIn("detector_candidates_json", contract["artifact_refs"])
|
||||||
|
|
||||||
|
def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
|
||||||
|
repair_targets = {
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"target_id": "margin_pack:s01",
|
||||||
|
"issue_code": "margin_domain_leak_accounting_route",
|
||||||
|
"allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
|
||||||
|
"forbidden_patch_targets": ["global orchestration rewrite"],
|
||||||
|
"rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
assigned_focus = {
|
||||||
|
"focus_id": "route|addressIntentResolver",
|
||||||
|
"issue_codes": ["margin_domain_leak_accounting_route"],
|
||||||
|
"root_cause_layers": ["intent", "route"],
|
||||||
|
"allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
|
||||||
|
"forbidden_patch_targets": ["global orchestration rewrite"],
|
||||||
|
"rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
|
||||||
|
"target_ids": ["margin_pack:s01"],
|
||||||
|
}
|
||||||
|
|
||||||
|
gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)
|
||||||
|
|
||||||
|
self.assertFalse(gate["allowed"])
|
||||||
|
self.assertIn("issue_code_not_allowlisted:margin_domain_leak_accounting_route", gate["blocking_reasons"])
|
||||||
|
|
||||||
|
def test_auto_coder_gate_allows_complete_answer_surface_contract(self) -> None:
|
||||||
|
repair_targets = {
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"target_id": "pack:s01",
|
||||||
|
"issue_code": "business_direct_answer_missing",
|
||||||
|
"allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
|
||||||
|
"forbidden_patch_targets": ["routing rewrites"],
|
||||||
|
"rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
assigned_focus = {
|
||||||
|
"focus_id": "answer_shape|composeStage",
|
||||||
|
"issue_codes": ["business_direct_answer_missing"],
|
||||||
|
"root_cause_layers": ["answer_surface"],
|
||||||
|
"allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
|
||||||
|
"forbidden_patch_targets": ["routing rewrites"],
|
||||||
|
"rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
|
||||||
|
"target_ids": ["pack:s01"],
|
||||||
|
}
|
||||||
|
|
||||||
|
gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)
|
||||||
|
|
||||||
|
self.assertTrue(gate["allowed"])
|
||||||
|
self.assertEqual(gate["reason"], "auto_coder_gate_passed")
|
||||||
|
|
||||||
def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
|
def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
|
||||||
repair_targets = {
|
repair_targets = {
|
||||||
"pack_id": "demo_pack",
|
"pack_id": "demo_pack",
|
||||||
|
|
|
||||||
|
|
@ -574,6 +574,123 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
|
||||||
self.assertTrue(step_state["runtime_factual_answer_validated"])
|
self.assertTrue(step_state["runtime_factual_answer_validated"])
|
||||||
self.assertEqual(step_state["acceptance_status"], "validated")
|
self.assertEqual(step_state["acceptance_status"], "validated")
|
||||||
|
|
||||||
|
def test_business_result_mode_accepts_clean_margin_confirmed_balance(self) -> None:
|
||||||
|
question = (
|
||||||
|
"\u043a\u0430\u043a\u0430\u044f "
|
||||||
|
"\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440\u0430 "
|
||||||
|
"\u0431\u044b\u043b\u0430 \u0441\u0430\u043c\u043e\u0439 "
|
||||||
|
"\u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0439 "
|
||||||
|
"\u0432 2020"
|
||||||
|
)
|
||||||
|
answer_text = (
|
||||||
|
"\u0421\u0430\u043c\u0430\u044f "
|
||||||
|
"\u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u0430\u044f "
|
||||||
|
"\u043f\u043e\u0437\u0438\u0446\u0438\u044f \u0437\u0430 "
|
||||||
|
"\u043f\u0435\u0440\u0438\u043e\u0434 2020: "
|
||||||
|
"\u0422\u043e\u0432\u0430\u0440 A \u2014 "
|
||||||
|
"\u043c\u0430\u0440\u0436\u0430 42%, "
|
||||||
|
"\u0432\u044b\u0440\u0443\u0447\u043a\u0430 100 000 "
|
||||||
|
"\u0440\u0443\u0431., "
|
||||||
|
"\u0441\u0435\u0431\u0435\u0441\u0442\u043e\u0438\u043c\u043e\u0441\u0442\u043d\u0430\u044f "
|
||||||
|
"\u0431\u0430\u0437\u0430 58 000 "
|
||||||
|
"\u0440\u0443\u0431., "
|
||||||
|
"\u0432\u0430\u043b\u043e\u0432\u0430\u044f "
|
||||||
|
"\u0440\u0430\u0437\u043d\u0438\u0446\u0430 42 000 "
|
||||||
|
"\u0440\u0443\u0431.\n"
|
||||||
|
"\u0421\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0439 "
|
||||||
|
"\u0448\u0430\u0433: \u043c\u043e\u0433\u0443 "
|
||||||
|
"\u0440\u0430\u0441\u043a\u0440\u044b\u0442\u044c "
|
||||||
|
"\u0441\u0442\u0440\u043e\u043a\u0438 "
|
||||||
|
"\u0432\u044b\u0440\u0443\u0447\u043a\u0438 \u0438 "
|
||||||
|
"\u0441\u0435\u0431\u0435\u0441\u0442\u043e\u0438\u043c\u043e\u0441\u0442\u043d\u043e\u0439 "
|
||||||
|
"\u0431\u0430\u0437\u044b."
|
||||||
|
)
|
||||||
|
step_state = dcl.build_scenario_step_state(
|
||||||
|
scenario_id="margin_result_mode_demo",
|
||||||
|
domain="margin_profitability",
|
||||||
|
step={
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Margin ranking",
|
||||||
|
"depends_on": [],
|
||||||
|
"question_template": question,
|
||||||
|
"expected_intents": ["inventory_margin_ranking_for_nomenclature"],
|
||||||
|
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
|
||||||
|
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
|
||||||
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
},
|
||||||
|
step_index=1,
|
||||||
|
question_resolved=question,
|
||||||
|
analysis_context={},
|
||||||
|
turn_artifact={
|
||||||
|
"assistant_message": {
|
||||||
|
"reply_type": "factual",
|
||||||
|
"text": answer_text,
|
||||||
|
"message_id": "msg-1",
|
||||||
|
"trace_id": "trace-1",
|
||||||
|
},
|
||||||
|
"technical_debug_payload": {
|
||||||
|
"detected_mode": "address_query",
|
||||||
|
"detected_intent": "inventory_margin_ranking_for_nomenclature",
|
||||||
|
"selected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
|
||||||
|
"capability_id": "inventory_inventory_margin_ranking_for_nomenclature",
|
||||||
|
"capability_route_mode": "exact",
|
||||||
|
"fallback_type": "none",
|
||||||
|
"mcp_call_status": "matched_non_empty",
|
||||||
|
"response_type": "FACTUAL_SUMMARY",
|
||||||
|
"result_mode": "confirmed_balance",
|
||||||
|
"truth_mode": "confirmed",
|
||||||
|
"answer_shape": "confirmed_factual",
|
||||||
|
"balance_confirmed": True,
|
||||||
|
},
|
||||||
|
"session_summary": {},
|
||||||
|
},
|
||||||
|
entries=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(step_state["execution_status"], "exact")
|
||||||
|
self.assertNotIn("wrong_result_mode", step_state["violated_invariants"])
|
||||||
|
self.assertEqual(step_state["business_first_review"]["issue_codes"], [])
|
||||||
|
self.assertEqual(step_state["acceptance_status"], "validated")
|
||||||
|
|
||||||
|
def test_literal_result_mode_contract_still_rejects_mismatch(self) -> None:
|
||||||
|
step_state = dcl.build_scenario_step_state(
|
||||||
|
scenario_id="literal_result_mode_demo",
|
||||||
|
domain="inventory",
|
||||||
|
step={
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Literal result mode",
|
||||||
|
"depends_on": [],
|
||||||
|
"question_template": "show stock",
|
||||||
|
"expected_result_mode": "exact_inventory_balance",
|
||||||
|
},
|
||||||
|
step_index=1,
|
||||||
|
question_resolved="show stock",
|
||||||
|
analysis_context={},
|
||||||
|
turn_artifact={
|
||||||
|
"assistant_message": {
|
||||||
|
"reply_type": "factual",
|
||||||
|
"text": "Short: stock is confirmed.",
|
||||||
|
"message_id": "msg-1",
|
||||||
|
"trace_id": "trace-1",
|
||||||
|
},
|
||||||
|
"technical_debug_payload": {
|
||||||
|
"detected_mode": "address_query",
|
||||||
|
"fallback_type": "none",
|
||||||
|
"mcp_call_status": "matched_non_empty",
|
||||||
|
"response_type": "FACTUAL_SUMMARY",
|
||||||
|
"result_mode": "confirmed_balance",
|
||||||
|
"truth_mode": "confirmed",
|
||||||
|
"answer_shape": "confirmed_factual",
|
||||||
|
"balance_confirmed": True,
|
||||||
|
},
|
||||||
|
"session_summary": {},
|
||||||
|
},
|
||||||
|
entries=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIn("wrong_result_mode", step_state["violated_invariants"])
|
||||||
|
|
||||||
def test_exact_confirmed_document_followup_sets_runtime_factual_validation(self) -> None:
|
def test_exact_confirmed_document_followup_sets_runtime_factual_validation(self) -> None:
|
||||||
step_state = dcl.build_scenario_step_state(
|
step_state = dcl.build_scenario_step_state(
|
||||||
scenario_id="svk_pivot",
|
scenario_id="svk_pivot",
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,84 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||||
|
|
||||||
|
import agent_runtime_manifest as runtime_manifest
|
||||||
import save_agent_semantic_run as saver
|
import save_agent_semantic_run as saver
|
||||||
|
|
||||||
|
|
||||||
|
def write_json(path: Path, payload: object) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
class SaveAgentSemanticRunTests(unittest.TestCase):
|
class SaveAgentSemanticRunTests(unittest.TestCase):
|
||||||
|
def write_clean_truth_run(self, run_dir: Path, *, include_runtime: bool) -> None:
|
||||||
|
write_json(
|
||||||
|
run_dir / "pack_state.json",
|
||||||
|
{
|
||||||
|
"final_status": "accepted",
|
||||||
|
"review_overall_status": "pass",
|
||||||
|
"acceptance_gate_passed": True,
|
||||||
|
"no_unresolved_p0": True,
|
||||||
|
"unresolved_p0_count": 0,
|
||||||
|
"steps_total": 1,
|
||||||
|
"steps_passed": 1,
|
||||||
|
"steps_failed": 0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
write_json(run_dir / "truth_review.json", {"summary": {"overall_status": "pass"}})
|
||||||
|
write_json(
|
||||||
|
run_dir / "business_review.json",
|
||||||
|
{
|
||||||
|
"overall_business_status": "pass",
|
||||||
|
"steps_with_business_failures": 0,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if include_runtime:
|
||||||
|
write_json(
|
||||||
|
run_dir / runtime_manifest.EFFECTIVE_RUNTIME_FILE_NAME,
|
||||||
|
{
|
||||||
|
"schema_version": runtime_manifest.EFFECTIVE_RUNTIME_SCHEMA_VERSION,
|
||||||
|
"runner": "domain_truth_harness.run-live",
|
||||||
|
"git_sha": "test-sha",
|
||||||
|
"llm_provider": "local",
|
||||||
|
"llm_model": "test-model",
|
||||||
|
"temperature": 0.0,
|
||||||
|
"max_output_tokens": 2048,
|
||||||
|
"prompt_version": "normalizer_v2_0_2",
|
||||||
|
"prompt_source": "file",
|
||||||
|
"prompt_hash": "abc123",
|
||||||
|
"prompt_registry_status": "pass",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_validate_truth_harness_run_refuses_missing_effective_runtime(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
run_dir = Path(tmp)
|
||||||
|
self.write_clean_truth_run(run_dir, include_runtime=False)
|
||||||
|
|
||||||
|
with self.assertRaisesRegex(RuntimeError, "reproducibility manifest"):
|
||||||
|
saver.validate_truth_harness_run_dir(run_dir)
|
||||||
|
|
||||||
|
def test_validate_truth_harness_run_includes_effective_runtime_summary(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
run_dir = Path(tmp)
|
||||||
|
self.write_clean_truth_run(run_dir, include_runtime=True)
|
||||||
|
|
||||||
|
metadata = saver.validate_truth_harness_run_dir(run_dir)
|
||||||
|
|
||||||
|
self.assertEqual(metadata["validation_status"], "accepted_live_replay")
|
||||||
|
self.assertEqual(metadata["effective_runtime"]["runner"], "domain_truth_harness.run-live")
|
||||||
|
self.assertEqual(metadata["effective_runtime"]["llm_model"], "test-model")
|
||||||
|
|
||||||
def test_extract_questions_resolves_scenario_pack_bindings(self) -> None:
|
def test_extract_questions_resolves_scenario_pack_bindings(self) -> None:
|
||||||
spec = {
|
spec = {
|
||||||
"schema_version": "domain_scenario_pack_v1",
|
"schema_version": "domain_scenario_pack_v1",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue