{"path": "scripts/check-briefs.py", "filename": "check-briefs.py", "size_bytes": 324690, "ext": ".py", "content": "#!/usr/bin/env python3\n\"\"\"\nShadow Dynamics — unified QUALITY runner.\n\nSingle source of truth for all editorial/structural checks against a brief\nHTML. Usable as:\n\n  CLI:    python3 scripts/check-briefs.py <file-or-glob>\n  Lib:    from check_briefs import run_checks\n  HTTP:   POST /quality-check {\"htmlContent\": \"...\"}  (brief-saver)\n\nNOTE — module caching: brief-saver.py imports this file once and caches the\nmodule in sys.modules['sd_check_briefs'] for the lifetime of the process.\nEditing this file does NOT affect the HTTP endpoint until the service is\nrestarted: `sudo systemctl restart brief-saver`. The CLI re-imports per\ninvocation, so `python3 scripts/check-briefs.py` always reflects the\non-disk version. Test battery (`scripts/test-system.py`) also runs CLI-side,\nso 72/72 passing does not prove the live API picked up the changes.\n\nEach check returns either None (passed) or a string (failure reason).\nChecks are categorised:\n  - BLOCKERS: hard fails — would refuse publication\n  - WARNINGS: soft fails — logged for review\n\nVeracity (numerical-claim correctness) is OUT OF SCOPE for this runner;\nthat requires an LLM-judge or live web verification. The runner only\ncatches structural/editorial regressions and known patterns observed in\nprior failure modes.\n\nWhen a check fails in production (called from n8n), the runner appends to\n/var/log/sd-quality-failures.log so failure modes can be reviewed and\nthe prompt template iterated. 
That log is the input side of the feedback\nloop.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport glob\nimport json\nimport os\nimport re\nimport sys\nfrom dataclasses import dataclass, asdict\nfrom datetime import date, datetime, timezone\nfrom typing import Callable\n\n# ── --sample helpers ─────────────────────────────────────────────────────────\n# Added 2026-05-27 PM per [[feedback_w_report_raw_count_vs_sampled_population]]\n# (N=3 false-positive clusters surfaced same-session on Brief 8: D01 89→18 hits,\n# P03 59→0 hits). Goal: mechanize the sample-the-population discipline so a\n# reader of any W-report can verify raw hit counts vs FP contamination by\n# re-running the runner with `--sample [N]`. Pattern: each check that fires\n# may attach a list of categorized samples to its result, rendered as an\n# indented block under the warning line.\n\n# Content-pattern bucket classifiers — heuristics on the bolded body text.\n# Order matters: most specific (confidence-band, header) first; magnitude-vs-\n# narrative buckets last; `other` is the fallback.\n_SAMPLE_CONFIDENCE_BAND_RE = re.compile(\n    r'^\\s*\\d+(?:[.,]\\d+)?\\s*[–\\-]\\s*\\d+(?:[.,]\\d+)?\\s*%',\n)\n_SAMPLE_PRED_HEADER_RE = re.compile(\n    r'^\\s*(?:PRED|SIGNAL|SE[ÑN]AL|Alert|Alerta|Scenario|Escenario)'\n    r'\\s+(?:\\d+[A-Z]?|[A-Z])\\s*[—–\\-]',\n    re.IGNORECASE,\n)\n_SAMPLE_BARE_MAGNITUDE_RE = re.compile(\n    r'^\\s*[~+\\-]?[€$£¥]?\\s*\\d+(?:[.,]\\d+)?\\s*[BMK%]?\\s*$',\n)\n_SAMPLE_MAGNITUDE_PRESENCE_RE = re.compile(\n    r'\\d+(?:[.,]\\d+)?\\s*[%BMK]?|[€$£¥]\\s*\\d+',\n)\n_SAMPLE_YEAR_RE = re.compile(r'\\b(?:19|20)\\d{2}\\b')\n\n\ndef _classify_sample_bucket(text: str) -> str:\n    \"\"\"Heuristic content-pattern classifier for --sample output.\n\n    Buckets (in priority order):\n      - confidence-band:          e.g. \"60-75%\" / \"55–65 %\"\n      - pred-signal-scenario-header: e.g. \"PRED 2 — ...\" / \"Scenario 5A — ...\"\n      - bare-magnitude:           e.g. 
\"~95%\" / \"$55.5B\" / \"+43%\"\n      - magnitude-with-context:   has magnitude + ≥4 words of prose\n      - narrative-phrase-with-year: >40 chars + 4-digit year, no magnitude\n      - other:                    fallback\n    \"\"\"\n    s = text.strip() if text else ''\n    if not s:\n        return 'other'\n    if _SAMPLE_CONFIDENCE_BAND_RE.match(s):\n        return 'confidence-band'\n    if _SAMPLE_PRED_HEADER_RE.match(s):\n        return 'pred-signal-scenario-header'\n    if _SAMPLE_BARE_MAGNITUDE_RE.match(s):\n        return 'bare-magnitude'\n    has_magnitude = bool(_SAMPLE_MAGNITUDE_PRESENCE_RE.search(s))\n    word_count = len(re.findall(r'\\b\\w+\\b', s))\n    if has_magnitude and word_count >= 4:\n        return 'magnitude-with-context'\n    if len(s) > 40 and _SAMPLE_YEAR_RE.search(s) and not has_magnitude:\n        return 'narrative-phrase-with-year'\n    return 'other'\n\n\n_SAMPLE_OPEN_TAG_RE = re.compile(\n    r'<(h2|h3|h4|p|td|th|li|strong|b|div|span)\\b[^>]*>',\n    re.IGNORECASE,\n)\n\n\ndef _enclosing_element(html: str, pos: int, lookback: int = 200) -> str:\n    \"\"\"Look back ~`lookback` chars in `html` from `pos` for the most recent\n    open tag (h2/h3/h4/p/td/th/li/strong/b/div/span). Returns tag name or '?'.\n    \"\"\"\n    start = max(0, pos - lookback)\n    window = html[start:pos]\n    last = None\n    for m in _SAMPLE_OPEN_TAG_RE.finditer(window):\n        last = m.group(1).lower()\n    return last or '?'\n\n\ndef _format_samples_block(samples: list[dict], limit: int) -> str:\n    \"\"\"Render a list of {container, content, strike_type, bucket} dicts as a\n    multi-line indented block. 
Truncates content to ~80 chars.\n    \"\"\"\n    if not samples:\n        return ''\n    lines: list[str] = []\n    for s in samples[:limit]:\n        content = (s.get('content') or '').strip()\n        if len(content) > 80:\n            content = content[:77] + '...'\n        container = s.get('container', '?')\n        bucket = s.get('bucket', 'other')\n        strike = s.get('strike_type', '')\n        strike_part = f' [{strike}]' if strike else ''\n        lines.append(\n            f'      - <{container}> [{bucket}]{strike_part} {content!r}'\n        )\n    total = len(samples)\n    shown = min(limit, total)\n    if total > shown:\n        lines.append(f'      ... ({total - shown} more sample(s) omitted)')\n    return '\\n'.join(lines)\n\n\n# ── BLOCKERS ─────────────────────────────────────────────────────────────────\n\ndef check_scaffold_pseudo_citations(html: str) -> str | None:\n    \"\"\"True scaffold leaks: bare markers without content, or pseudo-pipe table breaks.\n\n    Content-bearing markers like [WEB: FT] or [INFERENCE: based on X+Y] are\n    intentional citations per EDITORIAL-CITE-DETAIL-01 and must remain in\n    output. 
This check only blocks markers that are clearly unfilled templates\n    or markers broken across HTML cells.\n    \"\"\"\n    patterns = [\n        # Bare markers — model emitted the template label but no content.\n        (r'\\[WEB\\s*\\]|\\[WEB:\\s*\\]', 'bare_web_marker'),\n        (r'\\[INFERENCE\\s*\\]|\\[INFERENCE:\\s*\\]', 'bare_inference_marker'),\n        (r'\\[POLL\\s*\\]|\\[POLL:\\s*\\]', 'bare_poll_marker'),\n        # Pseudo-pipe form that breaks tables when rendered:\n        (r'\\[\\w+\\s*\\\\?</td><td>', 'table_broken_citation'),\n    ]\n    hits = [name for rx, name in patterns if re.search(rx, html)]\n    return f'scaffold pseudo-citations: {\", \".join(hits)}' if hits else None\n\n\ndef check_result_label_duplicate(html: str) -> str | None:\n    \"\"\"RESULT/RESULTADO duplicate prefix from prompt template.\"\"\"\n    if re.search(r'RESULT:\\s*RESULT(?:ADO)?', html):\n        return 'duplicated RESULT/RESULTADO label'\n    return None\n\n\ndef check_preflight_section_leak(html: str) -> str | None:\n    \"\"\"Pre-flight section §0 should never reach published HTML.\"\"\"\n    if re.search(r'PRE-FLIGHT VERIFICATION|VERIFICACI[ÓO]N PREVIA', html):\n        return 'pre-flight verification section leaked'\n    return None\n\n\ndef check_event_date_leak(html: str) -> str | None:\n    \"\"\"`EVENT DATE:` and `FORMAT DECISION` markers from prompt scaffolding.\"\"\"\n    hits = []\n    if re.search(r'EVENT DATE:\\s', html):\n        hits.append('EVENT_DATE')\n    if re.search(r'FORMAT DECISION', html):\n        hits.append('FORMAT_DECISION')\n    return f'prompt scaffolding leaked: {\", \".join(hits)}' if hits else None\n\n\ndef check_inline_paywall(html: str) -> str | None:\n    \"\"\"Mid-content paywall divs — distinct from the legitimate footer CTA.\"\"\"\n    if re.search(\n        r'<div class=\"cta-box\"[^>]*style=\"margin:32px 0 0\"',\n        html,\n    ):\n        return 'inline paywall div mid-content'\n    if 'class=\"pw-full\"' in html or 
'class=\"pw-btn\"' in html:\n        return 'pw-full/pw-btn paywall block present'\n    if re.search(r'filter:\\s*blur\\(\\s*5px', html):\n        return 'blur(5px) paywall overlay present'\n    if re.search(r'\\$19/(month|mes)\\b', html) and \\\n       'cta-box' in html.split('$19/', 1)[0][-200:]:\n        return 'inline price tag mid-content'\n    return None\n\n\ndef check_old_branding(html: str) -> str | None:\n    \"\"\"Brand renamed Forecaster → Intelligence on 2026-04-26.\"\"\"\n    if re.search(r'Shadow Dynamics Forecaster|SHADOW DYNAMICS FORECASTER',\n                 html):\n        return 'old branding \"Forecaster\" present'\n    return None\n\n\ndef check_about_scaffold_label_list(html: str) -> str | None:\n    \"\"\"About-section sentence enumerating scaffold tags as a feature.\"\"\"\n    if re.search(\n        r'\\[WEF\\],\\s*\\[EURASIA\\],\\s*\\[POLL\\],\\s*\\[WEB\\],\\s*\\[INFERENCE\\]',\n        html,\n    ):\n        return 'about-section lists scaffold-source tags as feature'\n    return None\n\n\ndef check_hero_scaffold_pill(html: str) -> str | None:\n    \"\"\"Hero pill that surfaced scaffold markers as a UI element.\"\"\"\n    if re.search(r'\\[WEF\\]\\s*\\[EURASIA\\]\\s*\\[POLL\\]\\s*\\[INFERENCE\\]', html):\n        return 'hero pill displays scaffold markers'\n    return None\n\n\ndef check_section_count_parity(html: str) -> str | None:\n    \"\"\"EN and ES h2 sections should have the same count (±1 tolerance).\"\"\"\n    en = len(re.findall(\n        r'<h2[^>]*class=\"[^\"]*\\blang-en\\b[^\"]*\"', html\n    ))\n    es = len(re.findall(\n        r'<h2[^>]*class=\"[^\"]*\\blang-es\\b[^\"]*\"', html\n    ))\n    if en == 0 and es == 0:\n        # Maybe wrapped sections, not lang-class h2 — skip\n        return None\n    if abs(en - es) > 1:\n        return f'EN/ES section count mismatch: EN={en}, ES={es}'\n    return None\n\n\ndef check_unbalanced_tables(html: str) -> str | None:\n    \"\"\"Catch broken tables (e.g. 
from the pseudo-pipe artefact).\"\"\"\n    opens = html.count('<table')\n    closes = html.count('</table>')\n    tr_o = html.count('<tr')\n    tr_c = html.count('</tr>')\n    td_o = html.count('<td')\n    td_c = html.count('</td>')\n    issues = []\n    if opens != closes:\n        issues.append(f'table {opens}/{closes}')\n    if tr_o != tr_c:\n        issues.append(f'tr {tr_o}/{tr_c}')\n    if td_o != td_c:\n        issues.append(f'td {td_o}/{td_c}')\n    return f'unbalanced HTML: {\"; \".join(issues)}' if issues else None\n\n\n# ── Structural-integrity helpers (Brief 7 prep priorities item 4) ──\n# Origin: BACKLOG §ESTA SEMANA #8 item 4 (pre-generation hardening); was\n# \"design-only; not yet ticketed.\" Activated 2026-05-12 PM via\n# BRIER-DISCIPLINE-SET-APPLICATION-01 autonomous task path.\n\n_H3_INSIDE_P_RE = re.compile(\n    r'<p\\b[^>]*>(?:(?!</p>)[\\s\\S]){0,2000}?<h3\\b',\n    re.IGNORECASE,\n)\n\n_DARK_BG_BARE_STRONG_RE = re.compile(\n    # Match a container with dark background, then within ~3000 chars (without\n    # closing the container) find a <strong> that does NOT have inline color\n    # style override. Anchors: hex bg starting #0/#1 (slate/navy range),\n    # CSS var --slate, or class cover/site-footer/dark-bg.\n    r'(?:background\\s*:\\s*(?:#[01][0-9a-fA-F]{5}\\b|var\\(\\s*--slate\\s*\\))|'\n    r'<(?:div|section|footer)[^>]*class=\"[^\"]*\\b(?:cover|site-footer|dark-bg)\\b[^\"]*\")'\n    r'[^>]*>(?:(?!</(?:div|section|footer)>)[\\s\\S]){0,3000}?'\n    r'<strong(?![^>]*style=\"[^\"]*color)',\n    re.IGNORECASE,\n)\n\n\ndef _check_toc_sync_issues(html: str) -> list[str]:\n    \"\"\"Return list of TOC anchor href targets that don't exist as id= in the document.\n\n    Scoped to anchors that look TOC-shaped (path is fragment-only, target is\n    section/§-style identifier). 
Filters out external links + self-canonical.\n    \"\"\"\n    # Collect all id= values in the document\n    ids = set(re.findall(r'\\bid=\"([^\"]+)\"', html, re.IGNORECASE))\n    # Collect href=\"#xxx\" fragments\n    anchors = re.findall(r'<a\\s+[^>]*href=\"#([^\"]+)\"', html, re.IGNORECASE)\n    # Filter to TOC-shape (alphanumeric + dash; skip pure-numeric footnote anchors\n    # like #fn1 that may be defined elsewhere via different schemes)\n    toc_shaped = [a for a in anchors\n                  if re.fullmatch(r'[a-zA-Z][\\w\\-§]{1,80}', a)\n                  and not a.startswith('fn')\n                  and not a.startswith('cite')]\n    missing = [a for a in toc_shaped if a not in ids]\n    # Dedupe preserving order\n    seen: set[str] = set()\n    out: list[str] = []\n    for a in missing:\n        if a not in seen:\n            seen.add(a)\n            out.append(a)\n    return out\n\n\ndef _check_sections_inside_tab_full(html: str) -> list[str]:\n    \"\"\"Detect <section class=\"sources|disclaimer\"> placed inside <div id=\"tab-full\">.\n\n    Mirror of test_sources_disclaimer_outside_tab_full in scripts/test-system.py\n    but scoped to single-brief input. 
Surfaces during quality_check (before\n    promote), not only via test battery.\n    \"\"\"\n    m = re.search(r'<div id=\"tab-full\"[^>]*>', html)\n    if not m:\n        return []\n    pos = m.end()\n    depth = 1\n    end_of_tab_full = None\n    for tag in re.finditer(r'<(/?)div\\b', html[pos:]):\n        if tag.group(1) == '/':\n            depth -= 1\n            if depth == 0:\n                end_of_tab_full = pos + tag.end()\n                break\n        else:\n            depth += 1\n    if end_of_tab_full is None:\n        return []\n    issues: list[str] = []\n    for sec_class in ('sources', 'disclaimer'):\n        sec_pos = html.find(f'<section class=\"{sec_class}\"')\n        if sec_pos != -1 and sec_pos < end_of_tab_full:\n            issues.append(sec_class)\n    return issues\n\n\ndef check_html_structural_integrity(html: str) -> str | None:\n    \"\"\"Detect 4 classes of structural malformation that render unpredictably or hide content.\n\n    Origin: BACKLOG §ESTA SEMANA #8 (Brief 7 prep priorities) item 4 — Brief 6\n    surfaced sources-inside-tab-full + dark-bg-strong invisibility patterns;\n    Brief 5 surfaced h3-inside-p; TOC drift hit Brief 2 / 4 historically. This\n    check consolidates the family as a single brief-level WARN.\n\n    Failure modes:\n\n      1. h3-inside-p — block-level <h3> nested inside an open <p>; HTML invalid\n         per the content-model rules; renders unpredictably across browsers and\n         RSS readers.\n\n      2. dark-bg-bare-strong — <strong> inside a dark-background container\n         (style background:#0X/#1X, var(--slate), or class cover/site-footer/\n         dark-bg) without an inline color override. The brief CSS template's\n         `strong{color:var(--slate)}` rule (#1A1F2E) renders bold labels\n         invisible against dark navy/slate backgrounds. First hit cluster:\n         a424cea (Brief 5 revisions) + feeff61 (Brief 6 executive alert).\n\n      3. 
toc-sync-broken — TOC anchor href targets missing as id= in document.\n         Drift cause: section retitled but TOC entry not updated, or\n         vice-versa. Symptom: in-page navigation breaks silently.\n\n      4. sections-inside-tab-full — <section class=\"sources|disclaimer\"> placed\n         inside <div id=\"tab-full\">. CSS rule `.tab-content{display:none}` on\n         inactive tab hides everything inside; readers on the default Brief\n         tab never see Sources or the legal disclaimer. Twice-bitten on Spain\n         Blackout 2026-04-27. Test-battery analog:\n         test_sources_disclaimer_outside_tab_full.\n\n    WARNING level — calibrate against Brief 7+8 emissions before promoting to\n    BLOCKER per feedback_runner_calibrate_then_ratchet. Reports each issue\n    distinctly so per-mode false-positive rates calibrate independently.\n    \"\"\"\n    issues: list[str] = []\n\n    if _H3_INSIDE_P_RE.search(html):\n        issues.append('h3-inside-p')\n\n    if _DARK_BG_BARE_STRONG_RE.search(html):\n        issues.append('dark-bg-bare-strong')\n\n    toc_missing = _check_toc_sync_issues(html)\n    if toc_missing:\n        sample = toc_missing[:3]\n        more = f' (+{len(toc_missing) - 3} more)' if len(toc_missing) > 3 else ''\n        issues.append(f'toc-sync-broken: missing id= for {sample}{more}')\n\n    tab_issues = _check_sections_inside_tab_full(html)\n    if tab_issues:\n        issues.append(f'sections-inside-tab-full: {\", \".join(tab_issues)}')\n\n    if issues:\n        return f'{len(issues)} structural integrity issue(s): {\" || \".join(issues)}'\n    return None\n\n\n# ── Posterior-predictive runner checks (FORECASTING-DISCIPLINE-LESSONS-2026-05-12-01\n#    sub-item 3): runner predicts brief properties that SHOULD appear under Tier A\n#    rules, then measures actual brief output. Catches drift between prompt-encoded\n#    discipline and emitted briefs. Per Gelman posterior-predictive-check pattern. 
──\n\n# Tolerant of SD header style drift across briefs (audit 2026-05-13):\n#   - Bare \"FORMAL PREDICTIONS\" (Briefs 1-2)\n#   - \"10. FORMAL PREDICTIONS\" / \"10. PREDICCIONES FORMALES\" (Brief 3 numeric prefix)\n#   - \"SECTION VII: FORMAL PREDICTIONS\" / \"SECCIÓN VII: PREDICCIONES FORMALES\" (Brief 5)\n#   - \"SECTION VIII: FORMAL PREDICTION\" singular + \"SECCIÓN VIII: PREDICCIÓN FORMAL\" (Brief 6)\n# Excludes \"HISTORIAL DE PREDICCIONES\" (track record, semantically distinct).\n# Bug history: prior regex required plural English-only with optional \"§\" prefix\n# only; silently bypassed Brief 5 (SECTION VII prefix) + Brief 6 (singular bilingual)\n# + Brief 3 (numeric prefix). E3/E5/E9 posterior-predictive checks did not fire on\n# those briefs at promote-time. Filed as RUNNER-FORMAL-PREDICTIONS-REGEX-BILINGUAL-01.\n_FORMAL_PREDICTIONS_SECTION_RE = re.compile(\n    r'<h2[^>]*>\\s*'\n    r'(?:§\\s*[IVX]*\\s*|'\n    r'SECTION\\s+[IVX]+\\s*:?\\s*|'\n    r'SECCI[ÓO]N\\s+[IVX]+\\s*:?\\s*|'\n    r'\\d+\\.\\s*'\n    r')?'\n    r'(?:FORMAL\\s+PREDICTION(?:S)?|PREDICCI[ÓO]N(?:ES)?\\s+FORMAL(?:ES)?)'\n    r'[^<]*</h2>(.*?)(?=<h2|<footer\\b|$)',\n    re.IGNORECASE | re.DOTALL,\n)\n\n\ndef check_prediction_e3_cluster_id_reference(html: str) -> str | None:\n    \"\"\"E3-cluster-ref (posterior-predictive): when a prediction includes an\n    \"Independence:\" sentence, that sentence should reference the cluster taxonomy\n    OR positively assert structural independence — not just be a generic boilerplate.\n\n    Per Tier A E3 rule + data/prediction_clusters.yaml 5 clusters identified in\n    audit §7. 
Catches \"Independence: this prediction is independent.\" class of\n    empty assertion that doesn't engage the cluster framework.\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    ind_re = re.compile(\n        r'Independenc(?:e|ia)\\s*:\\s*([^.<]{5,400}\\.)',\n        re.IGNORECASE,\n    )\n    inds = ind_re.findall(section)\n    if not inds:\n        return None\n\n    issues = []\n    for i, ind_text in enumerate(inds, 1):\n        has_cluster_ref = bool(re.search(\n            r'\\bcluster[-_]?\\d|cluster[-_]?id|cluster\\s+\\w+|cluster-\\d-[a-z-]+',\n            ind_text,\n            re.IGNORECASE,\n        ))\n        has_structural_ind = bool(re.search(\n            r'structural(ly)?\\s+independ|independ\\w*\\s+from|no\\s+correlation|'\n            r'sin\\s+correlación|independencia\\s+estructural|distinct\\s+driver|'\n            r'unrelated\\s+upstream',\n            ind_text,\n            re.IGNORECASE,\n        ))\n        if not (has_cluster_ref or has_structural_ind):\n            issues.append(i)\n\n    if issues:\n        return (\n            f'E3 independence-cluster-ref: Independence sentence(s) at position(s) '\n            f'{issues} (1-indexed within Formal Predictions section) lack explicit '\n            f'cluster_id reference OR structural-independence assertion. Per Tier A '\n            f'E3 + data/prediction_clusters.yaml taxonomy, Independence sentences '\n            f'should engage the cluster framework, not state generic independence.'\n        )\n    return None\n\n\ndef check_prediction_e5_ternary_sum_100(html: str) -> str | None:\n    \"\"\"E5-ternary-sum (posterior-predictive): TERNARY-SCENARIO label triggers\n    expectation that 3 scenarios with explicit probabilities summing to 100% (±2%\n    tolerance for rounding) appear in the prediction block.\n\n    Per Tier A E5 rule. 
Catches \"TERNARY-SCENARIO: 40% / 35% / 20%\" (sums to 95%)\n    or \"TERNARY-SCENARIO: 50% / 30%\" (only 2 probabilities present) emission errors.\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    ternary_re = re.compile(r'TERNARY[-\\s]SCENARIO', re.IGNORECASE)\n    ternary_matches = list(ternary_re.finditer(section))\n    if not ternary_matches:\n        return None\n\n    issues = []\n    for i, t_match in enumerate(ternary_matches, 1):\n        # Window: 2000 chars after the label (covers most prediction block sizes)\n        window = section[t_match.end():t_match.end() + 2000]\n        # Find probabilities; constrain to 1-99 to avoid matching footnote numbers\n        probs = re.findall(r'\\b(\\d{1,2})\\s*%', window)\n        if len(probs) < 3:\n            issues.append(f'ternary #{i}: only {len(probs)} probability tokens found within 2000-char window')\n        else:\n            three = [int(p) for p in probs[:3]]\n            s = sum(three)\n            if not (98 <= s <= 102):\n                issues.append(f'ternary #{i}: probabilities {three}% sum to {s}% (expected 100%±2)')\n\n    if issues:\n        return (\n            f'E5 ternary-sum-100: TERNARY-SCENARIO emission(s) violate sum-to-100 '\n            f'expectation: {\"; \".join(issues)}. Per Tier A E5 rule, scenario-decomposed '\n            f'predictions must have ≥2 mutually-exclusive scenarios with probabilities '\n            f'summing to 100% (±2% rounding tolerance).'\n        )\n    return None\n\n\ndef check_prediction_e9_pos_threshold_numeric(html: str) -> str | None:\n    \"\"\"E9-POS-numeric (posterior-predictive): POS-THRESHOLD / POS-EVENT-DATE labels\n    trigger expectation that a numeric threshold (X% / $X / N units / over Y) appears\n    in proximity to the label, not narrative magnitude (\"substantial\" / \"large\").\n\n    Per Tier A E9 rule. 
Catches \"POS-THRESHOLD: substantial defence spending increase\"\n    (no number) vs the expected \"POS-THRESHOLD: ≥$400B annual defence procurement\".\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    pos_re = re.compile(\n        r'POS[-\\s](?:THRESHOLD|EVENT[-\\s]DATE|EVENT)',\n        re.IGNORECASE,\n    )\n    pos_matches = list(pos_re.finditer(section))\n    if not pos_matches:\n        return None\n\n    numeric_threshold_re = re.compile(\n        r'(?:≥|>=|>|at\\s+least|exceeds?|over|greater\\s+than|más\\s+de|al\\s+menos)\\s*'\n        r'[\\$€£]?\\s*[\\d,]+(?:\\.\\d+)?\\s*(?:[%]|billion|million|trillion|bn|mn|tn|'\n        r'\\b[a-zA-Z]{1,30}\\b)?',\n        re.IGNORECASE,\n    )\n    issues = []\n    for i, p_match in enumerate(pos_matches, 1):\n        # Window: 1000 chars after label\n        window = section[p_match.end():p_match.end() + 1000]\n        if not numeric_threshold_re.search(window):\n            label = section[p_match.start():p_match.end()].strip()\n            issues.append(f'POS label #{i} (\"{label}\")')\n\n    if issues:\n        return (\n            f'E9 POS-numeric: POS-event prediction(s) lack explicit numeric threshold '\n            f'within 1000-char proximity to class label: {\"; \".join(issues)}. 
Per Tier A '\n            f'E9 rule, POS-event predictions must specify quantitative threshold (≥ N / '\n            f'over $X / exceeds N% / at least Y units), not narrative magnitude.'\n        )\n    return None\n\n\n# ── A2 PREDICTION-EMISSION-VALIDATOR-01 (2026-05-15 PM) ─────────────────────\n# Semantic per-prediction Brier Tier A validation extending existing E3/E5/E9\n# checks with E1 base-rate-sentence + E2 named-verification-doc + E4 class-label.\n# WARN-only on Brief 7-8 (calibrate-then-ratchet per feedback_runner_calibrate_then_ratchet);\n# promote to BLOCKER Brief 9+ if FP rate ≤1/brief during calibration.\n\n_BASE_RATE_SENTENCE_RE = re.compile(\n    # Accepts: \"Base rate:\", \"Base rate anchoring:\", \"Base rate (E1):\",\n    # \"Base rate anchoring (E1):\", Spanish equivalents with/without (E1) marker.\n    # Brief 7+ Tier A format uses \"(E1):\" parenthesized E-marker explicitly.\n    r'(base\\s+rate(?:\\s+anchoring)?(?:\\s*\\(E1\\))?\\s*:|'\n    r'(?:anclaje\\s+de\\s+)?tasa\\s+base(?:\\s*\\(E1\\))?\\s*:|'\n    r'reference\\s+class(?:\\s*\\(E1\\))?\\s*:|'\n    r'clase\\s+de\\s+referencia(?:\\s*\\(E1\\))?\\s*:)',\n    re.IGNORECASE,\n)\n\n_NAMED_VERIFICATION_DOC_RE = re.compile(\n    r'(https?://[^\\s\"<>]+|'\n    r'\\bCIK\\s+\\d{7,}|'\n    r'\\b\\w+\\.(?:gov|edu|org|eu|int)/[^\\s\"<>]+|'\n    r'\\b(?:10-Q|10-K|8-K|S-1|20-F|6-K)\\b|'\n    r'\\bJC\\s+\\d{4}\\s+\\d{1,3}\\b|'\n    r'\"[^\"]{15,}\"|'\n    r'\\bMOFCOM\\s+(?:Notice|Announcement|Order)\\s+\\d+\\b|'\n    r'\\bReg(?:ulation)?\\s*\\(EU\\)\\s*\\d{4}/\\d{3,5}\\b|'\n    r'\\bArticle\\s+\\d+(?:\\(\\d+\\))?\\s+(?:of\\s+)?DORA\\b)',\n    re.IGNORECASE,\n)\n\n_CLASS_LABEL_RE = re.compile(\n    r'\\b(?:TERNARY-SCENARIO|POS-THRESHOLD|NEG-REG-DATE|'\n    r'NEG-EVENT-DATE|POS-EVENT-DATE|NEG-COMM-DATE|NEG-LEG-DATE)\\b',\n)\n\n# Prediction-block splitting: <h3> headings OR <div class=\"prediction*\"> or\n# \"Probability:\" / \"We assess\" / numbered list with prediction-content boundaries.\n# 
Conservative: require boundaries to be the start of a substantive prediction\n# (200+ chars in the block) — filters section preambles.\n_PREDICTION_BLOCK_BOUNDARY_RE = re.compile(\n    r'(?:<h3\\b[^>]*>|<div[^>]*class=\"[^\"]*prediction[^\"]*\"[^>]*>|'\n    r'PRED-\\d{8}-\\d{3}|PRED\\s+\\d+\\s*[:\\-—–]|'\n    r'(?:^|\\n|<p[^>]*>)\\s*\\(?\\d+\\)\\s+[A-Z][^.]{20,}\\s+[—–-]\\s+)',\n    re.IGNORECASE,\n)\n\n\ndef _extract_formal_prediction_blocks(html: str) -> list[str]:\n    \"\"\"Split ALL §FORMAL PREDICTIONS sections into per-prediction blocks.\n\n    Iterates EVERY FORMAL PREDICTIONS section (brief tab + full tab + ES mirrors\n    when present) via finditer. Filters to sections that emit Brier-discipline\n    full-prediction format (≥1 <h3> boundary) — excludes summary-view sections\n    (brief tab paragraphs without <h3> boundaries) which would false-positive on\n    E2/E4 checks designed for the rich-format full-prediction blocks.\n\n    Pre-2026-05-17 PM behavior: used .search() (first match only) which hit the\n    brief tab summary view and missed the full tab rich PREDs entirely. Result:\n    PRED 1 at brief-tab position [1] flagged for E2/E4 (false positive) while\n    full-tab PREDs with proper Verification document (E2) + Class (E4) fields\n    were never audited.\n\n    Returns list of prediction-block strings, each ≥200 chars (filters preambles).\n    Returns [] if no qualifying section found.\n    \"\"\"\n    blocks_out = []\n    _seen_pred_ids: set[str] = set()  # dedupe EN/ES mirror copies by PRED id\n    # Anchored PRED boundary: requires the marker to start at <strong>, <h3>, or\n    # line/paragraph start. Avoids false-positives from cross-references like\n    # \"Independence (E3): ... 
distinct from PRED 1 regulator-side axis ...\" which\n    # would otherwise split mid-sentence.\n    anchored_pred_re = re.compile(\n        r'(?:<h3\\b[^>]*>|<div[^>]*class=\"[^\"]*prediction[^\"]*\"[^>]*>|'\n        r'<strong>\\s*PRED\\s*\\d+\\s*[:\\-—–]|'\n        r'PRED-\\d{8}-\\d{3}\\b)',\n        re.IGNORECASE,\n    )\n    for m in _FORMAL_PREDICTIONS_SECTION_RE.finditer(html):\n        section = m.group(1)\n        # Heuristic: full-prediction (Tier A) sections emit explicit E-marker\n        # labels like \"Verification document (E2):\" or \"Class (E4):\" or\n        # \"Independence (E3):\". Summary-view sections (brief tab paragraphs)\n        # use lowercase \"Verification:\" without the (E#) marker. Skip\n        # summary sections — they're intentionally compressed and should\n        # not be audited against Tier A discipline rules.\n        if not re.search(r'\\(E\\d+\\)', section):\n            continue\n        # Slice section into per-PRED blocks via anchored boundaries.\n        boundaries = [bm.start() for bm in anchored_pred_re.finditer(section)]\n        if not boundaries:\n            continue\n        for i, start in enumerate(boundaries):\n            end = boundaries[i + 1] if i + 1 < len(boundaries) else len(section)\n            block = section[start:end]\n            if len(block.strip()) <= 200:\n                continue\n            # 2026-06-01 FP-precision (Brief-9 Cluster 1a): an <h3>/<strong>\n            # boundary also matches the \"Predictions Considered But Not Shipped\"\n            # transparency appendix (rejected CANDIDATES — \"Candidate 1:\" /\n            # \"Candidata 1:\") and the ES MIRROR of every prediction. Auditing\n            # those as formal predictions inflated 3 real predictions to 8\n            # reported positions and drove spurious E1/E2/E4/premortem/multipath\n            # flags. 
Keep a block ONLY if it is headed by a formal-prediction id\n            # (PRED N) — that excludes the candidate appendix (which carries no\n            # PRED id) — and DEDUPE by that id so each logical prediction is\n            # audited once (the EN copy, which precedes its ES mirror in document\n            # order). ES translation-parity is covered separately by\n            # es_predcal_claim_parity / bilingual_extended_parity.\n            head = re.sub(r'<[^>]+>', ' ', block[:220])\n            if re.search(r'Considered But Not Shipped|consideradas pero no publicadas', head, re.I):\n                continue\n            idm = re.search(r'\\bPRED[\\s-]*(\\d+)\\b', head, re.I)\n            if not idm:\n                continue\n            pid = idm.group(1)\n            if pid in _seen_pred_ids:\n                continue\n            _seen_pred_ids.add(pid)\n            blocks_out.append(block)\n    return blocks_out\n\n\ndef check_prediction_e1_base_rate_sentence(html: str) -> str | None:\n    \"\"\"E1-base-rate (posterior-predictive): each formal prediction must contain\n    a 'Base rate anchoring:' / 'Reference class:' / Spanish equivalent sentence\n    preceding the probability statement.\n\n    Per Tier A E1 rule. Catches predictions emitted without explicit base-rate\n    anchoring (Brier discipline gap; predictions become bare confidence assertions\n    without reference-class grounding).\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    missing = [i for i, b in enumerate(blocks, 1) if not _BASE_RATE_SENTENCE_RE.search(b)]\n    if missing:\n        return (\n            f'E1 base-rate-sentence: prediction(s) at position {missing} (1-indexed) '\n            f'lack a \"Base rate anchoring:\" / \"Reference class:\" sentence (English or Spanish). 
'\n            f'Per Tier A E1 rule, every prediction must name the reference class + '\n            f'historical comparable before the probability statement.'\n        )\n    return None\n\n\ndef check_prediction_e2_named_verification_doc(html: str) -> str | None:\n    \"\"\"E2-named-verification-doc (posterior-predictive): each formal prediction\n    must name a specific verification document/filing — URL, CIK, regulation reference,\n    quoted document title — not just a source family.\n\n    Per Tier A E2 rule. Catches \"Source: SEC filings\" / \"Source: MOFCOM notices\"\n    generic-family-only emissions vs \"SEC EDGAR 10-Q of MSFT (CIK 0000789019)\" /\n    \"MOFCOM Notice 61\" / Article reference with Regulation cite.\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    missing = [i for i, b in enumerate(blocks, 1) if not _NAMED_VERIFICATION_DOC_RE.search(b)]\n    if missing:\n        return (\n            f'E2 named-verification-doc: prediction(s) at position {missing} (1-indexed) '\n            f'lack a specific verification document reference (URL / CIK / 10-X form / '\n            f'Regulation (EU) X/Y / DORA Article N / quoted document title / .gov/.eu URL). '\n            f'Per Tier A E2 rule, source-family-only references (e.g., \"SEC filings\") '\n            f'are insufficient — name the specific document or filing.'\n        )\n    return None\n\n\ndef check_prediction_e4_class_label(html: str) -> str | None:\n    \"\"\"E4-class-label (posterior-predictive): each formal prediction must carry\n    an explicit class label from the Tier A taxonomy.\n\n    Per Tier A E4 rule. Acceptable labels: TERNARY-SCENARIO / POS-THRESHOLD /\n    NEG-REG-DATE / NEG-EVENT-DATE / POS-EVENT-DATE / NEG-COMM-DATE / NEG-LEG-DATE.\n    Class 6 CONDITIONAL-FRACTURE is retired. 
Catches predictions emitted without\n    class label OR with non-canonical labels.\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    missing = [i for i, b in enumerate(blocks, 1) if not _CLASS_LABEL_RE.search(b)]\n    if missing:\n        return (\n            f'E4 class-label: prediction(s) at position {missing} (1-indexed) '\n            f'lack explicit class label. Per Tier A E4 rule, predictions must carry '\n            f'one of: TERNARY-SCENARIO / POS-THRESHOLD / NEG-REG-DATE / NEG-EVENT-DATE / '\n            f'POS-EVENT-DATE / NEG-COMM-DATE / NEG-LEG-DATE. Class 6 CONDITIONAL-FRACTURE '\n            f'retired per 2026-05-12 audit.'\n        )\n    return None\n\n\ndef check_prediction_class_distribution(html: str) -> str | None:\n    \"\"\"RUNNER-PREDICTION-DISCIPLINE-CHECKS-01 #1 (2026-05-18): brier-retro-W20 §6.2\n    class-distribution strengthening. For briefs with N≥3 formal predictions:\n    (a) combined NEG-* classes ≤ 40% of total\n    (b) ≥1 POS-THRESHOLD OR ≥1 TERNARY-SCENARIO present\n\n    Per brier-retro §1: 35-pred ledger has ~66% negative-event class\n    over-representation. Negative-binary class has lowest information value\n    per Brier point — easy to write, easy to be right (regulatory inertia,\n    rare events). 
Brief-level cap forces analytical balance.\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    n = len(blocks)\n    if n < 3:\n        return None  # rule only fires N≥3\n\n    neg_count = 0\n    pos_threshold_count = 0\n    ternary_count = 0\n    for b in blocks:\n        m = _CLASS_LABEL_RE.search(b)\n        if not m:\n            continue  # unlabeled — caught by E4 check separately\n        label = m.group(0).upper()\n        if label.startswith('NEG-'):\n            neg_count += 1\n        elif label == 'POS-THRESHOLD':\n            pos_threshold_count += 1\n        elif label == 'TERNARY-SCENARIO':\n            ternary_count += 1\n\n    issues = []\n    neg_pct = 100.0 * neg_count / n\n    if neg_pct > 40.0:\n        issues.append(\n            f'NEG-* class share {neg_pct:.0f}% ({neg_count}/{n}) exceeds 40% cap'\n        )\n    if pos_threshold_count == 0 and ternary_count == 0:\n        issues.append(\n            f'no POS-THRESHOLD nor TERNARY-SCENARIO in {n}-prediction brief — '\n            f'analytical surface dominated by null-event classes'\n        )\n    if issues:\n        return (\n            f'prediction-class-distribution: {\"; \".join(issues)}. '\n            f'Per brier-retro-W20 §6.2: N≥3 briefs require ≥1 POS-THRESHOLD or '\n            f'TERNARY-SCENARIO + combined NEG-* ≤40%. 
Class diversity prevents '\n            f'aggregate-Brier domination by easy-null calls.'\n        )\n    return None\n\n\n# Compound-structure patterns — Class 6 anti-pattern detection per brier-retro §2.\n# \"if X then Y\" conditional — resolves indeterminate if condition unfires.\n# \"X AND Y\" conjoined — both distinct events required; resolution ambiguous if\n# only one fires.\n_COMPOUND_CONDITIONAL_RE = re.compile(\n    r'\\b(?:if|si)\\s+[^.<]{15,200}\\bthen\\b|'\n    r'\\bconditional\\s+on\\b|\\bcondicionad[ao]\\s+a\\b',\n    re.IGNORECASE,\n)\n_COMPOUND_CONJOINED_RE = re.compile(\n    r'\\bboth\\s+\\([a-z]\\)[^.<]{10,200}\\band\\s+\\([a-z]\\)\\b|'\n    r'\\bambos\\s+\\([a-z]\\)[^.<]{10,200}\\by\\s+\\([a-z]\\)\\b',\n    re.IGNORECASE,\n)\n# Indeterminate-handling disclosure (\"resolves indeterminate if...\", \"null outcome if...\")\n_INDETERMINATE_DISCLOSURE_RE = re.compile(\n    r'\\b(?:indeterminate|null\\s+(?:outcome|resolution)|unresolved|'\n    r'indeterminad[ao]|sin\\s+resoluci[óo]n|resoluci[óo]n\\s+null)\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_prediction_compound_structure(html: str) -> str | None:\n    \"\"\"RUNNER-PREDICTION-DISCIPLINE-CHECKS-01 #2 (2026-05-18): brier-retro-W20 §6.1\n    Class 6 hard-retirement anti-pattern detection.\n\n    Flags predictions with compound structure (conditional \"if X then Y\" OR\n    conjoined \"both (a) X and (b) Y\") without explicit indeterminate-outcome\n    disclosure. 
Per audit §2 Class 6: CONDITIONAL-FRACTURE / CONJOINED-EVENT\n    predictions are retired Brief 7+ — they resolve indeterminate when\n    condition unfires (contributes zero Brier signal) or when one event\n    fires but not the other (resolution ambiguous).\n\n    Catches: PRED-20260501-003 (Iran-conditional NATO denials) +\n    PRED-20260501-004 (EDA report finding + Council session both required)\n    canonical cases.\n\n    Acceptable: explicit indeterminate-handling disclosure in prediction block\n    (e.g., \"if Iran ceasefire collapses... else resolves indeterminate\").\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n\n    issues = []\n    for i, b in enumerate(blocks, 1):\n        has_compound = bool(_COMPOUND_CONDITIONAL_RE.search(b) or _COMPOUND_CONJOINED_RE.search(b))\n        if not has_compound:\n            continue\n        has_disclosure = bool(_INDETERMINATE_DISCLOSURE_RE.search(b))\n        if not has_disclosure:\n            issues.append(i)\n\n    if issues:\n        return (\n            f'prediction-compound-structure: prediction(s) at position {issues} '\n            f'(1-indexed within FORMAL PREDICTIONS) show Class 6 anti-pattern — '\n            f'compound conditional \"if X then Y\" OR conjoined \"both (a) X and (b) Y\" '\n            f'without indeterminate-outcome disclosure. Per brier-retro-W20 §6.1 + '\n            f'§2 Class 6: split into independent predictions OR add explicit '\n            f'\"resolves indeterminate if [condition unfires]\" disclosure.'\n        )\n    return None\n\n\n# Date pattern for prediction resolution: \"by 2026-XX-XX\" preferred (explicit\n# resolution date), fallback any YYYY-MM-DD. 
Captures explicit horizon target.\n_PREDICTION_RESOLUTION_DATE_RE = re.compile(\n    r'(?:\\bby|\\bpara|\\bantes\\s+de)\\s+(20\\d{2}-\\d{2}-\\d{2})',\n    re.IGNORECASE,\n)\n_ANY_DATE_RE = re.compile(r'(20\\d{2}-\\d{2}-\\d{2})')\n# Brief publication date extraction sources (priority order):\n# (1) schema.org JSON-LD `datePublished` field — canonical metadata\n# (2) \"<strong>Publish date:</strong> YYYY-MM-DD\" displayed pattern\n# (3) <time datetime=\"YYYY-MM-DD\"> meta tag\n_JSONLD_DATE_PUBLISHED_RE = re.compile(\n    r'\"datePublished\"\\s*:\\s*\"(\\d{4})-(\\d{2})-(\\d{2})',\n)\n_PUBLISH_DATE_STRONG_RE = re.compile(\n    r'<strong>\\s*Publish\\s+date\\s*:\\s*</strong>\\s*(\\d{4})-(\\d{2})-(\\d{2})',\n    re.IGNORECASE,\n)\n_HTML_TIME_DATETIME_RE = re.compile(\n    r'<time\\b[^>]*datetime=[\"\\']?(\\d{4})-(\\d{2})-(\\d{2})',\n    re.IGNORECASE,\n)\n\n\ndef _extract_brief_publish_date(html: str) -> str | None:\n    \"\"\"Best-effort extraction of brief publication date as YYYY-MM-DD from HTML.\n\n    Priority: (1) schema.org JSON-LD datePublished; (2) <strong>Publish date:</strong>\n    displayed pattern; (3) <time datetime> meta. Returns None if all sources missing.\n    \"\"\"\n    for pat in (_JSONLD_DATE_PUBLISHED_RE, _PUBLISH_DATE_STRONG_RE, _HTML_TIME_DATETIME_RE):\n        m = pat.search(html)\n        if m:\n            return f'{m.group(1)}-{m.group(2)}-{m.group(3)}'\n    return None\n\n\ndef check_prediction_deadline_distribution(html: str) -> str | None:\n    \"\"\"RUNNER PREDICTIONS-SUB-DEADLINE-LADDER-DISTRIBUTION-01 (2026-05-18):\n    enforce horizon distribution + anti-clustering on prediction deadlines.\n\n    Brier-retro-W20 §3 found: 35-pred ledger has 10 entries (29%) clustered\n    at 2026-10-31 — 7 of 10 are mechanical D-QA-22 floor-#2 backfill, not\n    event-driven dates. Cluster risk: one macro cycle dominates aggregate\n    Brier signal. 
Per audit: 35 raw entries ≈ 26 effective independent\n    calibration data points.\n\n    Rules (N>=3 only):\n    - Anti-clustering: <=40% of predictions share same falsifiable_by_date\n    - Horizon ladder: <=80% of predictions in same horizon band\n      (short 0-6m / medium 6-18m / long 18m+). Single-band briefs flagged.\n      Soft floor — does not require >=1 in each band (per brier-retro\n      recommendation that floor #2 should drive WHICH event we predict on,\n      not WHICH date we attach to unrelated events).\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    n = len(blocks)\n    if n < 3:\n        return None\n\n    # Extract resolution date per block: \"by YYYY-MM-DD\" preferred, else any date\n    dates_per_block: list[str | None] = []\n    for b in blocks:\n        m = _PREDICTION_RESOLUTION_DATE_RE.search(b)\n        if not m:\n            m = _ANY_DATE_RE.search(b)\n        dates_per_block.append(m.group(1) if m else None)\n\n    issues = []\n\n    # Anti-clustering check\n    from collections import Counter\n    valid_dates = [d for d in dates_per_block if d]\n    if valid_dates:\n        date_counts = Counter(valid_dates)\n        max_date, max_count = date_counts.most_common(1)[0]\n        cluster_pct = 100.0 * max_count / n\n        if cluster_pct > 40.0:\n            issues.append(\n                f'cluster: {max_count}/{n} predictions share resolution '\n                f'date {max_date} ({cluster_pct:.0f}% > 40% cap)'\n            )\n\n    # Horizon-band distribution (requires brief publish date)\n    publish_date = _extract_brief_publish_date(html)\n    if publish_date and valid_dates:\n        from datetime import date\n        try:\n            pub_d = date.fromisoformat(publish_date)\n            bands = {'short': 0, 'medium': 0, 'long': 0}\n            for d_str in valid_dates:\n                d_obj = date.fromisoformat(d_str)\n                months = (d_obj - pub_d).days / 30.44\n                if months <= 
6:\n                    bands['short'] += 1\n                elif months <= 18:\n                    bands['medium'] += 1\n                else:\n                    bands['long'] += 1\n            max_band, max_band_n = max(bands.items(), key=lambda kv: kv[1])\n            band_pct = 100.0 * max_band_n / len(valid_dates)\n            if band_pct > 80.0:\n                issues.append(\n                    f'horizon-concentration: {max_band_n}/{len(valid_dates)} '\n                    f'predictions in {max_band}-horizon band ({band_pct:.0f}% > 80% '\n                    f'soft floor; bands short<=6m / medium 6-18m / long >18m)'\n                )\n        except (ValueError, TypeError):\n            pass  # silent skip on date parse error\n\n    if issues:\n        return (\n            f'prediction-deadline-distribution: {\"; \".join(issues)}. '\n            f'Per brier-retro-W20 §3: mechanical date clustering reduces '\n            f'effective independent calibration N. Prefer event-driven '\n            f'resolution dates over rule-driven floor backfill.'\n        )\n    return None\n\n\n# Extension B (adversarial pre-mortem) — failure modes documented per prediction.\n# Spec: PREDICTION_QUALITY_DISCIPLINE_EXTENSIONS_2026-05-13.md §4 Extension B.\n_PREMORTEM_RE = re.compile(\n    r'(?:Failure\\s+mode|Pre-?mortem|Adversarial\\s+pre-?mortem|Modo\\s+de\\s+falla|Pre[-\\s]mortem\\s+adversarial)\\s*:',\n    re.IGNORECASE,\n)\n\n\ndef check_prediction_adversarial_premortem(html: str) -> str | None:\n    \"\"\"Extension B runner check — adversarial pre-mortem per prediction.\n\n    Per spec §4 Extension B: each prediction should document ≥1 failure mode\n    with detection signal + probability adjustment. 
Catches narrative-coherence\n    trap (SD dominant failure mode per audit) before lock.\n\n    WARN-only Brief 8+ calibration; ratchet to BLOCKER Brief 10+ per\n    [[feedback_runner_calibrate_then_ratchet]].\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    missing = [i for i, b in enumerate(blocks, 1) if not _PREMORTEM_RE.search(b)]\n    if missing:\n        return (\n            f'B adversarial pre-mortem: prediction(s) at position {missing} '\n            f'(1-indexed) lack \"Failure mode:\" / \"Pre-mortem:\" sentence with '\n            f'detection signal + probability adjustment. Per spec §4 Extension B: '\n            f'each prediction must document ≥1 orthogonal failure mode to '\n            f'intercept narrative-coherence-trap before publication.'\n        )\n    return None\n\n\n# Extension D (multi-path elicitation) — 2-3 independent reasoning paths.\n# Spec: PREDICTION_QUALITY_DISCIPLINE_EXTENSIONS_2026-05-13.md §4 Extension D.\n_MULTIPATH_RE = re.compile(\n    r'(?:Path\\s+[1-3]\\b|Camino\\s+[1-3]\\b|Multi-?path|Multi[-\\s]camino|Outside-?view\\s+path|Structural\\s+path|Analogical\\s+path)',\n    re.IGNORECASE,\n)\n\n\ndef check_prediction_multi_path_elicitation(html: str) -> str | None:\n    \"\"\"Extension D runner check — multi-path elicitation per prediction.\n\n    Per spec §4 Extension D: each prediction documents 2-3 independent\n    reasoning paths to same probability with convergence/divergence note.\n    Intercepts single-path-bias (analyst anchoring on one framing).\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    missing = [i for i, b in enumerate(blocks, 1) if not _MULTIPATH_RE.search(b)]\n    if missing:\n        return (\n            f'D multi-path elicitation: prediction(s) at position {missing} '\n            f'(1-indexed) lack multi-path reasoning markers (Path 1/2/3 or '\n            
f'outside-view/structural/analogical framing). Per spec §4 '\n            f'Extension D: 2-3 independent reasoning paths surfaced with '\n            f'convergence/divergence note prevent single-path-bias.'\n        )\n    return None\n\n\n# Extension G (Type 0 silence catalog) — brief-level discipline.\n# Spec: PREDICTION_QUALITY_DISCIPLINE_EXTENSIONS_2026-05-13.md §4 Extension G.\n_TYPE_ZERO_CATALOG_RE = re.compile(\n    r'(?:Predictions\\s+considered\\s+but\\s+not\\s+shipped|Predicciones\\s+consideradas\\s+pero\\s+no\\s+emitidas|Type\\s+0\\s+silence|Catálogo\\s+de\\s+silencio)',\n    re.IGNORECASE,\n)\n\n\ndef check_brief_type_zero_catalog(html: str) -> str | None:\n    \"\"\"Extension G runner check — brief-level Type 0 silence catalog.\n\n    Per spec §4 Extension G: brief footer/appendix lists \"Predictions\n    considered but not shipped\" with categories. Documents editorial\n    discipline (anti-cherry-picking). Brief-level rule, not per-prediction.\n\n    WARN if absent + brief has FORMAL PREDICTIONS section (i.e., a real\n    brief, not a stub or alert).\n    \"\"\"\n    if not _FORMAL_PREDICTIONS_SECTION_RE.search(html):\n        return None  # not a full brief\n    if _TYPE_ZERO_CATALOG_RE.search(html):\n        return None  # catalog present\n    return (\n        'G Type 0 silence catalog: brief lacks \"Predictions considered but '\n        'not shipped\" footer/appendix section. Per spec §4 Extension G: '\n        'editorial discipline includes documenting omitted candidates with '\n        'categories (speculative / insufficient_observable_trigger / '\n        'horizon_too_long / etc). 
Anti-cherry-picking signal.'\n    )\n\n\n# A5 — long-horizon calibration variance disclosure (net-new beyond spec).\n# Brier-retro long-horizon discipline + Tetlock empirical 24m+ calibration decay.\n_LONG_HORIZON_DISCLOSURE_RE = re.compile(\n    r'long-?horizon\\s+(?:calibration\\s+)?variance|Tetlock\\s+(?:variance|horizon)|'\n    r'horizonte\\s+(?:largo|extendido)\\s+variance|calibration\\s+(?:decay|degradation)|'\n    r'long-?horizon\\s+uncertainty\\s+driver',\n    re.IGNORECASE,\n)\n\n\ndef check_prediction_long_horizon_disclosure(html: str) -> str | None:\n    \"\"\"A5 net-new check — long-horizon (>24m) predictions require Tetlock\n    variance disclosure.\n\n    Per Tetlock GJP empirical: prediction calibration decays beyond 24m\n    horizon. Long-horizon predictions stated without variance disclosure\n    inflate false confidence.\n\n    Logic: for each prediction with falsifiable_by_date - published_date > 24m,\n    require explicit \"long-horizon calibration variance\" or equivalent\n    uncertainty driver text.\n    \"\"\"\n    blocks = _extract_formal_prediction_blocks(html)\n    if not blocks:\n        return None\n    publish_date = _extract_brief_publish_date(html)\n    if not publish_date:\n        return None  # cannot compute horizons without publish date\n    from datetime import date\n    try:\n        pub_d = date.fromisoformat(publish_date)\n    except (ValueError, TypeError):\n        return None\n    long_horizon_missing = []\n    for i, b in enumerate(blocks, 1):\n        m = _PREDICTION_RESOLUTION_DATE_RE.search(b) or _ANY_DATE_RE.search(b)\n        if not m:\n            continue\n        try:\n            res_d = date.fromisoformat(m.group(1))\n        except (ValueError, TypeError):\n            continue\n        months = (res_d - pub_d).days / 30.44\n        if months > 24:\n            if not _LONG_HORIZON_DISCLOSURE_RE.search(b):\n                long_horizon_missing.append((i, round(months, 1)))\n    if long_horizon_missing:\n       
 items = '; '.join(f'PRED #{i} ({m:.1f}m horizon)' for i, m in long_horizon_missing)\n        return (\n            f'A5 long-horizon disclosure: long-horizon prediction(s) {items} '\n            f'lack \"long-horizon calibration variance per Tetlock\" or '\n            f'equivalent uncertainty driver text. Per Tetlock GJP empirical: '\n            f'calibration decays exponentially beyond 24m; disclosure prevents '\n            f'false-confidence inflation.'\n        )\n    return None\n\n\n# ── WARNINGS ─────────────────────────────────────────────────────────────────\n\ndef check_translation_body_parity(html: str) -> str | None:\n    \"\"\"ES word count should be within reasonable ratio of EN (typically 1.0-1.5).\"\"\"\n    en_blocks = re.findall(\n        r'<div[^>]*class=\"[^\"]*\\blang-en\\b[^\"]*\"[^>]*>(.*?)</div>',\n        html, re.DOTALL,\n    )\n    es_blocks = re.findall(\n        r'<div[^>]*class=\"[^\"]*\\blang-es\\b[^\"]*\"[^>]*>(.*?)</div>',\n        html, re.DOTALL,\n    )\n    en_text = ' '.join(re.sub(r'<[^>]+>', ' ', b) for b in en_blocks)\n    es_text = ' '.join(re.sub(r'<[^>]+>', ' ', b) for b in es_blocks)\n    en_w = len(en_text.split())\n    es_w = len(es_text.split())\n    if en_w == 0:\n        return None\n    ratio = es_w / en_w\n    if ratio < 0.85:\n        return (f'ES body shorter than EN: EN={en_w}w ES={es_w}w '\n                f'(ratio={ratio:.2f}, expected 0.95-1.5)')\n    if ratio > 1.7:\n        return (f'ES body unexpectedly long: EN={en_w}w ES={es_w}w '\n                f'(ratio={ratio:.2f}, expected 0.95-1.5)')\n    return None\n\n\ndef check_meta_tags_present(html: str) -> str | None:\n    \"\"\"OG / canonical / description must be in <head>.\"\"\"\n    missing = []\n    if 'property=\"og:title\"' not in html:\n        missing.append('og:title')\n    if 'property=\"og:description\"' not in html:\n        missing.append('og:description')\n    if 'rel=\"canonical\"' not in html:\n        missing.append('canonical')\n    if 
not re.search(r'<meta\\s+name=\"description\"', html):\n        missing.append('description')\n    return f'missing meta tags: {\", \".join(missing)}' if missing else None\n\n\ndef check_local_link_integrity(html: str, briefs_dir: str) -> str | None:\n    \"\"\"Local /briefs/SD_*.html links must point to existing files.\"\"\"\n    missing = []\n    seen = set()\n    for m in re.finditer(r'href=[\"\\'](/briefs/SD_\\d+_\\d+_[\\w]+\\.html)[\"\\']',\n                         html):\n        target = m.group(1)\n        if target in seen:\n            continue\n        seen.add(target)\n        local = os.path.join(briefs_dir, os.path.basename(target))\n        if not os.path.exists(local):\n            missing.append(target)\n    return f'broken local links: {\", \".join(missing[:3])}' if missing else None\n\n\ndef check_minimum_word_count(html: str) -> str | None:\n    \"\"\"Brief should be substantive — flag suspiciously short content.\"\"\"\n    visible = re.sub(r'<style.*?</style>|<script.*?</script>', '',\n                     html, flags=re.DOTALL)\n    text = re.sub(r'<[^>]+>', ' ', visible)\n    words = len(text.split())\n    if words < 1500:\n        return f'low word count: {words}w (typical brief 1500+)'\n    return None\n\n\n# Heuristic: significant numerical claims that should carry a citation.\n# The pattern catches: percentages at or above the noise floor (≥5%),\n# currency amounts in millions/billions/trillions, and year ranges.\n# A \"citation marker\" is anything within ±500 chars that looks like a\n# source attribution: bracketed source label, a URL or hyperlink, a\n# parenthetical author/year, a \"Source:\" label, a known source-org name,\n# or a footnote anchor.\n\nNUMERICAL_CLAIM_RE = re.compile(\n    r'(?:'\n    # Percentages ≥5% (matches the prompt rule threshold; excludes 1-4%\n    # which the editorial standard does not require to be sourced).\n    # 5-9% or 10-99% or 100-999%.\n    r'\\b(?:[5-9]|\\d{2,3})(?:[.,]\\d+)?\\s*%(?!s)'\n    # Currency amounts ≥1 million 
(millions/billions/trillions in any form).\n    r'|[€$£]\\s*\\d+(?:[.,]\\d+)?\\s*(?:billion|million|trillion|bn|mn|tn|millones|miles de millones)\\b'\n    r'|\\b\\d+(?:[.,]\\d+)?\\s*(?:billion|million|trillion)\\s+(?:dollars|euros|USD|EUR)\\b'\n    # Year ranges (2014-2025 etc.) — useful for evaluating event windows.\n    r'|\\b(?:1[89]\\d{2}|20[0-4]\\d)\\b(?=\\s*[-–—]\\s*(?:1[89]\\d{2}|20[0-4]\\d))'\n    r')',\n    re.IGNORECASE,\n)\n\n# Source-organization names that count as \"cited in prose\" when they\n# appear within the proximity window. Editorial style favours\n# conversational citation (\"the IMF's April 2026 WEO identifies...\")\n# over bracket-formal (\"[IMF, WEO 2026]\"). The runner must accept both.\nSOURCE_ORG_NAMES = (\n    r'IMF|FMI|NATO|OTAN|OECD|OCDE|WEF|World\\s+Economic\\s+Forum|'\n    r'IEA|EIA|BIS|ECB|BCE|Federal\\s+Reserve|Fed|World\\s+Bank|Banco\\s+Mundial|'\n    r'Eurostat|UN|United\\s+Nations|ONU|EU|European\\s+(?:Commission|Council|Parliament|Union)|'\n    r'CE|Comisión\\s+Europea|Reuters|Bloomberg|Financial\\s+Times|FT|'\n    r'New\\s+York\\s+Times|NYT|Washington\\s+Post|Wall\\s+Street\\s+Journal|WSJ|'\n    r'Economist|Foreign\\s+Affairs|Foreign\\s+Policy|Politico|Le\\s+Monde|'\n    r'El\\s+País|S&P|Moody\\'?s|Fitch|REE|CNMC|ENTSO-E|TSMC|ASML|'\n    r'Tesla|Microsoft|Google|Apple|Meta|OpenAI|Anthropic|xAI|COSCO|'\n    r'Pew|Eurasia\\s+Group|RAND|Brookings|Atlantic\\s+Council|CSIS|Chatham\\s+House|'\n    r'CWA|Munich\\s+Security\\s+Conference|'\n    # Defense + critical-minerals research institutions (added 2026-05-02 retrofit Brief 5)\n    r'SIPRI|EDA|Agencia\\s+Europea\\s+de\\s+Defensa|IISS|MERICS|EIB|BEI|USGS|Lynas|EUR-Lex|'\n    # Finance / payments / stablecoin sector source orgs (RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01,\n    # 2026-05-29, Payments brief residual). Evidence-driven: only orgs that appear as source\n    # attributions in the corpus. 
Dropped over-broad short tokens (Visa/Mastercard/Citi) per the\n    # audit's FP-risk note; kept distinctive multi-char names.\n    r'Tether|Circle|Paxos|Standard\\s+Chartered|BlackRock|Franklin\\s+Templeton|'\n    r'CIPS|SWIFT|mBridge|MiCAR|MiCA|GENIUS\\s+Act|Ripple|Bruegel|Bessent'\n)\n\nCITATION_NEAR_RE = re.compile(\n    r'(?:'\n    r'\\[[A-Z]{2,}[^\\]]{0,80}\\]'                # [WEF Global Risks Report 2026]\n    r'|\\([A-Z][a-zA-Z]+\\s+(?:et\\s+al\\.?,?\\s+)?\\d{4}\\)'  # (Smith et al. 2024)\n    r'|\\bhttps?://[^\\s<>\"]+'                   # bare URL\n    r'|<a\\s+[^>]*href=[\"\\']https?://'          # hyperlink\n    r'|\\b(?:source|fuente|según|per|via)\\s*:\\s*[A-Z]'  # \"Source: NATO...\"\n    r'|\\b(?:' + SOURCE_ORG_NAMES + r')\\b'      # prose mention of known source org\n    r')',\n    re.IGNORECASE,\n)\n\n\n# Internal-forecast exclusion (RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01, 2026-05-29).\n# Port of the D01 _D01_STRUCTURAL_EXCLUSION_RE family to numerical_citation_contract:\n# a numerical claim sitting in forecast/scenario context is the brief's OWN\n# analytical output (a probability, scenario odds, falsifier threshold, reasoning-\n# path band, [SD-estimate] projection), NOT an external claim that needs a source.\n# Searched in a ±150-char window around a claim that already failed the citation\n# proximity test. Covers EN + ES. Residual NOT covered here (separate FP classes,\n# documented on the ticket): year-range temporal scopes (\"2026-2030 window\"),\n# bare scenario-table cells (tags stripped → no <td> context), and source-org\n# recognizer gaps (orgs absent from SOURCE_ORG_NAMES, e.g. 
Standard Chartered).\n_NCC_INTERNAL_FORECAST_RE = re.compile(\n    r'\\[SD-(?:estimate|inference|attribution|aggregate)\\b'      # [SD-estimate] / [SD-estimate: …]\n    r'|\\b(?:probability|probabilidad)\\b'                        # the magnitude IS a probability\n    r'|\\b(?:Scenario|Escenario)\\s+[A-Z0-9]'                     # scenario header\n    r'|\\b(?:we\\s+assess|estimamos|evaluamos|asignamos)\\b'       # self-assessment\n    r'|\\b(?:Falsifier|Falsador)\\b'                              # falsifier block\n    r'|\\b(?:most\\s+likely\\s+outcome|resultado\\s+m[áa]s\\s+probable|desenlace\\s+m[áa]s\\s+probable)\\b'\n    r'|\\b(?:annualis|annualiz|anualiz)'                         # annualization derivation\n    r'|\\b(?:trajectory|trayectoria)\\b'                          # projection trajectory\n    r'|\\b(?:if|si)\\b[^.]{0,70}?\\b(?:reaches|alcanza|settlement|liquidaci[óo]n|exceeds|supera|cuota|share|sustained|sostiene)\\b'\n    r'|\\bPath\\s+\\d'                                             # multi-path elicitation (EN)\n    r'|\\b(?:outside[\\s-]view|base[\\s-]?rate\\s+path|structural\\s+path|analogical\\s+path|convergence|path-span|band-widening)\\b'\n    r'|\\bV[íi]a\\s+(?:\\d|perspectiva|estructural|anal[óo]gica|de\\s+tasa)'   # multi-path elicitation (ES)\n    r'|\\bConvergencia\\b',\n    re.IGNORECASE,\n)\n\n\n# A year-range claim (NUMERICAL_CLAIM_RE's range alternative) returns only the\n# leading 4-digit year — a bare YYYY token is a temporal scope, not a sourced\n# magnitude (RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01, 2026-05-29).\n_NCC_YEAR_RANGE_TOKEN_RE = re.compile(r'^(?:1[89]\\d{2}|20[0-4]\\d)$')\n\n\ndef check_numerical_citation_contract(html: str) -> str | None:\n    \"\"\"Heuristic: numerical claims should sit near a citation marker.\n\n    Removes <style>, <script>, and elements with KPI-like classes\n    (kpi-val, score, badge) that show summary numbers already cited\n    elsewhere in the body. 
Footnote anchors (<a href=\"#fn-N\">) are SD's\n    canonical citation form (the footnote body carries the URL/source, per\n    EDITORIAL-FOOTNOTE-URL-01) — they are marked [FN] before tag-stripping\n    so a footnote-anchored magnitude counts as cited. Then for each\n    remaining numerical claim, checks ±500 chars of surrounding text for\n    any citation-shaped token (incl. [FN]).\n\n    Window calibration (RUNNER-THRESHOLD-CALIBRATION-AUDIT-01, 2026-05-29):\n    widened ±250 → ±500 after the audit showed the original window was a\n    D01-100-char-class miss — citations sit inline but 250-500 chars away in\n    SD's paragraph style, so ±250 over-fired (e.g. DORA 23→4, COSCO 10→0,\n    Spain 6→1 uncited when widened to ±500). ±500 ≈ the same-paragraph\n    citation-neighbourhood in SD prose; ±750 over-widens (a single citation\n    would \"cover\" multiple paragraphs, defeating the proximity intent).\n\n    NB: Heuristic with known limits — the brief's OWN scenario/falsifier\n    magnitudes (probabilities, [SD-estimate] projections) are internal\n    analytical output, not external claims, so an uncited claim is skipped\n    when its ±150-char context matches _NCC_INTERNAL_FORECAST_RE or carries\n    the [SDFC] scenario-table-cell marker; bare year-range tokens are skipped\n    too. Reported as a WARNING for human review, not a blocker. 
A\n    high count (≥8) is the signal worth investigating.\n    \"\"\"\n    # Strip non-prose containers\n    cleaned = re.sub(r'<style[^>]*>.*?</style>', ' ', html, flags=re.DOTALL)\n    cleaned = re.sub(r'<script[^>]*>.*?</script>', ' ', cleaned, flags=re.DOTALL)\n    # Remove elements that exist for UI summary, not editorial claim:\n    # KPI cards (.kpi, .kpi-val, .kpi-label), score chips (.score),\n    # and badges (.badge, .pill).\n    cleaned = re.sub(\n        r'<(?:span|div|td)[^>]*class=\"[^\"]*\\b(?:kpi|kpi-val|kpi-label|score|badge|pill|tag|sh|chart-title)\\b[^\"]*\"[^>]*>.*?</(?:span|div|td)>',\n        ' ', cleaned, flags=re.DOTALL,\n    )\n    # Mark footnote anchors as a [FN] citation token before tag-stripping —\n    # the footnote body carries the URL/source (EDITORIAL-FOOTNOTE-URL-01), so a\n    # footnote-anchored magnitude IS cited. Without this the tag-strip below\n    # erases #fn-N anchors and the canonical SD citation form reads as uncited.\n    cleaned = re.sub(\n        r'<a\\s+[^>]*href=\"#fn[^\"]*\"[^>]*>.*?</a>', ' [FN] ',\n        cleaned, flags=re.DOTALL | re.IGNORECASE,\n    )\n    # Scenario-probability table cells (e.g. <td><strong>35%</strong></td>) are the\n    # brief's OWN forecast odds. 
The tag-strip below erases the <td> context, so mark\n    # a standalone numeric-percentage cell with an internal-forecast token ([SDFC])\n    # first — analogous to the [FN] trick (RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01).\n    cleaned = re.sub(\n        r'<td[^>]*>\\s*(?:<strong>\\s*)?\\d{1,3}(?:[.,]\\d+)?\\s*%\\s*(?:</strong>\\s*)?</td>',\n        r' [SDFC] \\g<0> ', cleaned, flags=re.IGNORECASE,\n    )\n    text = re.sub(r'<[^>]+>', ' ', cleaned)\n    text = re.sub(r'&[a-z]+;', ' ', text)\n    text = re.sub(r'\\s+', ' ', text)\n\n    uncited: list[str] = []\n    for m in NUMERICAL_CLAIM_RE.finditer(text):\n        if len(uncited) >= 12:\n            break\n        # Year-range temporal scopes (\"2026-2030 window\") are not sourced\n        # magnitudes. NUMERICAL_CLAIM_RE's year-range alternative returns only\n        # the leading 4-digit year (2nd year is a zero-width lookahead), so a\n        # bare YYYY token is always a temporal scope — skip it (2026-05-29).\n        if _NCC_YEAR_RANGE_TOKEN_RE.match(m.group(0).strip()):\n            continue\n        # ±500 char window — calibrated 2026-05-29 (was ±250, a D01-class\n        # too-narrow miss; see docstring). [FN] marks a footnote anchor.\n        window_start = max(0, m.start() - 500)\n        window_end = min(len(text), m.end() + 500)\n        window = text[window_start:window_end]\n        if not (CITATION_NEAR_RE.search(window) or '[FN]' in window):\n            # Structural exclusion: skip claims that are the brief's own\n            # forecast/scenario magnitudes (internal analytical output, not\n            # external claims). 
See _NCC_INTERNAL_FORECAST_RE; [SDFC] marks a\n            # standalone scenario-probability table cell.\n            fctx = text[max(0, m.start() - 150):min(len(text), m.end() + 150)]\n            if '[SDFC]' in fctx or _NCC_INTERNAL_FORECAST_RE.search(fctx):\n                continue\n            ctx = text[max(0, m.start() - 50):min(len(text), m.end() + 50)]\n            uncited.append(f'\"…{ctx.strip()}…\"')\n\n    # Threshold: 8 strikes ≈ enough to suspect systemic uncited claims\n    if len(uncited) >= 8:\n        return (f'{len(uncited)}+ numerical claims without nearby citation; '\n                f'sample: {\" | \".join(uncited[:3])}')\n    return None\n\n\nURL_RE = re.compile(r'href=[\"\\'](https?://[^\"\\'<>]+)[\"\\']')\n\n\n# ── L5: critical-fact registry ───────────────────────────────────────────────\n# Loaded lazily on first call. Set FACTS_REGISTRY_PATH env var to override.\n\n_FACTS_CACHE: dict | None = None\n\n\ndef _load_facts_registry() -> dict:\n    \"\"\"Load data/facts.yaml once. Returns {} if missing or yaml not installed.\"\"\"\n    global _FACTS_CACHE\n    if _FACTS_CACHE is not None:\n        return _FACTS_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _FACTS_CACHE = {}\n        return _FACTS_CACHE\n    path = os.environ.get(\n        'FACTS_REGISTRY_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'facts.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or {}\n    except OSError:\n        data = {}\n    _FACTS_CACHE = data if isinstance(data, dict) else {}\n    return _FACTS_CACHE\n\n\ndef _filter_registry_by_brand(registry: dict, brand: str | None) -> dict:\n    \"\"\"Filter facts.yaml-style registry by brand field. Default behavior:\n\n    - If brand is None: return all entries (back-compat — pre-FORK-PREP-GAP-1\n      callers without brand context get full registry).\n    - If brand is a specific value (e.g. 
'shadow-dynamics' or 'clave'):\n      return entries whose `brand` field matches OR equals 'both'. Entries\n      with no `brand` field default to 'shadow-dynamics' (pre-2026-05-09\n      schema implicit assumption preserved).\n\n    FORK-PREP-GAP-1-DATA-BRAND-AWARE-01 (BACKLOG 2026-05-09): brand-aware\n    schema unblocks first Clave brief by ensuring SD-specific facts\n    (NATO 5%, IMF 60%, COSCO Hamburg 24.9%, etc.) don't fire on Clave\n    Hispanic-context briefs and vice versa.\n    \"\"\"\n    if brand is None:\n        return registry\n    filtered = {}\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        entry_brand = entry.get('brand', 'shadow-dynamics')\n        if entry_brand == brand or entry_brand == 'both':\n            filtered[fact_id] = entry\n    return filtered\n\n\ndef check_critical_facts(html: str, brand: str | None = None) -> str | None:\n    \"\"\"Match HTML against the curated registry of recurring fact errors.\n\n    Each registry entry has `forbidden: [{pattern, reason}, ...]`. A pattern\n    match (case-insensitive) is a warning — not a blocker, since false\n    positives are possible for narrow contexts. Use `[fact_id] reason …` in\n    the detail so the failure log clusters cleanly for the retrospective.\n\n    `brand` (optional) filters registry to entries matching the brief's brand\n    (or `both`). When None, all entries apply (back-compat; pre-FORK-PREP-GAP-1\n    callers preserved). 
Derived from brief filename by `_derive_brand_from_filename`.\n    \"\"\"\n    registry = _filter_registry_by_brand(_load_facts_registry(), brand)\n    if not registry:\n        return None\n    hits: list[str] = []\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        for rule in entry.get('forbidden') or []:\n            pat = rule.get('pattern')\n            reason = rule.get('reason', '')\n            context_negate = rule.get('context_negate')\n            if not pat:\n                continue\n            try:\n                m = re.search(pat, html, flags=re.IGNORECASE)\n                if not m:\n                    continue\n                if context_negate:\n                    win = 400\n                    s = max(0, m.start() - win)\n                    e = min(len(html), m.end() + win)\n                    if re.search(context_negate, html[s:e], flags=re.IGNORECASE):\n                        continue\n                hits.append(f'[{fact_id}] {reason}')\n                break  # one hit per fact is enough\n            except re.error:\n                continue\n    if hits:\n        return f'{len(hits)} fact(s) flagged: {\" || \".join(hits[:3])}'\n    return None\n\n\ndef check_magnitude_framing_consistency(html: str) -> str | None:\n    \"\"\"Detect anti-canonical magnitude framings against facts.yaml::magnitude_flips.\n\n    Catches errors like 'capacity runs 40% short of wartime' when canonical is\n    'capacity at 40% of wartime' — same number, opposite semantic. 
Per-claim\n    Layer 4 (Tavily) verification cannot catch this because both framings verify\n    against the same source numerically; the bug is cross-section coherence\n    against a canonical magnitude framing recorded in data/facts.yaml.\n\n    Schema: facts.yaml entries optionally carry a `magnitude_flips` list of\n    `{pattern, reason}` regex rules — same structure as `forbidden` but\n    semantically scoped to magnitude-flip errors (vs direct contradictions).\n\n    Origin: Brief 6 Critical Minerals pre-promote audit 2026-05-08 caught the\n    same fact (`ammunition_capacity_wartime`) framed canonically in 2 places\n    and anti-canonically in 2 places within the same document; runner missed\n    because the existing `forbidden` patterns target the contradiction\n    direction ('fully adequate'), not the magnitude flip ('40% short').\n    \"\"\"\n    registry = _load_facts_registry()\n    if not registry:\n        return None\n    hits: list[str] = []\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        for rule in entry.get('magnitude_flips') or []:\n            pat = rule.get('pattern')\n            reason = rule.get('reason', '')\n            if not pat:\n                continue\n            try:\n                if re.search(pat, html, flags=re.IGNORECASE):\n                    hits.append(f'[{fact_id}] {reason}')\n                    break  # one hit per fact is enough\n            except re.error:\n                continue\n    if hits:\n        return f'{len(hits)} magnitude flip(s): {\" || \".join(hits[:3])}'\n    return None\n\n\n# ── PATTERN RECOGNITION CITATIONS ────────────────────────────────────────────\n# Tracker: PATTERN-RECOGNITION-SYSTEM-01\n# Spec:    EDITORIAL-PATTERN-XREF-01 (Forecaster prompt) + data/patterns.yaml\n# Loaded lazily; respects PATTERNS_REGISTRY_PATH env override.\n\n_PATTERNS_CACHE: set[str] | None = None\n\n\ndef _load_pattern_ids() -> set[str]:\n    
\"\"\"Return set of registered pattern_ids from data/patterns.yaml.\n    Empty set if missing or yaml unavailable (gracefully no-ops the check).\"\"\"\n    global _PATTERNS_CACHE\n    if _PATTERNS_CACHE is not None:\n        return _PATTERNS_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _PATTERNS_CACHE = set()\n        return _PATTERNS_CACHE\n    path = os.environ.get(\n        'PATTERNS_REGISTRY_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'patterns.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or {}\n    except OSError:\n        data = {}\n    ids: set[str] = set()\n    for entry in (data.get('patterns') or []):\n        pid = entry.get('pattern_id') if isinstance(entry, dict) else None\n        if pid:\n            ids.add(pid)\n    _PATTERNS_CACHE = ids\n    return _PATTERNS_CACHE\n\n\n_PATTERN_ID_RE = re.compile(r'\\b(PATTERN-[A-Z]+(?:-[A-Z]+)+)\\b')\n_BRIEF_FILE_RE = re.compile(r'\\b((?:SD|CL)_\\d{8}_\\d{4}_[A-Za-z_]+\\.html)\\b')\n\n\ndef check_pattern_citations(html: str, briefs_dir: str | None = None) -> str | None:\n    \"\"\"Validate §12 PATTERN RECOGNITION integrity (post EDITORIAL-PATTERN-XREF-01).\n\n    - Pattern_ids cited (PATTERN-NOUN-MECHANISM uppercase) must exist in\n      data/patterns.yaml.\n    - Brief filenames cited (SD_*.html / CL_*.html) must exist on disk.\n    - Briefs that use prose-only references (no formal markers) pass trivially.\n\n    WARNING level — false positives possible if a §12 reformulates a pattern\n    without naming a pattern_id, or references a brief by title rather than\n    filename. 
Editorial review remains the source of truth.\n    \"\"\"\n    m = re.search(r'(?is)PATTERN\\s+RECOGNITION.*?(?=<h[12]|</section|</body)', html)\n    if not m:\n        return None\n    section = m.group(0)\n\n    issues: list[str] = []\n\n    pattern_ids = set(_PATTERN_ID_RE.findall(section))\n    if pattern_ids:\n        registered = _load_pattern_ids()\n        if registered:\n            unknown = pattern_ids - registered\n            if unknown:\n                issues.append(f'unregistered pattern_ids: {sorted(unknown)}')\n\n    cited_files = set(_BRIEF_FILE_RE.findall(section))\n    if cited_files:\n        search_dirs = []\n        if briefs_dir:\n            search_dirs.append(briefs_dir)\n        search_dirs.extend([\n            '/root/n8n/local-files/briefs',\n            '/root/n8n/local-files/briefs-clave',\n        ])\n        missing = [\n            f for f in cited_files\n            if not any(os.path.exists(os.path.join(d, f)) for d in search_dirs)\n        ]\n        if missing:\n            issues.append(f'missing brief filenames: {sorted(missing)}')\n\n    if issues:\n        return '§12 ' + ' || '.join(issues)\n    return None\n\n\ndef check_url_integrity(html: str, *, network: bool = False,\n                        timeout: float = 4.0) -> str | None:\n    \"\"\"Optionally HEAD each cited URL to detect 4xx/5xx and DNS failures.\n\n    Skipped by default (network=False) so the runner stays offline-clean\n    in CI. 
Enable per-call by passing `--check-urls` to the CLI or\n    setting `network=True` in library calls.\n    \"\"\"\n    if not network:\n        return None\n    import urllib.request\n    import urllib.error\n    import socket\n\n    urls = list(set(URL_RE.findall(html)))\n    failures: list[str] = []\n    for url in urls:\n        try:\n            req = urllib.request.Request(url, method='HEAD',\n                                         headers={'User-Agent': 'sd-quality-check/1.0'})\n            with urllib.request.urlopen(req, timeout=timeout) as resp:\n                if resp.status >= 400:\n                    failures.append(f'{resp.status} {url}')\n        except urllib.error.HTTPError as e:\n            if e.code >= 400:\n                failures.append(f'{e.code} {url}')\n        except (urllib.error.URLError, socket.timeout, OSError) as e:\n            failures.append(f'unreachable {url} ({type(e).__name__})')\n\n    if failures:\n        return f'{len(failures)} broken citation URL(s): {\"; \".join(failures[:3])}'\n    return None\n\n\n# Footnote anchors introduced by EDITORIAL-FOOTNOTE-01.\n# Permissive on attribute order/class/whitespace.\nSUP_FN_RE = re.compile(\n    r'<sup\\b[^>]*>\\s*<a\\b[^>]*\\bhref=[\"\\']#fn-(\\d+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\nLI_FN_ID_RE = re.compile(\n    r'<li\\b[^>]*\\bid=[\"\\']fn-(\\d+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\n\n\ndef check_footnote_integrity(html: str) -> str | None:\n    \"\"\"Footnote anchor integrity (EDITORIAL-FOOTNOTE-01 F2).\n\n    When a brief uses footnotes, every <sup><a href=\"#fn-N\"> must resolve\n    to a <li id=\"fn-N\">, every <li id=\"fn-N\"> must be referenced ≥1 time,\n    numbering must be contiguous 1..max, and ids must be unique.\n\n    Brief with zero footnote markup → returns None (current corpus state).\n    \"\"\"\n    sup_refs = [int(m.group(1)) for m in SUP_FN_RE.finditer(html)]\n    li_ids = [int(m.group(1)) for m in LI_FN_ID_RE.finditer(html)]\n\n    if not sup_refs 
and not li_ids:\n        return None\n\n    issues: list[str] = []\n    sup_set = set(sup_refs)\n    li_set = set(li_ids)\n\n    orphan_refs = sorted(sup_set - li_set)\n    if orphan_refs:\n        issues.append(f'<sup> ref(s) without <li>: {orphan_refs[:5]}')\n\n    unreferenced = sorted(li_set - sup_set)\n    if unreferenced:\n        issues.append(f'<li> never referenced: {unreferenced[:5]}')\n\n    if li_ids:\n        max_n = max(li_ids)\n        gaps = sorted(set(range(1, max_n + 1)) - li_set)\n        if gaps:\n            issues.append(f'numbering not contiguous, missing: {gaps[:5]}')\n\n    if len(li_ids) != len(li_set):\n        from collections import Counter\n        dupes = sorted(n for n, c in Counter(li_ids).items() if c > 1)\n        issues.append(f'duplicate <li id=\"fn-N\">: {dupes[:5]}')\n\n    return '; '.join(issues) if issues else None\n\n\n# RUNNER-CHECK-FOOTNOTE-SCOPE-MISMATCH-01 (2026-05-25)\n# Detects footnotes anchoring claims from different dates without source-scope\n# disambiguation. Canonical instance: Brief 8 fn-8 (US Treasury sb0197 dated\n# 18 July 2025) anchored both \"Jun 2025 Senate Banking testimony\" and \"Nov 2025\n# revision\" attribution. Fixed in commit aa08039 via Source-scope note +\n# [SD-attribution] marker in the footnote body.\n#\n# Detection: extract Month YYYY date tokens from ±150 chars before each <sup>\n# ref + from the matching <li id=\"fn-N\"> body. 
If a claim mentions a Month YYYY\n# NOT present in the footnote body's URL+title+description AND the footnote body\n# lacks an acknowledgement marker, flag.\n#\n# WARN tier; promote to BLOCKER after 2 briefs stable.\n_FN_SCOPE_SUP_RE = re.compile(\n    r'<sup\\b[^>]*>\\s*<a\\b[^>]*\\bhref=[\"\\']#fn-([\\w-]+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\n_FN_SCOPE_LI_RE = re.compile(\n    r'<li\\b[^>]*\\bid=[\"\\']fn-([\\w-]+)[\"\\'][^>]*>(.*?)</li>',\n    re.IGNORECASE | re.DOTALL,\n)\n_FN_SCOPE_ACK_MARKERS = (\n    'source-scope note',\n    'fuente-alcance',\n    'nota de alcance',\n    '[sd-attribution]',\n    '[sd-aggregate]',\n    '[sd-estimate]',\n    '[sd-inference]',\n    '[sd-pattern]',\n)\n# Attribution keywords — require Month-YYYY to appear in attribution context\n# (past dated statement/revision/testimony) for the check to flag. Forward\n# projections like \"scheduled by June 2026\" or \"assesses August 2026 realistic\"\n# are out-of-scope because the source typically projects them itself.\n# Month abbreviation → month index (for \"Nov 2025\", \"Jun 2025\" style refs)\n_FN_SCOPE_MONTH_ABBR = {\n    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,\n    'jul': 7, 'aug': 8, 'sep': 9, 'sept': 9, 'oct': 10, 'nov': 11, 'dec': 12,\n    'ene': 1, 'febr': 2, 'marz': 3, 'abr': 4, 'mayo': 5,\n    'ago': 8, 'dic': 12,\n}\n_FN_SCOPE_MONTH_ABBR_RE = re.compile(\n    r'\\b(' + '|'.join(sorted(_FN_SCOPE_MONTH_ABBR.keys(), key=len, reverse=True)) +\n    r')\\.?\\s+(\\d{4})\\b',\n    re.IGNORECASE,\n)\n_FN_SCOPE_ATTRIB_KEYWORDS = (\n    'testimony', 'testimonio', 'statement', 'declaración', 'remarks',\n    'revision', 'revisión', 'speech', 'discurso', 'address',\n    'announcement', 'anuncio', 'announced', 'anunció',\n    'press release', 'comunicado', 'op-ed', 'interview', 'entrevista',\n    'comments', 'comentarios', 'cited', 'citado', 'said', 'dijo',\n    'told', 'wrote', 'escribió', 'declaration', 'declaró',\n)\n_FN_SCOPE_TAG_RE = re.compile(r'<[^>]+>')\n# 
Multi-source attribution pattern: [WEB: A; B; C] indicates author explicitly\n# cited multiple sources. Dates inside such blocks count as implicit footnote\n# scope (the author signaled the date is in cited source). Single-source\n# [WEB: X] without semicolons does NOT add implicit scope — dates outside\n# the block (e.g., \"Nov 2025 revision\" in prose) still require footnote\n# anchoring. Brief 7 fn-1 canonical case: claim cites\n# `[WEB: JC 2026 10; Joint Committee Press Release 18 November 2025]` →\n# multi-source → November 2025 in scope → no flag.\n_FN_SCOPE_WEB_BLOCK_RE = re.compile(r'\\[WEB:\\s*([^\\]]+)\\]', re.IGNORECASE)\n\n\ndef check_footnote_scope_date_mismatch(html: str) -> str | None:\n    \"\"\"Detect footnotes anchoring claims from different dates without scope-note.\n\n    For each <sup><a href=\"#fn-N\">N</a></sup> reference in the body, extract\n    Month-YYYY date tokens from the surrounding ±150 char claim context. Compare\n    against Month-YYYY tokens in the matching <li id=\"fn-N\"> body. If a claim\n    mentions a date NOT covered by the footnote AND the footnote body has no\n    acknowledgement marker (Source-scope note / [SD-attribution] / [SD-aggregate]\n    / [SD-estimate] / [SD-inference]), flag.\n\n    Brief with zero footnote markup → returns None.\n\n    Origin: AUTOMATION-CANDIDATES-2026-05-25-CRITICAL-REVIEW-SESSION-01 +\n    BRIEF-8-CRITICAL-REVIEW-FOLLOWUP-01 Phase A. 
Canonical instance: Brief 8\n    fn-8 Bessent Jul-2025 source anchoring Nov-2025 + Jun-2025 claims.\n    \"\"\"\n    # Build map of footnote bodies (by full key including -es suffix if any)\n    fn_bodies: dict[str, str] = {}\n    for m in _FN_SCOPE_LI_RE.finditer(html):\n        fn_bodies[m.group(1)] = m.group(2)\n    if not fn_bodies:\n        return None\n\n    sup_refs = list(_FN_SCOPE_SUP_RE.finditer(html))\n    if not sup_refs:\n        return None\n\n    # Track unique mismatches per footnote (one flag per fn-key suffices for sample)\n    flagged: dict[str, list[str]] = {}\n\n    for sup_match in sup_refs:\n        fn_key = sup_match.group(1)\n        body = fn_bodies.get(fn_key)\n        if body is None:\n            # Orphan ref — handled by check_footnote_integrity; skip here\n            continue\n\n        # Skip if footnote body has an acknowledgement marker\n        body_lower = body.lower()\n        if any(marker in body_lower for marker in _FN_SCOPE_ACK_MARKERS):\n            continue\n\n        # Extract claim context: scan backward from <sup> ref to the NEAREST of\n        # (a) previous <sup>...</sup> end (delimits prior claim's scope),\n        # (b) <p>/<td> open tag (paragraph/cell boundary),\n        # (c) 800-char floor.\n        # Capping at previous <sup> ref prevents pulling dates from sibling\n        # claims in the same paragraph (each <sup> anchors its own claim).\n        ref_pos = sup_match.start()\n        ctx_floor = max(0, ref_pos - 800)\n        backward = html[ctx_floor:ref_pos]\n        # Find latest paragraph/cell boundary\n        boundary_idx = -1\n        for tag in ('<p>', '<p ', '<td>', '<td ', '<li ', '<li>'):\n            idx = backward.rfind(tag)\n            if idx > boundary_idx:\n                boundary_idx = idx\n        # Find latest end of previous <sup>...</sup> (claim delimiter)\n        prev_sup_end = backward.rfind('</sup>')\n        # Use the LATER of the two (closer to ref_pos)\n        cut_idx = 
max(boundary_idx, prev_sup_end)\n        if cut_idx > -1:\n            ctx_start = ctx_floor + cut_idx\n        else:\n            ctx_start = ctx_floor\n        ctx_end = min(len(html), ref_pos + 50)\n        claim_ctx = html[ctx_start:ctx_end]\n        claim_text = _FN_SCOPE_TAG_RE.sub(' ', claim_ctx)\n\n        # Extract footnote dates — strip tags from body\n        body_text = _FN_SCOPE_TAG_RE.sub(' ', body)\n\n        # Collect Month-YYYY tokens from both. Normalize to (month_idx, year).\n        # Includes full month names + abbreviations (Nov 2025, Jun 2025 style).\n        def _dates(text: str) -> set:\n            out: set = set()\n            for mm in _DATE_MONTH_YEAR_RE.finditer(text):\n                mon = mm.group(1).lower()\n                yr = int(mm.group(2))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    out.add((_DATE_MONTH_INDEX[mon], yr))\n            for mm in _DATE_MD_Y_RE.finditer(text):\n                mon = mm.group(1).lower()\n                yr = int(mm.group(3))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    out.add((_DATE_MONTH_INDEX[mon], yr))\n            for mm in _DATE_DM_Y_RE.finditer(text):\n                mon = mm.group(2).lower()\n                yr = int(mm.group(3))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    out.add((_DATE_MONTH_INDEX[mon], yr))\n            for mm in _FN_SCOPE_MONTH_ABBR_RE.finditer(text):\n                abbr = mm.group(1).lower()\n                yr = int(mm.group(2))\n                if abbr in _FN_SCOPE_MONTH_ABBR and 2010 <= yr <= 2035:\n                    out.add((_FN_SCOPE_MONTH_ABBR[abbr], yr))\n            return out\n\n        # Per-date attribution check: each claim Month-YYYY must have an\n        # attribution keyword within ±50 chars to count as an attributed claim.\n        # Without attribution keyword, the date is likely a forward projection\n        # 
from the source itself (not a separate attribution).\n        def _attributed_dates(text: str) -> set:\n            out: set = set()\n            text_lower = text.lower()\n\n            def _check_attr(span_start: int, span_end: int, mo: int, yr: int) -> None:\n                attr_start = max(0, span_start - 50)\n                attr_end = min(len(text), span_end + 50)\n                window = text_lower[attr_start:attr_end]\n                if any(kw in window for kw in _FN_SCOPE_ATTRIB_KEYWORDS):\n                    out.add((mo, yr))\n\n            for mm in _DATE_MONTH_YEAR_RE.finditer(text):\n                mon = mm.group(1).lower()\n                yr = int(mm.group(2))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    _check_attr(mm.start(), mm.end(), _DATE_MONTH_INDEX[mon], yr)\n            for mm in _DATE_MD_Y_RE.finditer(text):\n                mon = mm.group(1).lower()\n                yr = int(mm.group(3))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    _check_attr(mm.start(), mm.end(), _DATE_MONTH_INDEX[mon], yr)\n            for mm in _DATE_DM_Y_RE.finditer(text):\n                mon = mm.group(2).lower()\n                yr = int(mm.group(3))\n                if mon in _DATE_MONTH_INDEX and 2010 <= yr <= 2035:\n                    _check_attr(mm.start(), mm.end(), _DATE_MONTH_INDEX[mon], yr)\n            for mm in _FN_SCOPE_MONTH_ABBR_RE.finditer(text):\n                abbr = mm.group(1).lower()\n                yr = int(mm.group(2))\n                if abbr in _FN_SCOPE_MONTH_ABBR and 2010 <= yr <= 2035:\n                    _check_attr(mm.start(), mm.end(), _FN_SCOPE_MONTH_ABBR[abbr], yr)\n            return out\n\n        claim_dates = _attributed_dates(claim_text)\n        if not claim_dates:\n            continue\n        footnote_dates = _dates(body_text)\n        if not footnote_dates:\n            # Footnote body has no date scope — cannot validate; skip 
(avoids\n            # false positives on framework/standard footnotes like ISO 20022)\n            continue\n\n        # Multi-source attribution: [WEB: A; B; C] — dates inside such blocks\n        # count as implicit footnote scope (author explicitly cited multiple\n        # sources). Only semicolon-separated multi-source attributions add\n        # implicit scope; single-source [WEB: X] does not.\n        implicit_scope: set = set()\n        for web_match in _FN_SCOPE_WEB_BLOCK_RE.finditer(claim_text):\n            block = web_match.group(1)\n            if ';' not in block:\n                continue\n            implicit_scope |= _dates(block)\n\n        effective_scope = footnote_dates | implicit_scope\n\n        # Mismatch = claim's attributed date not in footnote scope (exact month+year match)\n        mismatches = claim_dates - effective_scope\n        if mismatches:\n            # Build sample with claim snippet (cleaned)\n            snippet = re.sub(r'\\s+', ' ', claim_text).strip()\n            if len(snippet) > 100:\n                snippet = '…' + snippet[-100:]\n            mm_strs = sorted(f'{_DATE_EN_MONTHS[mo-1].title()} {yr}' for mo, yr in mismatches)\n            entry = f'fn-{fn_key}: attributed claim ({mm_strs[0]}) not in footnote scope ({snippet!r})'\n            flagged.setdefault(fn_key, []).append(entry)\n\n    if not flagged:\n        return None\n\n    samples = []\n    for fn_key, entries in sorted(flagged.items()):\n        samples.append(entries[0])\n    samples = samples[:3]\n    return (\n        f'footnote_scope_date_mismatch: {len(flagged)} footnote(s) with date-scope mismatch — '\n        f'claim cites a Month-YYYY not in footnote source-scope, and footnote body lacks '\n        f'a [SD-attribution]/[SD-aggregate]/[SD-estimate] marker or Source-scope note. 
'\n        f'Samples: ' + ' | '.join(samples)\n    )\n\n\n_DATE_NUM_WORDS = {\n    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,\n    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,\n    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,\n    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,\n    'nineteen': 19, 'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50,\n    'uno': 1, 'una': 1, 'dos': 2, 'tres': 3, 'cuatro': 4, 'cinco': 5,\n    'seis': 6, 'siete': 7, 'ocho': 8, 'nueve': 9, 'diez': 10,\n    'once': 11, 'doce': 12, 'trece': 13, 'catorce': 14, 'quince': 15,\n    'dieciséis': 16, 'diecisiete': 17, 'dieciocho': 18,\n    'diecinueve': 19, 'veinte': 20, 'treinta': 30, 'cuarenta': 40,\n    'cincuenta': 50,\n}\n_DATE_UNIT_DAYS = {\n    'day': 1, 'days': 1, 'día': 1, 'días': 1,\n    'week': 7, 'weeks': 7, 'semana': 7, 'semanas': 7,\n    'month': 30, 'months': 30, 'mes': 30, 'meses': 30,\n    'year': 365, 'years': 365, 'año': 365, 'años': 365,\n}\n_DATE_EN_MONTHS = ['january', 'february', 'march', 'april', 'may', 'june',\n                   'july', 'august', 'september', 'october', 'november', 'december']\n_DATE_ES_MONTHS = ['enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio',\n                   'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre']\n_DATE_MONTH_INDEX = {\n    **{m: i + 1 for i, m in enumerate(_DATE_EN_MONTHS)},\n    **{m: i + 1 for i, m in enumerate(_DATE_ES_MONTHS)},\n}\n_DATE_CLAIM_RE = re.compile(\n    r'\\b(\\d{1,3}|' + '|'.join(sorted(_DATE_NUM_WORDS, key=len, reverse=True)) + r')\\s+'\n    r'(' + '|'.join(sorted(_DATE_UNIT_DAYS, key=len, reverse=True)) + r')\\s+'\n    r'(?:after|tras|después\\s+de)\\b',\n    re.IGNORECASE,\n)\n# DATE-ARITH Phase 2 — duration-to-end-date pattern (Brief 6 origin 2026-05-08).\n# Catches \"N-unit [noun] running to / expires / expira / hasta DATE\" — explicit\n# duration tied to an explicit end date. 
The Phase 1 _DATE_CLAIM_RE catches\n# \"N units after [event]\" but skips when window dates span >1.2× claim\n# (treated as unrelated dates). For the running-to construction the dates\n# ARE the claim's anchors; both directions of mismatch matter.\n# Brief 6 case: \"6-month suspension running to 2026-11-10\" + \"October 2025\n# truce\" in window. claimed=180d, actual≈400d, ratio≈2.2.\n_DATE_RUNNING_TO_RE = re.compile(\n    r'\\b(\\d{1,3})[\\s–—-]+'  # N + (space|hyphen|en-dash|em-dash)\n    r'(' + '|'.join(sorted(_DATE_UNIT_DAYS, key=len, reverse=True)) + r')\\b'\n    r'(?:\\s+[\\wÀ-ſ-]+){0,4}?\\s+'  # 0-4 filler words (suspension, pause, etc.)\n    r'(?:running\\s+to|expires?(?:\\s+on)?|que\\s+expira(?:r[áa])?\\s+(?:el\\s+)?|'\n    r'expira(?:r[áa])?\\s+(?:el\\s+)?|hasta\\s+(?:el\\s+)?|through\\s+|until\\s+(?:the\\s+)?|'\n    r'ending\\s+(?:on\\s+)?|ends?\\s+(?:on\\s+)?|finaliz(?:a|ar[áa])\\s+(?:el\\s+)?)'\n    r'\\s*(?:<[^>]+>\\s*)?(?:el\\s+)?'  # optional inline HTML or article\n    r'(\\d{4}-\\d{2}-\\d{2}|\\d{1,2}-\\d{1,2}-\\d{4})',  # ISO or DD-MM-YYYY\n    re.IGNORECASE,\n)\n# Month-year reference, e.g. 
\"October 2025\" / \"octubre de 2025\" / \"octubre 2025\"\n_DATE_MONTH_YEAR_RE = re.compile(\n    r'\\b(' + '|'.join(_DATE_MONTH_INDEX.keys()) + r')\\s+(?:de\\s+)?(\\d{4})\\b',\n    re.IGNORECASE,\n)\n_DATE_MD_Y_RE = re.compile(\n    r'\\b(' + '|'.join(_DATE_EN_MONTHS + _DATE_ES_MONTHS) + r')\\s+(\\d{1,2}),?\\s+(\\d{4})\\b',\n    re.IGNORECASE,\n)\n_DATE_DM_Y_RE = re.compile(\n    r'\\b(\\d{1,2})\\s+(?:de\\s+)?(' + '|'.join(_DATE_EN_MONTHS + _DATE_ES_MONTHS) + r')\\s+(?:de\\s+)?(\\d{4})\\b',\n    re.IGNORECASE,\n)\n\n\n_MDTOHTML_BLOCK_WRAP_RE = re.compile(\n    r'<p>\\s*</?(?:section|ol|ul|div|table|h[1-6])\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_mdtohtml_paragraph_wrap_block_tag(html: str) -> str | None:\n    \"\"\"Detect block-level HTML tags wrapped in <p> by mdToHTML.\n\n    The Forecaster's mdToHTML JS function in Format Output treats\n    blank-line-separated tokens as paragraphs. When the model emits raw\n    block-level HTML (<section>, <ol>, <ul>, <div>, <table>, <h1-6>) inside\n    a markdown section per EDITORIAL-FOOTNOTE-01 / sources instructions,\n    mdToHTML wraps those tags in <p> producing malformed markup like\n    `<p><section class=\"sources\"></p>` and `<p></ol></p>`.\n\n    Browser parsers tolerate this (auto-close the <p> when a block tag\n    opens), but the structure is wrong: accessibility tooling chokes,\n    parent-child relationships are broken, and CSS selectors that depend\n    on direct ancestry fail. Brief 5 European Defense shipped with this\n    pattern in its sources block (commit fe8c9e1 retroactive fix).\n\n    Inline tags (<strong>, <em>, <a>, <span>) are valid inside <p> and\n    NOT flagged. 
Only block-level tags trigger this check.\n\n    WARNING — every hit is a real markup bug, but only 1+ is needed for\n    a reader-visible problem (sources block, footnotes ol, etc.).\n\n    Cross-ref memory: feedback_mdtohtml_block_tag_paragraph_wrap.md\n    (3 fix paths; this check implements the cheapest — post-generation\n    detection. Patching mdToHTML or restructuring EDITORIAL_PROMPT are\n    the upstream fixes that would prevent the bug from being emitted).\n    \"\"\"\n    hits = list(_MDTOHTML_BLOCK_WRAP_RE.finditer(html))\n    if not hits:\n        return None\n    # Sample the first two contexts for the failure log\n    samples = []\n    for m in hits[:2]:\n        s = max(0, m.start() - 25)\n        e = min(len(html), m.end() + 50)\n        samples.append(html[s:e].replace('\\n', ' '))\n    return (f'{len(hits)} block-level HTML tag(s) wrapped in <p> by mdToHTML — '\n            f'malformed markup; sample: {\" | \".join(samples)}')\n\n\n# ── L01: investment-recommendation patterns (regulatory hygiene) ──────────────\n# Cross-ref: data/facts.yaml + EDITORIAL-LEGAL-01/02 + D5-LICENSING.\n# Started as WARNING; promote to BLOCKER once 1 week of clean runs across\n# briefs 5+ confirms the pattern set is calibrated.\n\n# Stock tickers with major exchange suffixes (Frankfurt, Milan, London, Paris,\n# Brussels, Madrid, Amsterdam, Frankfurt-Xetra, Toronto, Hong Kong, Helsinki,\n# Stockholm, Mexico, Copenhagen, Madrid, Vienna, US, NYSE, NASDAQ).\n_L01_TICKER_RE = re.compile(\n    r'\\b[A-Z]{1,5}\\.(?:DE|MI|L|PA|BR|MC|AS|FR|TO|HK|HE|ST|MX|CO|MA|VI|US|N|OQ)\\b'\n)\n\n# Equity-research vocabulary that signals an instrument-level recommendation.\n# Deliberately conservative — generic \"Verdict:\" and bare \"Buy\"/\"Sell\" excluded\n# because briefs use \"Verdict\" as editorial-conclusion structure, not equity rating.\n_L01_VERDICT_PATTERNS = (\n    r'\\bStrong fundamental case\\b',\n    r'\\b(?:asymmetric|structural)\\s+upside\\b',\n    
r'\\bhigh[\\-\\s]conviction\\s+long\\b',\n    r'\\b(?:Outperform|Underperform|Overweight|Underweight)\\s+rating\\b',\n    r'\\b(?:Buy|Sell)\\s+rating\\b',\n    r'\\bHold\\s+with\\s+asymmetric\\b',\n)\n_L01_VERDICT_RE = re.compile('|'.join(_L01_VERDICT_PATTERNS), re.IGNORECASE)\n\n# Implicit instrument-level recommendations.\n_L01_IMPLICIT_PATTERNS = (\n    r'\\b(?:currently|presently)\\s+(?:underpriced|overpriced|underweight|overweight)\\b',\n    r'\\bbenefits\\s+from\\s+safe[\\-\\s]haven\\s+demand\\b',\n    r'\\btailwind\\s+for\\s+[A-Z][a-zA-Z]+\\b',  # \"tailwind for Rheinmetall\"\n    r'\\bis\\s+the\\s+\\w+\\s+play\\b',  # \"is the rearmament play\"\n)\n_L01_IMPLICIT_RE = re.compile('|'.join(_L01_IMPLICIT_PATTERNS), re.IGNORECASE)\n\n\ndef check_investment_recommendation_patterns(html: str) -> str | None:\n    \"\"\"Detect investment-recommendation patterns that require regulatory licensing.\n\n    Three categories:\n      1. Stock tickers with exchange suffix (e.g. RHM.DE, LDO.MI, BA.L)\n      2. Equity-research verdict vocabulary (Strong fundamental case, Outperform,\n         high-conviction long, asymmetric upside trigger, etc.)\n      3. Implicit instrument-level recommendations (currently underpriced,\n         tailwind for [Company], is the X play)\n\n    Per D5-LICENSING (operator decision pending): SD has no CNMV/EAF license.\n    Mitigation while editorial-only: keep analysis above the instrument level.\n    Sectors yes, mechanisms yes, predictions yes; tickers no, equity verdicts no.\n\n    WARNING level for the first week post-deployment. 
Promote to BLOCKER once\n    false-positive rate is calibrated across the 5+ existing briefs.\n    \"\"\"\n    hits = []\n    for m in _L01_TICKER_RE.finditer(html):\n        hits.append(f'ticker:{m.group(0)}')\n    for m in _L01_VERDICT_RE.finditer(html):\n        hits.append(f'verdict:{m.group(0)[:40]}')\n    for m in _L01_IMPLICIT_RE.finditer(html):\n        hits.append(f'implicit:{m.group(0)[:40]}')\n    if not hits:\n        return None\n    # Deduplicate while preserving order\n    seen = set()\n    uniq = [h for h in hits if not (h in seen or seen.add(h))]\n    return (f'{len(hits)} investment-recommendation pattern hit(s) — '\n            f'L01 regulatory-hygiene flag (D5-LICENSING pending); '\n            f'samples: {\", \".join(uniq[:6])}')\n\n\n# ── L03: listed-entity density (sector vs instrument boundary) ───────────────\n# Cross-ref EDITORIAL-LEGAL-02 L03 + D5-LICENSING. Where L01 catches explicit\n# tickers and verdict vocabulary, L03 catches the softer pattern: a listed\n# company named ≥3× and surrounded by financial-metric tokens (revenue,\n# EBITDA, target price, P/E, etc.). Even without a verdict, that combination\n# reads as instrument-level analysis. Mitigation while editorial-only:\n# rephrase at the sector/mechanism level. Catalogue lives at\n# `data/listed-entities.yaml` (chat-Claude proposes additions per role 2 in\n# EDITORIAL-LEGAL-06).\n\n_LISTED_ENTITIES_CACHE: list | None = None\n\n\ndef _load_listed_entities() -> list:\n    \"\"\"Load data/listed-entities.yaml once. 
Returns [] if missing or yaml not installed.\"\"\"\n    global _LISTED_ENTITIES_CACHE\n    if _LISTED_ENTITIES_CACHE is not None:\n        return _LISTED_ENTITIES_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _LISTED_ENTITIES_CACHE = []\n        return _LISTED_ENTITIES_CACHE\n    path = os.environ.get(\n        'LISTED_ENTITIES_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'listed-entities.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or []\n    except OSError:\n        data = []\n    if not isinstance(data, list):\n        data = []\n    _LISTED_ENTITIES_CACHE = data\n    return _LISTED_ENTITIES_CACHE\n\n\n# Instrument-level metric tokens (EN+ES). Distinct from L01 verdict\n# vocabulary AND from generic financial vocabulary used in sectoral analysis\n# (\"revenue\", \"valuation\", \"EBITDA\" alone are too broad — a brief about a\n# strategic-chokepoint company will mention revenue without recommending it).\n# These tokens require trailing context signalling specific-instrument\n# analysis: target price, share price, per-share metrics with a number,\n# trading-at-discount framings, etc.\n_L03_METRIC_RE = re.compile(\n    # Target/price-target with number context (EN+ES)\n    r'\\btarget\\s+price\\s+of\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\bprecio\\s+objetivo\\s+(?:de\\s+)?[\\€\\$£]\\s*\\d+|'\n    r'\\b(?:price|precio)\\s+target\\s+of\\b|'\n    # Per-share metrics with number\n    r'\\bP/E\\s+(?:ratio\\s+)?(?:of|de|near|around|circa)\\s+\\d+|'\n    r'\\bPER\\s+(?:ratio\\s+)?(?:de|aproximado|cercano)\\s+\\d+|'\n    r'\\bEPS\\s+(?:of|de)\\s+[\\€\\$£]?\\s*\\d+|'\n    r'\\bBPA\\s+(?:de|aproximado)\\s+[\\€\\$£]?\\s*\\d+|'\n    # Trading-at-X framings\n    r'\\b(?:trading|cotizando?|cotiza)\\s+(?:at|en|a)\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\b(?:trading|cotizando?|cotiza)\\s+(?:at|a)\\s+\\d+(?:\\.\\d+)?\\s*(?:×|x|times)\\s+earnings|'\n    # 
Dividend/market-cap with explicit value\n    r'\\bdividend\\s+yield\\s+(?:of|near|approximately)\\s+\\d+(?:\\.\\d+)?\\s*%|'\n    r'\\brentabilidad\\s+por\\s+dividendo\\s+(?:de|del)\\s+\\d+(?:\\.\\d+)?\\s*%|'\n    r'\\bmarket\\s+cap(?:italization)?\\s+of\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\bcapitalizaci[óo]n\\s+burs[áa]til\\s+de\\s+[\\€\\$£]\\s*\\d+|'\n    # Quarterly/FY guidance + value\n    r'\\bQ[1-4]\\s+(?:revenue|earnings|results)\\s+(?:of|de|beat|missed|exceeded)|'\n    r'\\bFY\\s*20\\d{2}\\s+(?:revenue|earnings|guidance)\\s+(?:of|de)\\s+[\\€\\$£]\\s*\\d+|'\n    # Discount/premium-to-X framings (instrument-level)\n    r'\\btrading\\s+at\\s+\\d+(?:\\.\\d+)?\\s*%\\s+(?:discount|premium)|'\n    r'\\bcotiza\\s+con\\s+(?:un\\s+)?(?:descuento|prima)\\s+del?\\s+\\d+',\n    re.IGNORECASE,\n)\n\n\n# ── A5 / D5.5: sources density floor ─────────────────────────────────────────\n# Origin: Iberian Blackout audit (EL-04) found brief lists 5 sources where\n# baseline editorial density should be 12-13. Threshold ≥10 conservative\n# floor with margin. Counts distinct URLs across footnote bodies + a\n# dedicated sources block. 
Skips infrastructure URLs (CC license, brief\n# canonical, briefs index, substack, social) so floor reflects editorial\n# sourcing, not template plumbing.\n\n_A5_INFRA_URL_RE = re.compile(\n    r'(?:'\n    r'creativecommons\\.org'\n    r'|substack\\.com'\n    r'|shadowdynamics\\.ai/(?:about|methodology|terms|disclaimer|privacy|briefs/?$|index\\.html)?'\n    r'|clave\\.press/(?:about|methodology|terms|disclaimer|privacy)?'\n    r'|twitter\\.com/.*shadowdynamics'\n    r'|x\\.com/.*shadowdynamics'\n    r'|linkedin\\.com/(?:company/)?shadow-dynamics'\n    r')',\n    re.IGNORECASE,\n)\n\n\ndef check_sources_density_minimum(html: str) -> str | None:\n    \"\"\"Brief sources block + footnote bodies should carry ≥10 distinct URLs.\n\n    Sources counted:\n      - URLs inside <li id=\"fn-N\">…</li> footnote bodies\n      - URLs inside an explicit sources block (h2/h3 with text matching\n        \"Sources\" / \"Fuentes\" / \"Sources & methodology\") until the next h2/h3.\n\n    Excluded:\n      - Infrastructure URLs (CC license, brief canonical, briefs index,\n        substack, social) — see _A5_INFRA_URL_RE.\n      - URLs anywhere outside footnotes/sources block (e.g. inline\n        Pattern Recognition links don't count toward the editorial floor).\n\n    Conditional: brief with 0 footnotes AND no sources block → no-op\n    (legacy briefs without footnote markup; pre-F3 corpus). 
Cross-ref\n    EDITORIAL-LEGAL-06 A5 + EL-04 D5.5.\n\n    Threshold ≥10 (D5.5 target was 12-13; floor with margin).\n    \"\"\"\n    urls: set[str] = set()\n\n    # Collect URLs from footnote bodies.\n    for m in re.finditer(r'<li\\s+id=[\"\\']fn-\\d+[\"\\'][^>]*>(.*?)</li>',\n                          html, re.IGNORECASE | re.DOTALL):\n        for um in re.finditer(r'https?://[^\\s<>\"\\']+', m.group(1)):\n            url = um.group(0).rstrip('.,;:)')\n            if not _A5_INFRA_URL_RE.search(url):\n                urls.add(url)\n\n    # Collect URLs from an explicit Sources/Fuentes block.\n    src_header = re.search(\n        r'<(h[23])[^>]*>[^<]{0,40}(?:Sources?|Fuentes?|Bibliography|Bibliografía)\\b[^<]{0,40}</\\1>',\n        html, re.IGNORECASE,\n    )\n    if src_header:\n        # Block extends until the next h2/h3 or end of body.\n        start = src_header.end()\n        m_next = re.search(r'<h[23]\\b', html[start:], re.IGNORECASE)\n        end = start + m_next.start() if m_next else len(html)\n        block = html[start:end]\n        for um in re.finditer(r'https?://[^\\s<>\"\\']+', block):\n            url = um.group(0).rstrip('.,;:)')\n            if not _A5_INFRA_URL_RE.search(url):\n                urls.add(url)\n\n    # Conditional skip: legacy brief without footnote markup AND no sources block.\n    has_footnotes = bool(re.search(r'<li\\s+id=[\"\\']fn-\\d+[\"\\']', html, re.I))\n    if not has_footnotes and not src_header:\n        return None\n\n    if len(urls) >= 10:\n        return None\n    return (\n        f'editorial source density below floor: {len(urls)} distinct URL(s) '\n        f'in footnotes+sources block (target ≥10, baseline 12-13 per EL-04 '\n        f'D5.5) — strengthen sourcing in next revision'\n    )\n\n\n# ── A4 / B5-PATTERN: cross-lingual topic self-reference ──────────────────────\n# Origin: Spain Blackout brief 2026-04-27 had EN block saying \"structurally\n# identical to the one identified in Shadow Dynamics' Iberian 
Blackout\n# analysis\" — but the brief IS the Iberian Blackout brief. The ES half\n# correctly referenced \"el análisis de Shadow Dynamics sobre los puertos\n# europeos de COSCO\" (a different prior brief). Bug class: model treats the\n# current brief as a previous external reference. Fix in commit 9cec1a0.\n# Detection: extract h1's topic kernel; if \"Shadow Dynamics(['s])? [topic]\n# (analysis|brief|report)\" appears in body, that's likely self-reference.\n\n_A4_TOPIC_STOPWORDS = {\n    'the', 'and', 'for', 'with', 'from', 'into', 'about',\n    'analysis', 'brief', 'report', 'shadow', 'dynamics',\n    'una', 'del', 'los', 'las', 'sobre', 'desde', 'hasta',\n    'análisis', 'informe', 'los', 'estudio',\n}\n\n\ndef check_cross_lingual_topic_consistency(html: str) -> str | None:\n    \"\"\"Detect self-reference to current brief topic as if it were external.\n\n    Algorithm:\n      1. Extract <h1> inner text; strip tags/entities.\n      2. Take the segment before first ':' or '—' (topic portion).\n      3. Pull significant words (≥4 chars, not stopwords).\n      4. If topic kernel has ≥2 words, search body for\n         `Shadow Dynamics(?:'s|')? <topic-words> (analysis|brief|report)` —\n         if found, flag as likely self-reference.\n\n    WARNING — may have false positives where a brief legitimately\n    references its own headline elsewhere in the body. Pre-publish reviewer\n    confirms or dismisses. 
Cross-ref EDITORIAL-LEGAL-06 A4 + bug class\n    B5-PATTERN-EN (commit 9cec1a0).\n    \"\"\"\n    m_h1 = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.IGNORECASE | re.DOTALL)\n    if not m_h1:\n        return None\n    h1_text = re.sub(r'<[^>]+>', ' ', m_h1.group(1))\n    h1_text = re.sub(r'&[a-z]+;', ' ', h1_text, flags=re.IGNORECASE)\n    h1_text = re.sub(r'\\s+', ' ', h1_text).strip()\n    if not h1_text:\n        return None\n    # Topic portion = everything before first ':' or '—'\n    topic_part = re.split(r'[:—\\-–]', h1_text, maxsplit=1)[0].strip()\n    topic_words = [\n        w for w in re.split(r'\\W+', topic_part)\n        if len(w) >= 4 and w.lower() not in _A4_TOPIC_STOPWORDS\n    ]\n    if len(topic_words) < 2:\n        return None\n    # Build a phrase pattern that requires consecutive topic words within 30 chars.\n    topic_phrase = r'\\b' + r'\\s+'.join(re.escape(w) for w in topic_words[:3]) + r'\\b'\n    self_ref_re = re.compile(\n        r'\\bShadow\\s+Dynamics(?:\\'s|\\')?\\s+[^<]{0,40}'\n        + topic_phrase\n        + r'[^<]{0,40}\\b(?:analysis|brief|report|análisis|informe|estudio)\\b',\n        re.IGNORECASE,\n    )\n    # Also reverse form: \"in Shadow Dynamics' analysis (of|sobre) [topic]\"\n    self_ref_re_rev = re.compile(\n        r'\\bShadow\\s+Dynamics(?:\\'s|\\')?\\s+(?:analysis|brief|report|análisis|informe|estudio)\\s+(?:of|on|sobre)\\s+'\n        + r'[^<]{0,40}'\n        + topic_phrase,\n        re.IGNORECASE,\n    )\n    matches = list(self_ref_re.finditer(html)) + list(self_ref_re_rev.finditer(html))\n    if not matches:\n        return None\n    samples = [re.sub(r'\\s+', ' ', m.group(0))[:120] for m in matches[:2]]\n    return (f'{len(matches)} self-reference(s) to current brief topic '\n            f'\"{topic_part}\" framed as external Shadow Dynamics work — '\n            f'B5-PATTERN-EN class: {\" | \".join(samples)}')\n\n\n# ── D01: anchor-number → footnote-URL gate ───────────────────────────────────\n# Cross-ref 
EDITORIAL-LEGAL-02 D01. Every magnitude wrapped in <strong>/<b>\n# (i.e. an \"anchor number\" the brief asks the reader to remember) must point\n# to a footnote whose body carries either a hyperlink (http(s)) or a known\n# source-org name. Anchor numbers attached to fn-N entries that are nothing\n# but \"SD estimate\" / \"internal inference\" don't satisfy the contract — that\n# was the Brief 5 pattern (S01 caught the ratio; D01 catches the per-anchor\n# integrity). WARNING for one baseline week, then promote to BLOCKER.\n# Conditional: brief with 0 <li id=\"fn-N\"> entries → no-op (legacy/pre-F3).\n\n_D01_MAGNITUDE_RE = re.compile(\n    r'(?:'\n    r'\\b\\d+(?:[.,]\\d+)?\\s*%'                                  # 5%, 24.9%\n    r'|[€$£]\\s*\\d+(?:[.,]\\d+)?\\s*(?:[BMK]|bn|mn|tn|billion|million|trillion|millones|miles\\s+de\\s+millones)?\\b'\n    r'|\\b\\d+(?:[.,]\\d+)?\\s*(?:GW|MW|TWh|GWh|km|tons?|toneladas|barrels|barriles)\\b'\n    r'|\\b\\d{1,3}(?:[,.]\\d{3})+\\b'                            # 1,000,000 / 1.000.000\n    r')',\n    re.IGNORECASE,\n)\n\n_D01_FN_REF_RE = re.compile(\n    # Accepts both EN-tab `#fn-N` and ES-tab `#fn-N-es` (canonical bilingual\n    # convention per Brief 7+; pre-fix this regex was EN-only which made\n    # every legitimately-anchored ES magnitude appear as a strike).\n    r'<sup[^>]*>\\s*<a\\s+href=[\"\\']#fn-(\\d+)(?:-es)?[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\n\n_D01_FN_BODY_RE = re.compile(\n    # Bilingual footnote scaffolding: <li id=\"fn-N\"> (EN) + <li id=\"fn-N-es\">\n    # (ES). 
Both forms map to the same fn_bodies key (the digit) since EN\n    # and ES bodies hold the same source attribution.\n    r'<li\\s+id=[\"\\']fn-(\\d+)(?:-es)?[\"\\'][^>]*>(.*?)</li>',\n    re.IGNORECASE | re.DOTALL,\n)\n\n_D01_URL_OR_SOURCE_RE = re.compile(\n    r'https?://[^\\s<>\"\\']+'\n    r'|\\b(?:' + SOURCE_ORG_NAMES + r')\\b'\n    r'|\\bBOE-A-\\d{4}-\\d+\\b'                                  # Spanish Official Bulletin refs\n    r'|\\b(?:SIPRI|MERICS|Bruegel|EPRS|Carnegie|Politico|Statista|EDA|EIB|CNAT|Inelfe)\\b',\n    re.IGNORECASE,\n)\n\n# Structural-exclusion regex (added 2026-05-27 evening per audit of Brief 8\n# Payments W22 89-hit cluster). The D01 check fires on any <strong>/<b>\n# containing a magnitude; but several bolded magnitude patterns are\n# structural-internal markers, not external claims requiring URL footnotes:\n#   1. CONFIDENCE BANDS — pure-range probabilities like \"60–75%\", \"55–65 %\"\n#      that label the probability range of a §XIV PRED entry. The probability\n#      itself is the brief's own forecast, not an external fact to source.\n#   2. PRED / SIGNAL / SEÑAL / ALERT / SCENARIO HEADERS — bolded header text\n#      starting with \"PRED N — ...\", \"SIGNAL N — ...\", \"Scenario A — ...\".\n#      Any magnitude inside such a header (e.g., \"PRED 2 — Stablecoin\n#      Combined UST Holdings Cross $250B\") is the threshold defining the\n#      prediction itself, not an external claim.\n# Brief 8 audit: 89 raw D01 hits decomposed as ~29 structural-FP + ~60\n# genuine D01 violations. Without this exclusion, W-scoring reports inflate\n# the compliance gap by ~33%. 
Cross-ref BACKLOG WCV-DRIFT-CLAIM-\n# TRACEABILITY-01 + the same-day false-friend revert lesson (commit\n# 615ee81) — both surfaced from raw-runner-count vs sampled-population\n# divergence.\n#\n# Corpus-wide audit 2026-05-27 late-evening surfaced 5 additional FP classes\n# accounting for ~71 of 77 hits across Briefs 1-3/6-8: scenario-with-parens\n# headers (\"Scenario A: Managed De-escalation (35%)\"), self-assessment\n# probability statements (\"We assess 45% probability that...\"), most-likely-\n# outcome inline summaries, \"Probability:/Probabilidad:\" PRED-block-internal\n# percentages, and bolded scenario-probability cells/predictions handled via\n# helpers in `check_anchor_number_footnote_url` (word-count + enclosing-tag\n# + adjacency-keyword heuristics rather than pure prefix regex).\n_D01_STRUCTURAL_EXCLUSION_RE = re.compile(\n    r'^\\s*(?:'\n    # Confidence band: pure range%, optional spaces around dash, %, or .\n    r'\\d+(?:[.,]\\d+)?\\s*[–\\-]\\s*\\d+(?:[.,]\\d+)?\\s*%\\s*\\+?'\n    r'|'\n    # PRED/SIGNAL/SEÑAL/Alert/Alerta/Scenario/Escenario header prefix.\n    # Identifier portion accepts combined number+letter forms (\"5A\", \"5B\")\n    # as used by Brief 8's Scenario 5A/5B/5C tri-fork pattern, in addition\n    # to bare digit (\"PRED 2\", \"SIGNAL 1\") and bare letter (\"Scenario A\")\n    # forms (post-2026-05-27 evening Brief 8 cosmetic-patch surfaced gap).\n    r'(?:PRED|SIGNAL|SEÑAL|Alert|Alerta|Scenario|Escenario)\\s+(?:\\d+[A-Z]?|[A-Z])\\s*[—–\\-]'\n    r'|'\n    # Scenario-with-colon-and-parenthesized-percentage header (Briefs 1/2/3\n    # pattern): \"Scenario A: Managed De-escalation (35%)\" — distinct from\n    # the dash-separated PRED/SIGNAL form above; carries the scenario\n    # probability inside parens rather than as a tabular cell.\n    r'(?:Scenario|Escenario)\\s+[A-Z][A-Z\\d]?:\\s+[^()\\n]+\\(\\d+(?:[.,]\\d+)?\\s*%\\)'\n    r'|'\n    # Self-assessment probability statements: \"We assess 45% probability...\",\n    # 
\"Evaluamos una probabilidad del 45 %...\", \"Asignamos un 30 % de\n    # probabilidad...\", \"Estimamos una probabilidad del 60 %...\" — these are\n    # the brief\\'s own forecast probability declarations, not external claims.\n    r'(?:We\\s+assess|Estimamos|Evaluamos|Asignamos)\\s+'\n    r'|'\n    # Bare \"NN% probability\" / \"NN% probabilidad\" (Brief 8 [4][8] pattern:\n    # short-form forecast magnitude inside narrative — distinct from \"We\n    # assess\" prefix since the bold scope starts at the magnitude rather\n    # than the verb).\n    r'\\d+(?:[.,]\\d+)?\\s*%\\s+(?:probability|probabilidad)\\b'\n    r'|'\n    # Reverse-order Spanish forecast magnitude: \"probabilidad del 60 %\"\n    # / \"probability of 60%\" — bold body starts with the noun and the\n    # magnitude follows. Brief 8 ES tab pattern where the \"Estimamos\"\n    # self-assessment prefix is OUTSIDE the bold scope; pre-context\n    # check below also catches this via Estimamos/We-assess proximity.\n    r'(?:probability|probabilidad)\\s+(?:del|of)\\s+\\d+'\n    r'|'\n    # Methodology meta: \"Probability band tightened from initial X% draft\n    # to Y%\" / \"Banda de probabilidad ajustada desde el borrador inicial\n    # de X% al Y%\" — audit-trail commentary about probability calibration\n    # iteration (Brief 8 PRED 9 pattern). 
Word count is borderline (11)\n    # so the >12 long-prose threshold doesn\\'t catch it; explicit phrase.\n    r'(?:Probability\\s+band|Banda\\s+de\\s+probabilidad)\\b'\n    r'|'\n    # Most-likely-outcome summaries: \"most likely outcome (45%)\", \"resultado\n    # más probable (45 %)\", \"desenlace más probable (50 %)\" — methodology\n    # meta-statement about which scenario carries the highest probability.\n    r'(?:the\\s+)?(?:most\\s+likely\\s+outcome|resultado\\s+más\\s+probable|desenlace\\s+más\\s+probable)\\s*\\('\n    r'|'\n    # \"Probability:\" / \"Probabilidad:\" PRED-block-internal probability\n    # declarations (Brief 8 PRED 8 pattern: bold scope starts with the\n    # label, e.g. \"Probability: 55–65%\" inside formal prediction blocks).\n    r'(?:Probability|Probabilidad):\\s*\\d'\n    r')',\n    re.IGNORECASE,\n)\n\n\n# Bare-percentage in scenario-row context. Bold body is just a percentage\n# (e.g. \"<strong>45%</strong>\"); excluded only when inside a `<td>` cell or\n# adjacent to a \"Falsifier:/Falsador:\" prediction-pattern keyword in the\n# next ~120 chars. Tabular probability cells (Brief 6 scenario tables, 18\n# hits) and bare-percentage prediction probabilities (Brief 7 standalone\n# \"60% by 2026-09-30. Falsifier:\" pattern, 8 hits) are both the brief\\'s\n# own forecast output, not external claims requiring URL anchoring.\n_D01_BARE_PCT_RE = re.compile(r'^\\s*\\d{1,3}(?:[.,]\\d+)?\\s*%\\s*$')\n_D01_FALSIFIER_ADJACENT_RE = re.compile(\n    r'\\b(?:Falsifier|Falsador)\\s*:', re.IGNORECASE\n)\n\n\n# Bolded long-form prose. When a bold body runs more than 12 words, it is\n# almost always emphasis/lead-in (e.g. \"The most consequential payments-\n# infrastructure development of 2024 was not a new...\") or quoted-deferred\n# prediction text rather than an anchor-number. D01 was designed for short\n# bolded magnitudes; long prose bolding is a different editorial intent\n# that the rule shouldn\\'t over-apply to. 
Brief 8 audit class.\n_D01_LONG_PROSE_WORD_THRESHOLD = 12\n\n\n# Hypothesis-threshold scenario projections (Brief 8 [1][2][3][6] pattern:\n# \"if Gulf hydrocarbon settlement reaches 5%+ yuan-denominated via mBridge\").\n# The bold body is short (\"5%+ yuan-denominated\") but the prose IMMEDIATELY\n# BEFORE the bold contains a conditional trigger naming a settlement/\n# liquidation/transition reaching the threshold. These are scenario\n# projections, not external facts.\n_D01_HYPOTHESIS_TRIGGER_RE = re.compile(\n    r'(?:if|si)\\s+\\S+(?:\\s+\\S+){0,8}?\\s+'\n    r'(?:settlement|liquidaci[óo]n|reaches|alcanza|share|cuota)',\n    re.IGNORECASE,\n)\n\n\n# Pre-context self-assessment proximity. When prose IMMEDIATELY BEFORE the\n# bold contains \"Estimamos / Evaluamos / Asignamos / We assess\" within ~80\n# chars, the bold (even if it\\'s just \"probabilidad del 60 %\") is the\n# magnitude portion of a self-assessment statement where the prefix sits\n# OUTSIDE the bold scope. Brief 8 ES tab pattern: \"Estimamos una\n# probabilidad del 60 % — que reconcilia...\"\n_D01_SELF_ASSESS_PRE_RE = re.compile(\n    r'\\b(?:Estimamos|Evaluamos|Asignamos|We\\s+assess)\\b[^<]{0,80}$',\n    re.IGNORECASE,\n)\n\n\n# 2026-05-31 density FP-audit (Brief 9 Stablecoins): three additional FP\n# classes where the bolded magnitude is the brief's OWN forecast output, not an\n# external claim requiring a URL footnote. Detected via a context window around\n# the bold (phrase can sit just before, or — for stated-confidence — just\n# after, the magnitude). Raw 17 hits → ~0-2 real (≈38x inflation; classic\n# raw-count-vs-sampled-population). 
Cross-ref the Brief 8 W22 89-hit audit.\n#   (1) PRED stated-confidence: \"65% stated confidence\" / \"confianza declarada\"\n#       — the PRED entry's own probability, not an external fact.\n#   (2) Falsifier / counter-frame thresholds: \"falsified if EUR supply > 5%\".\n#   (3) Cover-digest & threshold-to-monitor scenario triggers: the brief's own\n#       forward thresholds (\"threshold to monitor … $250 billion\"). Also skip\n#       magnitudes that sit inside an SD-* cover-digest HTML comment.\n_D01_FORECAST_CONTEXT_RE = re.compile(\n    r'\\b(?:'\n    r'stated\\s+confidence|confianza\\s+declarada'\n    r'|falsified\\s+if|queda\\s+falsada\\s+si|counter[\\s-]?frame|contramarco'\n    r'|threshold\\s+to\\s+monitor|umbral\\s+a\\s+vigilar'\n    r'|scenario\\s+impact|impacto\\s+del?\\s+escenario'\n    # retired-prediction rationale block: the bold restates a retired\n    # prediction whose category enum / rationale follows. Not an external claim.\n    r'|insufficient_observable_trigger|observable_trigger'\n    r')\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_anchor_number_footnote_url(\n    html: str, sample_mode: int | None = None,\n) -> str | None:\n    \"\"\"D01 — anchor-numbers (magnitude in <strong>/<b>) must point to a\n    footnote whose body has a URL or a known source-org token.\n\n    Algorithm:\n      1. If brief has 0 <li id=\"fn-N\"> entries → no-op (legacy briefs).\n      2. Build map fn_id → (URL_or_source_token_present: bool).\n      3. For each <strong>/<b> containing a magnitude, look in the next\n         100 chars for <sup><a href=\"#fn-N\">. If absent OR fn-N body has\n         no URL and no source-org token → strike.\n      4. Fire WARNING if ≥3 strikes (calibration window — too few suggests\n         editorial choice; ≥3 suggests systemic).\n\n    Started as WARNING. Promote to BLOCKER after 1 week clean. 
Cross-ref\n    EDITORIAL-LEGAL-02 D01.\n\n    `sample_mode` (int|None): when not None, emit up to that many categorized\n    samples appended as an indented block under the warning. See\n    `_classify_sample_bucket` for the content-pattern taxonomy. Origin:\n    [[feedback_w_report_raw_count_vs_sampled_population]] 2026-05-27 PM.\n    \"\"\"\n    fn_bodies: dict[str, str] = {\n        m.group(1): m.group(2)\n        for m in _D01_FN_BODY_RE.finditer(html)\n    }\n    if not fn_bodies:\n        return None  # legacy brief without footnote scaffolding\n\n    # Map fn_id → boolean \"footnote body cites URL or source org\"\n    fn_has_source: dict[str, bool] = {}\n    for fn_id, body in fn_bodies.items():\n        fn_has_source[fn_id] = bool(_D01_URL_OR_SOURCE_RE.search(body))\n\n    # Find magnitude-bearing <strong>/<b> tags.\n    strong_re = re.compile(\n        r'<(strong|b)\\b[^>]*>(.*?)</\\1>',\n        re.IGNORECASE | re.DOTALL,\n    )\n\n    strikes: list[str] = []\n    sample_records: list[dict] = []\n    for sm in strong_re.finditer(html):\n        body = sm.group(2)\n        # Must contain a magnitude pattern.\n        if not _D01_MAGNITUDE_RE.search(body):\n            continue\n        # Structural-exclusion: skip confidence bands (\"60-75%\") and\n        # PRED/SIGNAL/Scenario header prefixes — these are internal markers,\n        # not external claims (see _D01_STRUCTURAL_EXCLUSION_RE docstring).\n        body_collapsed = re.sub(r'\\s+', ' ', body).strip()\n        if _D01_STRUCTURAL_EXCLUSION_RE.match(body_collapsed):\n            continue\n        # Bolded long-form prose: bold body running >12 words is emphasis/\n        # lead-in or quoted-deferred prediction text rather than an anchor-\n        # number. 
D01 was designed for short bolded magnitudes; long bolded\n        # prose is a different editorial intent (Brief 8 audit class).\n        if len(re.findall(r'\\b\\w+\\b', body_collapsed)) > _D01_LONG_PROSE_WORD_THRESHOLD:\n            continue\n        # Bare-percentage in scenario-row context: bold body is just `NN%`\n        # AND either enclosing element is <td> (scenario-table cell, Brief 6)\n        # OR the next ~120 chars contain Falsifier:/Falsador: prediction\n        # pattern (Brief 7 standalone-prediction probabilities). Both are\n        # the brief\\'s own forecast output, not external claims.\n        if _D01_BARE_PCT_RE.match(body_collapsed):\n            container = _enclosing_element(html, sm.start())\n            if container == 'td':\n                continue\n            tail_window = html[sm.end():sm.end() + 120]\n            if _D01_FALSIFIER_ADJACENT_RE.search(tail_window):\n                continue\n        # Hypothesis-threshold scenario projection: prose IMMEDIATELY BEFORE\n        # the bold contains a conditional trigger (\"if Gulf settlement\n        # reaches\", \"Si X alcanza\"). 
The bold body is a short threshold\n        # magnitude expressing a hypothetical, not an external fact (Brief 8\n        # mBridge 5%+ yuan-denominated pattern, 4 hits).\n        pre_window = html[max(0, sm.start() - 200):sm.start()]\n        if _D01_HYPOTHESIS_TRIGGER_RE.search(pre_window):\n            continue\n        # Self-assessment proximity: prose within ~80 chars before the bold\n        # contains \"Estimamos / Evaluamos / Asignamos / We assess\" — the\n        # bold is the magnitude portion of a self-assessment statement\n        # whose verb prefix is outside the bold scope (Brief 8 ES tab\n        # \"Estimamos una probabilidad del 60 %\" pattern).\n        pre_short = html[max(0, sm.start() - 80):sm.start()]\n        if _D01_SELF_ASSESS_PRE_RE.search(pre_short):\n            continue\n        # 2026-05-31 density FP-audit: brief's-own-forecast magnitude classes\n        # (PRED stated-confidence, falsifier/counter-frame thresholds, cover-\n        # digest / threshold-to-monitor scenario triggers). The qualifying\n        # phrase can sit just before, or — for stated-confidence — just after,\n        # the bolded magnitude, so scan a window spanning both sides.\n        ctx = html[max(0, sm.start() - 160):min(len(html), sm.end() + 90)]\n        if _D01_FORECAST_CONTEXT_RE.search(ctx):\n            continue\n        # Magnitudes inside an SD-* cover-digest HTML comment are machine-\n        # readable metadata, not body claims. 
We are inside a comment iff the\n        # nearest preceding '<!--' is closer than the nearest preceding '-->'.\n        if html.rfind('<!--', 0, sm.start()) > html.rfind('-->', 0, sm.start()):\n            continue\n        # Look for <sup><a href=\"#fn-N\"> in next ~200 chars after </strong>/</b>.\n        # Window widened from 100 → 200 (2026-05-27 late-evening) after audit\n        # surfaced systematic over-firing on briefs whose editorial convention\n        # places fn-refs 80-150 chars after bolds in the pattern:\n        #   `<strong>X</strong>, descriptive prose [WEB: Source]<sup><a href=\"#fn-N\">`\n        # The 100-char limit was cutting mid-attribute on legitimately-anchored\n        # magnitudes (Brief 8 mBridge ~$55.5B cumulative: fn-2 starts at +80\n        # chars but `<a href=\"#fn-2\">` closing `>` lands at ~+101). Trade-off:\n        # extended window slightly increases risk that an unrelated next-\n        # magnitude\\'s fn-ref accidentally satisfies the check. Mitigation: the\n        # editorial pattern is consistent enough that adjacent-magnitude fn-\n        # refs typically share the same source paragraph, making the looser\n        # check semantically still correct.\n        end = sm.end()\n        window = html[end:end + 200]\n        ref = _D01_FN_REF_RE.search(window)\n        magnitude_snip = body_collapsed[:50]\n        strike_type: str | None = None\n        if not ref:\n            strikes.append(f'no fn-ref: <strong>{magnitude_snip}</strong>')\n            strike_type = 'no fn-ref'\n        else:\n            fn_id = ref.group(1)\n            if not fn_has_source.get(fn_id, False):\n                strikes.append(\n                    f'fn-{fn_id} lacks URL/source-org token: '\n                    f'<strong>{magnitude_snip}</strong>'\n                )\n                strike_type = f'fn-{fn_id} lacks URL/source-org'\n\n        if sample_mode is not None and strike_type is not None:\n            sample_records.append({\n        
        'container': _enclosing_element(html, sm.start()),\n                'content': body_collapsed,\n                'bucket': _classify_sample_bucket(body_collapsed),\n                'strike_type': strike_type,\n            })\n\n    # Conservative threshold during baseline week.\n    if len(strikes) < 3:\n        return None\n    msg = (\n        f'{len(strikes)} anchor-number(s) without URL-bearing footnote (D01); '\n        f'samples: {\" | \".join(strikes[:3])} — every magnitude in <strong>/<b> '\n        f'should resolve to a footnote with a URL or named source organisation'\n    )\n    if sample_mode is not None and sample_records:\n        block = _format_samples_block(sample_records, sample_mode)\n        if block:\n            msg = msg + '\\n' + block\n    return msg\n\n\ndef check_bilingual_sources_present(html: str) -> str | None:\n    \"\"\"Briefs should ship with ES sources alongside EN; Pattern C (EN-only) breaks ES toggle UX.\n\n    Catches: brief has English footnotes + body fn-N references + ZERO Spanish-suffix footnote IDs.\n    - Pattern A (Briefs 1+2): dual <ol class=\"lang-XX\"> siblings in one <section> — has fn-N-es ✓\n    - Pattern B (Brief 6+):   two separate <section> blocks per lang div — has fn-N-es ✓\n    - Pattern C (Briefs 3-5 pre-erratum 2026-05-08): single EN-only <ol> — NO fn-N-es → fires.\n\n    Conditional skip: 0 <li id=\"fn-N\"> entries (legacy briefs without footnote markup),\n    OR 0 body href=\"#fn-N\" references (no toggle-UX exposure if sources are decorative-only).\n\n    WARNING (calibrate-then-ratchet); promote to BLOCKER after the published archive runs\n    clean for a stable window. Currently passes for all 7 briefs after the Briefs 3-5\n    erratum back-fix landed (commit 7d87415 + 683b4bd). 
Origin: post-erratum gap closure\n    2026-05-08; cross-ref project_sd_briefs_sources_block_patterns.md.\n    \"\"\"\n    en_fn = re.findall(r'<li\\s+id=[\"\\']fn-\\d+[\"\\']', html, re.IGNORECASE)\n    if not en_fn:\n        return None  # legacy brief without footnote scaffolding\n\n    body_refs = re.findall(r'href=[\"\\']#fn-\\d+[\"\\']', html, re.IGNORECASE)\n    if not body_refs:\n        return None  # no body refs, sources don't matter for toggle UX\n\n    es_fn = re.findall(r'<li\\s+id=[\"\\']fn-\\d+-es[\"\\']', html, re.IGNORECASE)\n    if es_fn:\n        return None  # Pattern A or B — bilingual coverage present\n\n    return (\n        f'bilingual sources missing: {len(en_fn)} English footnote(s) and '\n        f'{len(body_refs)} body reference(s) but 0 Spanish-suffix footnote(s) '\n        f'(`id=\"fn-N-es\"`); Spanish-toggle readers cannot access citation '\n        f'infrastructure. Add a translated sources block — Pattern A '\n        f'(`<ol class=\"lang-en\">` + `<ol class=\"lang-es\">` inside one '\n        f'`<section class=\"sources\">`) or Pattern B (one `<section>` inside '\n        f'each `<div class=\"lang-XX\">`); see project_sd_briefs_sources_block_patterns.'\n    )\n\n\n\ndef _filter_entities_by_brand(catalogue: list, brand: str | None) -> list:\n    \"\"\"Filter listed-entities.yaml-style catalogue by brand field.\n    Same semantics as _filter_registry_by_brand: None → all; specific brand →\n    entries matching brand or 'both'; missing brand defaults 'shadow-dynamics'.\n    Per FORK-PREP-GAP-1-DATA-BRAND-AWARE-01.\n    \"\"\"\n    if brand is None:\n        return catalogue\n    filtered = []\n    for entry in catalogue:\n        if not isinstance(entry, dict):\n            continue\n        entry_brand = entry.get('brand', 'shadow-dynamics')\n        if entry_brand == brand or entry_brand == 'both':\n            filtered.append(entry)\n    return filtered\n\n\ndef check_listed_entity_density(html: str, brand: str | None = None) -> 
str | None:\n    \"\"\"Listed-entity density × financial-metric proximity (sector/instrument boundary).\n\n    Algorithm:\n      1. Strip tags to plain text.\n      2. For each entity in `data/listed-entities.yaml`, count word-boundary\n         occurrences of `name` + every alias (case-insensitive).\n      3. If total ≥ 3, scan ±150 chars around any single mention for a\n         financial-metric token (revenue, EBITDA, target price, P/E, EPS, etc.).\n      4. Fire one warning per entity that meets both conditions.\n\n    WARNING (not BLOCKER) — legitimate sectoral discussion can name companies.\n    The warning surfaces the boundary case for editorial review.\n\n    Skip entities with empty `aliases` and empty `name` (defensive).\n    Skip entries whose `name` has < 4 chars to avoid acronym false-positives\n    (e.g. \"EDP\" matching \"EDP\" inside other contexts) unless the entry\n    explicitly opts into short-token matching via a future flag (not today).\n    \"\"\"\n    catalogue = _filter_entities_by_brand(_load_listed_entities(), brand)\n    if not catalogue:\n        return None\n\n    # Plain text — strip tags so we don't count hits inside <head>/<style> etc.\n    text = re.sub(r'<(?:script|style)[^>]*>.*?</(?:script|style)>', ' ',\n                  html, flags=re.IGNORECASE | re.DOTALL)\n    text = re.sub(r'<[^>]+>', ' ', text)\n\n    flagged: list[str] = []\n    for entry in catalogue:\n        if not isinstance(entry, dict):\n            continue\n        name = (entry.get('name') or '').strip()\n        if not name:\n            continue\n        aliases = entry.get('aliases') or []\n        forms = [name] + [a for a in aliases if a]\n        # Build a single regex matching any form. 
Sort longest-first so\n        # \"BAE Systems\" wins before bare \"BAE\".\n        forms_sorted = sorted(set(forms), key=len, reverse=True)\n        # Skip extremely short forms (<4 chars) unless they're the only form\n        # — too prone to false matches on acronym collisions.\n        forms_sorted = [\n            re.escape(f) for f in forms_sorted\n            if len(f) >= 4 or len(forms_sorted) == 1\n        ]\n        if not forms_sorted:\n            continue\n        try:\n            entity_re = re.compile(r'\\b(?:' + '|'.join(forms_sorted) + r')\\b',\n                                    re.IGNORECASE)\n        except re.error:\n            continue\n\n        matches = list(entity_re.finditer(text))\n        if len(matches) < 3:\n            continue\n\n        # Check for instrument-metric proximity: require ≥2 distinct hits to\n        # avoid one-coincidence noise on briefs that legitimately discuss a\n        # company's strategic role.\n        metric_hits: list[str] = []\n        seen_positions: set[int] = set()\n        for m in matches:\n            s = max(0, m.start() - 150)\n            e = min(len(text), m.end() + 150)\n            for mm in _L03_METRIC_RE.finditer(text[s:e]):\n                pos = s + mm.start()\n                if pos in seen_positions:\n                    continue\n                seen_positions.add(pos)\n                metric_hits.append(mm.group(0)[:50])\n                if len(metric_hits) >= 3:\n                    break\n            if len(metric_hits) >= 3:\n                break\n        if len(metric_hits) < 2:\n            continue\n\n        flagged.append(\n            f'{name} ({len(matches)}×, instrument metrics: '\n            + ', '.join(f'\"{h}\"' for h in metric_hits[:2]) + ')'\n        )\n\n    if not flagged:\n        return None\n    return (f'{len(flagged)} listed entit{\"y\" if len(flagged)==1 else \"ies\"} '\n            f'with financial-metric proximity (L03 sector-vs-instrument): '\n            
+ '; '.join(flagged[:5]) + ' — consider sectoral rephrase')\n\n\n# ── P01: placeholder/template-failure detection ───────────────────────────────\n# Specific patterns observed in Brief 5 + likely model-template misfires.\n# Generic \"double space anywhere\" too noisy; we target known shapes.\n\n_P01_PATTERNS = (\n    # English patterns\n    r'\\bDrawing\\s+on\\s*,',\n    r'\\bcarry\\s+where\\b',\n    r'\\bcarry\\s\\s+\\w+',\n    r'\\braising\\s\\s+\\w+',\n    r'\\bAll\\s+carry\\s+where\\b',\n    # Spanish patterns\n    r'\\bA\\s+partir\\s+de\\s*,',\n    r'\\bLas\\s+fuentes\\s\\s+\\w+',\n    r'\\bTodas\\s+llevan\\s\\s+\\w+',\n    r'\\bvisible\\s+en\\s+las\\s+fuentes\\s+—\\s+\\b',\n    r'\\bbasada\\s+en\\s*,',\n    r'\\bsegún\\s*,',\n    # Generic \"verb + comma + nothing meaningful before next noun\"\n    r'\\b(?:on|in|de|en)\\s*,\\s*(?:corpus|sources|el|la|los|las)\\b',\n)\n_P01_RE = re.compile('|'.join(_P01_PATTERNS), re.IGNORECASE)\n\n\ndef check_placeholder_template_failure(html: str) -> str | None:\n    \"\"\"Detect placeholder gaps from model-template misfires.\n\n    Origin: Brief 5 European Defence shipped 7 placeholders (commit 1818bcb\n    retroactive A3 fix): \"raising  questions\", \"Drawing on , corpus,\",\n    \"All carry  where not directly sourced\", \"Las fuentes  documentan\",\n    \"A partir de , el corpus\", \"visible en las fuentes —\", \"Todas llevan\n    donde no cuentan con fuente directa\". Pattern class: model emitted\n    template tokens (verb + comma + missing-noun-gap + connective) that\n    the editorial pass should have filled.\n\n    Started as WARNING. Promote to BLOCKER once 1 week of clean runs\n    confirms the pattern set is calibrated. 
Cross-ref EDITORIAL-LEGAL-02 P01.\n    \"\"\"\n    hits = list(_P01_RE.finditer(html))\n    if not hits:\n        return None\n    samples = []\n    for m in hits[:3]:\n        ctx = html[max(0, m.start()-25):min(len(html), m.end()+30)]\n        samples.append(ctx.replace('\\n', ' '))\n    return (f'{len(hits)} placeholder/template-gap hit(s) — '\n            f'P01 model-misfire flag; samples: {\" | \".join(samples)}')\n\n\n# ── P03 (partial): cross-lingual false-friend \"billón/trillón\" ───────────────\n# Spanish \"billón\" = 10^12 (a trillion in US sense); English \"billion\" = 10^9.\n# When a translator carries \"billion\" → \"billón\" the figure is 1000× off.\n# Current corpus convention keeps shorthand `$X B` in both languages, so any\n# occurrence of `billón` / `trillón` in the lang-es scope is a translator\n# slip until proven otherwise. WARNING (not BLOCKER) — a WARN→BLOCKER\n# promotion attempted 2026-05-27 PM was reverted same-session: Brief 8 W22's\n# 59-hit cluster turned out to be entirely legitimate 10^12 translations\n# (the EN side uses \"$XT\" shorthand for trillion which the original context-\n# negate regex didn't recognize). Context-negate extended same commit to\n# also recognize T-shorthand; severity stays at WARN to keep human-review\n# surface on any genuine future slip without gating publish on a check that\n# just demonstrated false-positive risk. 
Cross-ref EDITORIAL-LEGAL-02 P03\n# (this is the false-friend half — the full numerical-equivalence sweep\n# is deferred).\n\n_P03_FALSE_FRIEND_RE = re.compile(r'\\b(?:bill[óo]n(?:es)?|trill[óo]n(?:es)?)\\b', re.IGNORECASE)\n\n\ndef check_cross_lingual_false_friend(\n    html: str, sample_mode: int | None = None,\n) -> str | None:\n    \"\"\"Flag billón/trillón usage in ES scope — likely false-friend translation\n    error (EN billion=10^9 vs ES billón=10^12, 1000× discrepancy).\n\n    context-negate (added 2026-05-15 PM per Brief 7 audit class E5 false-positive;\n    extended 2026-05-27 PM per Brief 8 W22 false-positive cluster — see below):\n    when \"billón/billones\" appears AND the corresponding EN scope has the\n    cognate \"trillion\" within the same content window (signaling the magnitude\n    is actually 10^12), the ES \"billón\" is the CORRECT translation, not a\n    false-friend slip.\n\n    Recognized EN trillion forms (extended 2026-05-27 PM):\n    - \"trillion\" / \"Trillion\" (word form)\n    - \"$XT\", \"€XT\", \"£XT\", \"¥XT\" (currency-prefixed T-shorthand, e.g. \"$6.5T\",\n      \"¥175.49T\") — Brief 8 Payments uses this shorthand throughout EN scope.\n      Original regex only matched the word form, producing 59 false-positives\n      on Brief 8 where the EN side reads \"$6.5T T-bills\" and ES side reads\n      \"6,5 billones $ en letras del Tesoro\" — both correctly mean 10^12.\n\n    Detection: scan ±250 chars of the ES hit's containing <p>; if the matching\n    EN-scope paragraph contains \"trillion\" word OR \"$X T\" shorthand near the\n    same numeric value, skip.\n\n    `sample_mode` (int|None): when not None, emit up to that many categorized\n    samples appended as an indented block under the warning. 
Origin:\n    [[feedback_w_report_raw_count_vs_sampled_population]] 2026-05-27 PM.\n    \"\"\"\n    hits = []\n    sample_records: list[dict] = []\n    for m in _P03_FALSE_FRIEND_RE.finditer(html):\n        pos = m.start()\n        ctx = html[:pos]\n        en_idx = ctx.rfind('class=\"lang-en\"')\n        es_idx = ctx.rfind('class=\"lang-es\"')\n        if max(en_idx, es_idx) < 0:\n            continue\n        if es_idx > en_idx:\n            # context-negate: extract numeric value adjacent to \"billón/billones\" hit\n            window = html[max(0, pos-80):min(len(html), m.end()+30)]\n            num_match = re.search(r'(\\d+(?:[\\.,]\\d+)?)\\s*(?:bill|trill)', window, re.IGNORECASE)\n            # 2026-06-01 FP-precision: the context-negate originally only handled\n            # DIGIT forms (\"3 billones\"); a SPELLED-OUT ES number (\"tres billones\n            # de dólares\" = three trillion, e.g. China's ~$3.2T USD reserves)\n            # bypassed it → false-positive. Map the spelled ES quantifier to its\n            # digit + English-word forms so the EN-scope trillion negate can match\n            # (\"three trillion\" / \"$3T\").\n            if not num_match:\n                _ES_SPELLED = {\n                    'un': ('1', 'one'), 'uno': ('1', 'one'), 'dos': ('2', 'two'),\n                    'tres': ('3', 'three'), 'cuatro': ('4', 'four'),\n                    'cinco': ('5', 'five'), 'seis': ('6', 'six'),\n                    'siete': ('7', 'seven'), 'ocho': ('8', 'eight'),\n                    'nueve': ('9', 'nine'), 'diez': ('10', 'ten'),\n                }\n                sp = re.search(r'\\b(un|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez)\\s+(?:bill|trill)',\n                               window, re.IGNORECASE)\n                if sp:\n                    digit, eng = _ES_SPELLED[sp.group(1).lower()]\n                    if re.search(rf'(?:€|\\$|£|¥)?\\s*{digit}(?:[.,]\\d+)?\\s*(?:\\+?\\s*)?trillion'\n                                 
rf'|\\b{eng}\\s+trillion'\n                                 rf'|(?:€|\\$|£|¥).{{0,20}}?{digit}\\s*(?:\\+?\\s*)?T\\b',\n                                 html, re.IGNORECASE):\n                        continue  # spelled ES billón = EN trillion = 10^12 (correct)\n            if num_match:\n                # Look for the same numeric value adjacent to \"trillion\" (word) OR\n                # \"T\" shorthand (currency-prefixed) in the EN scope (anywhere in\n                # document — same fact appears in both). Extended 2026-05-27 PM\n                # to recognize T-shorthand convention.\n                num_str = num_match.group(1).replace(',', '.').replace('.', r'[\\.,]?')\n                en_pattern = re.compile(\n                    # Word form: optional currency + num + optional + + \"trillion\"\n                    rf'(?:€|\\$|£|¥)?\\s*{num_str}\\s*(?:\\+?\\s*)?trillion'\n                    rf'|'\n                    # T-shorthand: currency prefix required (within 20 chars before\n                    # num) + num + optional + + literal \"T\" + word boundary. 
The\n                    # currency-prefix requirement avoids false-negatives on stray\n                    # \"2T\" / \"10T\" patterns unrelated to trillion magnitudes.\n                    rf'(?:€|\\$|£|¥).{{0,20}}?{num_str}\\s*(?:\\+?\\s*)?T\\b',\n                    re.IGNORECASE,\n                )\n                if en_pattern.search(html):\n                    continue  # context-negate matched: ES billón = EN trillion = 10^12 (correct)\n            ctx_snip = html[max(0, pos-30):min(len(html), m.end()+30)]\n            ctx_snip = re.sub(r'\\s+', ' ', ctx_snip).strip()\n            hits.append(f'{m.group(0)!r} ({ctx_snip!r})')\n            if sample_mode is not None:\n                sample_records.append({\n                    'container': _enclosing_element(html, pos),\n                    'content': ctx_snip,\n                    'bucket': _classify_sample_bucket(ctx_snip),\n                    'strike_type': f'ES false-friend: {m.group(0)!r}',\n                })\n    if not hits:\n        return None\n    msg = (f'{len(hits)} false-friend hit(s) in ES scope — '\n           f'EN \"billion\"=10^9 vs ES \"billón\"=10^12 (use \"mil millones\" '\n           f'for 10^9): ' + '; '.join(hits[:3]))\n    if sample_mode is not None and sample_records:\n        block = _format_samples_block(sample_records, sample_mode)\n        if block:\n            msg = msg + '\\n' + block\n    return msg\n\n\n# ── S01: inference-citation ratio (SD-estimate footnote share) ───────────────\n# Cross-ref EDITORIAL-LEGAL-02 S01 + EDITORIAL-PROMPT INFERENCE SOURCE\n# TRANSPARENCY rule. Editorial standard: at most 25% of footnote-anchored\n# claims may rely solely on internal SD analytical inference (vs corpus\n# citation or external source). 
Detection:\n#   - Find <li id=\"fn-N\">…</li> entries whose body matches inference tokens\n#     (\"SD estimate\" / \"no direct corpus citation\" / \"internal estimate\").\n#   - Count <sup><a href=\"#fn-N\"> refs to those vs all sup refs.\n#   - Fire if total sup ≥ 4 AND inference ratio > 0.25.\n# Threshold ≥ 4 avoids noise on briefs with very few footnotes (Brief 5 had\n# 15+ sups; below 4 the ratio is statistically meaningless).\n\n_S01_INFERENCE_TOKENS = re.compile(\n    r'\\b(?:SD\\s+estimate|no\\s+direct\\s+corpus\\s+citation|internal\\s+estimate|'\n    r'SD\\s+analytical|estimaci[óo]n\\s+(?:interna|SD)|sin\\s+cita\\s+directa\\s+del\\s+corpus)\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_inference_citation_ratio(html: str) -> str | None:\n    \"\"\"Flag briefs where >25% of footnote-anchored claims rely on SD estimate\n    rather than corpus or external citation.\"\"\"\n    # Build set of inference-footnote IDs\n    inference_ids = set()\n    for m in re.finditer(r'<li\\s+id=\"fn-(\\d+)\"[^>]*>(.*?)</li>', html, re.S):\n        fn_id, body = m.group(1), m.group(2)\n        if _S01_INFERENCE_TOKENS.search(body):\n            inference_ids.add(fn_id)\n    # Count sup references\n    total_refs = 0\n    inf_refs = 0\n    for m in re.finditer(r'<sup[^>]*>.*?href=\"#fn-(\\d+)\".*?</sup>', html, re.S):\n        total_refs += 1\n        if m.group(1) in inference_ids:\n            inf_refs += 1\n    if total_refs < 4:\n        return None  # too few refs to be statistically meaningful\n    if not inference_ids:\n        return None\n    ratio = inf_refs / total_refs\n    if ratio <= 0.25:\n        return None\n    return (f'inference-citation ratio {inf_refs}/{total_refs} = {ratio:.0%} '\n            f'exceeds 25% (S01 threshold); inference fn IDs: '\n            f'{sorted(inference_ids, key=int)} — strengthen with corpus or '\n            f'external citation, or label specific numbers as \"SD estimate\"')\n\n\n# ── P02: bilingual structural parity (h3 + table + p) 
────────────────────────\n# Extends `section_count_parity` (which covers h2 only) to detect ES\n# truncation that survives at the section-header level but loses subsections\n# or content paragraphs. Origin: EDITORIAL-LEGAL-02 P02 — Brief 5 European\n# Defense ES half had Sección VIII truncated (h2 OK, h3 + p missing).\n# Tolerances calibrated against the 5-brief corpus on 2026-05-02:\n#   h3:    Δ ≤ 2  (corpus max Δ=1, allow some drift for headings)\n#   table: Δ ≤ 1  (corpus max Δ=0)\n#   p:     Δ ≤ max(8, 20% of larger)  (corpus max Δ=4 on 63 p tags)\n\ndef _count_in_lang_scope(html: str, tag: str) -> tuple[int, int]:\n    \"\"\"Count <tag> occurrences attributed to lang-en vs lang-es by nearest\n    preceding `class=\"lang-XX\"` opener. Tags outside any lang scope are\n    ignored. Used by P02; keep consistent with the heuristic in\n    EDITORIAL-LEGAL-05 audit.\"\"\"\n    en = es = 0\n    for m in re.finditer(rf'<{tag}[\\s>]', html):\n        ctx = html[:m.start()]\n        en_idx = ctx.rfind('class=\"lang-en\"')\n        es_idx = ctx.rfind('class=\"lang-es\"')\n        if max(en_idx, es_idx) < 0:\n            continue\n        if en_idx > es_idx:\n            en += 1\n        else:\n            es += 1\n    return en, es\n\n\ndef check_bilingual_extended_parity(html: str) -> str | None:\n    \"\"\"Bilingual structural parity beyond h2 — guards ES truncation that\n    section_count_parity (h2-only) misses.\n\n    Cross-ref EDITORIAL-LEGAL-02 P02. 
Tolerances calibrated 2026-05-02.\n    Conditional guard: brief without any lang-en or lang-es scope (legacy\n    monolingual or test fixture) → silent no-op.\n    \"\"\"\n    failures = []\n    # ASYMMETRIC TOLERANCES recalibrated 2026-05-15 PM (post Phase 0/1 diagnostic).\n    # Empirical pattern across 16 Brief 7 retries: Sonnet 4.6 translator with\n    # current bilingual mandates produces ES with bidirectional variance:\n    #   - chatInput v2 + Capa 1: ES UNDER-emits (ratio 0.76-0.78) — paragraph consolidation\n    #   - chatInput v3 + Capa 1: ES OVER-emits (ratio 1.28-1.43) — paragraph splitting +\n    #     formal Spanish prose expansion (10-25% natural) + Capa 1 mandated bilingual\n    #     sections (SIGNAL DIVERGENCE, ACRONYMS, etc.) add ES content\n    # Asymmetric design: tighter on UNDER-emit (truncation risk = real content loss);\n    # looser on OVER-emit (translation expansion = stylistic, content preserved).\n    # ratio_tol_under = 0.25 (keep prior calibration); ratio_tol_over = 0.50.\n    for tag, abs_tol, ratio_tol_under, ratio_tol_over in (\n        ('h3',    2,    None, None),\n        ('table', 1,    None, None),\n        ('p',     8,    0.25, 0.50),\n    ):\n        en, es = _count_in_lang_scope(html, tag)\n        if en == 0 and es == 0:\n            continue\n        diff = abs(en - es)\n        # Determine direction: ES under-emit (en > es) vs over-emit (es > en)\n        threshold = abs_tol\n        if ratio_tol_under is not None and en >= es:\n            # ES under-emits: tight tolerance (truncation risk)\n            threshold = max(abs_tol, int(max(en, es) * ratio_tol_under))\n        elif ratio_tol_over is not None and es > en:\n            # ES over-emits: looser tolerance (translation expansion, content preserved)\n            threshold = max(abs_tol, int(max(en, es) * ratio_tol_over))\n        if diff > threshold:\n            direction = 'ES UNDER' if en > es else 'ES OVER'\n            failures.append(f'<{tag}> EN={en} ES={es} 
Δ={diff} > tol={threshold} ({direction})')\n    if not failures:\n        return None\n    return ('bilingual structural parity exceeded (P02 truncation guard): '\n            + '; '.join(failures))\n\n\n# ── BRIEF-INFOGRAPHIC-PANEL-01 — \"Brief at a Glance\" panel parity ─────────────\n# Deployed 2026-05-30 into the Forecaster Format Output node (helper\n# `_briefAtAGlance`, scripts/patch-format-output-infographic-panel.py). Renders\n# a `<div class=\"chart-wrap glance-panel\" ...>` holding a `.kpi-grid` of `.kpi`\n# cells, each label carrying paired `<span class=\"lang-en\">`/`<span\n# class=\"lang-es\">` text. Renders on Brief 9+ ONLY — the 8 pre-existing briefs\n# never emit it. This guard is therefore PRESENCE-CONDITIONAL: a brief without a\n# `glance-panel` element is silently skipped, so legacy briefs never fire. When\n# the panel IS present it verifies (a) bilingual parity — panel labels appear\n# under BOTH a lang-en and a lang-es span scope — and (b) numeric consistency:\n# if the panel re-renders a base-case `NN%` and the cover `.kpi-grid` also shows\n# a base-case `NN%`, the two must agree (the panel is built from the same body,\n# so a divergence signals drift). WARN-only per calibrate-then-ratchet; promote\n# to BLOCKER after a clean Brief 9-11 window.\n\n_GLANCE_PANEL_OPEN_RE = re.compile(\n    r'<div\\b[^>]*class=\"[^\"]*\\bglance-panel\\b[^\"]*\"[^>]*>',\n    re.I,\n)\n# The data `.kpi-grid` inside the panel — the bilingual-parity scope. 
The\n# `chart-title` (\"Brief at a Glance\" / \"Resumen visual\") is ALWAYS bilingual, so\n# parity must be measured on the kpi cells, not the whole panel (else an EN-only\n# data emission false-passes on the title's ES span).\n_GLANCE_KPI_GRID_RE = re.compile(\n    r'<div\\b[^>]*class=\"[^\"]*\\bkpi-grid\\b[^\"]*\"[^>]*>(.*)', re.S | re.I,\n)\n# Base-case % cell as emitted by _briefAtAGlance: a kpi-val NN% whose adjacent\n# kpi-label carries the \"Base-case\" / \"Probabilidad base\" wording.\n_GLANCE_BASE_PCT_RE = re.compile(\n    r'<div class=\"kpi-val\">\\s*(\\d{1,3})\\s*%\\s*</div>\\s*'\n    r'<div class=\"kpi-label\">[\\s\\S]{0,120}?(?:Base-case|Probabilidad\\s+base)',\n    re.I,\n)\n# Cover kpi-grid base-case % — mirrors the helper's own cover-KPI extractor\n# (patch-format-output-infographic-panel.py:_briefAtAGlance), matched against\n# the pre-panel region only (the cover precedes the glance panel in the body).\n_COVER_BASE_PCT_RE = re.compile(\n    r'<div class=\"kpi-val\">\\s*(\\d{1,3})\\s*%\\s*</div>\\s*'\n    r'<div class=\"kpi-label\">[\\s\\S]{0,90}?(?:scenario|escenario|base)',\n    re.I,\n)\n\n\ndef check_glance_panel_parity(html: str) -> str | None:\n    \"\"\"BRIEF-INFOGRAPHIC-PANEL-01 — bilingual + numeric parity of the\n    \"Brief at a Glance\" infographic panel.\n\n    PRESENCE-CONDITIONAL: returns None (silent) when no `glance-panel`\n    element is present — the 8 pre-glance briefs never fire. When present:\n\n    (a) Bilingual parity — the panel's `.kpi-grid` data cells must surface\n        under BOTH a `lang-en` and a `lang-es` span scope (each `.kpi`\n        label pairs an EN span with an ES span; a grid emitting only one\n        language is a template/translation regression). Parity is measured\n        on the kpi-grid, NOT the always-bilingual chart-title.\n    (b) Cover↔panel numeric consistency — if the panel re-renders a\n        base-case `NN%` AND the cover `.kpi-grid` also exposes a base-case\n        `NN%`, the two must match. 
The panel is derived from the same\n        assembled body, so a mismatch signals drift.\n\n    WARN-only (calibrate-then-ratchet). Cross-ref\n    scripts/patch-format-output-infographic-panel.py.\n    \"\"\"\n    open_m = _GLANCE_PANEL_OPEN_RE.search(html)\n    if not open_m:\n        return None  # no panel → silent (legacy briefs)\n\n    panel = html[open_m.end():]\n    # Restrict bilingual parity to the data cells (the kpi-grid), excluding the\n    # always-bilingual chart-title that precedes it inside the panel.\n    grid_m = _GLANCE_KPI_GRID_RE.search(panel)\n    grid = grid_m.group(1) if grid_m else panel\n\n    failures = []\n\n    # (a) bilingual parity within the panel's data cells\n    has_en = re.search(r'<span\\b[^>]*class=\"[^\"]*\\blang-en\\b[^\"]*\"', grid) is not None\n    has_es = re.search(r'<span\\b[^>]*class=\"[^\"]*\\blang-es\\b[^\"]*\"', grid) is not None\n    if has_en and not has_es:\n        failures.append('panel content present in lang-en scope only (ES missing)')\n    elif has_es and not has_en:\n        failures.append('panel content present in lang-es scope only (EN missing)')\n\n    # (b) cover ↔ panel base-case % consistency\n    panel_pct_m = _GLANCE_BASE_PCT_RE.search(panel)\n    if panel_pct_m:\n        panel_pct = panel_pct_m.group(1)\n        # Cover region = everything before the panel opener, so we never\n        # re-match the panel's own % as the \"cover\" value.\n        cover_pct_m = _COVER_BASE_PCT_RE.search(html[:open_m.start()])\n        if cover_pct_m and cover_pct_m.group(1) != panel_pct:\n            failures.append(\n                f'base-case % mismatch: glance-panel shows {panel_pct}% but '\n                f'cover kpi-grid shows {cover_pct_m.group(1)}%')\n\n    if not failures:\n        return None\n    return ('glance-panel parity (BRIEF-INFOGRAPHIC-PANEL-01): '\n            + '; '.join(failures))\n\n\n# Required keys of the EDITORIAL-COVERAGE-BALANCE-01 Part B SD-COVERAGE block.\n# Confirmed against three 
sources (all agree): the live EDITORIAL_PROMPT rule in\n# workflow xYsufMSzxRINvIY7 (Part B template + controlled vocabularies),\n# brief-saver.py:ingest_coverage_tags (keys it reads on promote), and\n# prompts/EDITORIAL_PROMPT_RULES_PENDING_COVERAGE_BALANCE_2026-05-29.md.\n_SD_COVERAGE_RE = re.compile(r'<!--\\s*SD-COVERAGE:\\s*(\\{.*?\\})\\s*-->', re.S)\n_SD_COVERAGE_REQUIRED_KEYS = (\n    'epistemic_stance', 'horizon', 'frame', 'sector', 'regions', 'title',\n)\n\n\ndef check_coverage_balance_metadata(html: str) -> str | None:\n    \"\"\"EDITORIAL-COVERAGE-BALANCE-01 Part B — validate the self-emitted\n    `<!-- SD-COVERAGE: {json} -->` coverage-metadata block.\n\n    PRESENCE-CONDITIONAL: returns None (silent) when no SD-COVERAGE comment\n    block is present — the 8 legacy briefs predate the rule (live in the\n    Forecaster as of 2026-05-29; Brief 9 is the first to emit it) and must NOT\n    fire. When present:\n\n    (a) the JSON must parse;\n    (b) it must carry every required key (epistemic_stance, horizon, frame,\n        sector, regions, title) — these feed brief-saver.py:ingest_coverage_tags\n        into data/coverage_tags.json on promote.\n\n    A malformed or key-incomplete block would otherwise pass uncaught (the\n    ingest helper is best-effort and silently no-ops on bad blocks), so this is\n    the only gate that surfaces a broken emission at quality-check time.\n\n    WARN-only (calibrate-then-ratchet). 
Cross-ref\n    scripts/coverage-bias-audit.py + brief-saver.py:ingest_coverage_tags.\n    \"\"\"\n    m = _SD_COVERAGE_RE.search(html)\n    if not m:\n        return None  # rule not yet active for this brief — silent (legacy briefs)\n    try:\n        tags = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return ('SD-COVERAGE block does not parse as JSON '\n                '(EDITORIAL-COVERAGE-BALANCE-01 Part B)')\n    if not isinstance(tags, dict):\n        return ('SD-COVERAGE block is not a JSON object '\n                '(EDITORIAL-COVERAGE-BALANCE-01 Part B)')\n    missing = [k for k in _SD_COVERAGE_REQUIRED_KEYS if k not in tags]\n    if missing:\n        return ('SD-COVERAGE block missing required key(s): '\n                + ', '.join(missing)\n                + ' (EDITORIAL-COVERAGE-BALANCE-01 Part B)')\n    return None\n\n\n# Counter-frame is marked by the brief-tab TAG ([COUNTER-FRAME], emitted\n# UNTRANSLATED in both tabs) and/or the prose direction line (\"direction:\n# counter-frame\" EN; \"Dirección: contra-encuadre\" ES) — the SD-PRED-CAL JSON\n# carries only direction above/below, NOT the tag. COUNTER-FRAME-GATE-HARDENING-01\n# (2026-06-05): match the TAG or DIRECTION-LINE in EN+ES forms (verified against\n# the live ES rendering) — deliberately NOT a bare-word match, which would let a\n# glossary/prose mention of \"counter-frame\" defeat the gate. 
The untranslated tag\n# already covers the current bilingual pipeline; the ES direction/tag alternatives\n# future-proof a Spanish-only Clave brief.\n_COUNTER_FRAME_RE = re.compile(\n    r'\\[(?:COUNTER-FRAME|CONTRA-MARCO|CONTRA-ENCUADRE)\\]'\n    r'|dire(?:ction|cci[oó]n):\\s*(?:counter-frame|contra-marco|contra-encuadre)',\n    re.I)\n\n\ndef check_coverage_counter_frame_present(html: str) -> str | None:\n    \"\"\"EDITORIAL-COVERAGE-BALANCE-01 Part A — ledger directional diversity.\n\n    Every brief with formal predictions MUST carry >=1 honest\n    `direction: counter-frame` prediction (a falsifiable claim that, if it\n    resolves, runs AGAINST the brief's own dominant thesis). A ledger that only\n    ever bets one direction scores as a lucky directional wager, not calibration;\n    >=1 honest counter-bet per brief is what lets a good Brier read as skill.\n\n    PRESENCE-CONDITIONAL: silent (None) unless the brief carries a parseable\n    SD-PRED-CAL cover block with >=3 formal predictions (matches the brief-set\n    counter-frame rule scope — a 1-2 prediction brief may legitimately lack one).\n    Fires when >=3 predictions are present but NONE is tagged counter-frame.\n    Detection is from the prose/brief-tab marker, not the JSON.\n\n    WARN on the corpus runner (the legacy briefs predate the rule and self-document\n    as directional-balance gaps); elevated to a PROMOTE blocker via brief-saver\n    PROMOTE_BLOCKERS so a NEW brief cannot ship one-directional. Calibrate-then-\n    ratchet. 
Cross-ref scripts/coverage-bias-audit.py + the directional-balance\n    backlog thread (LEDGER-AUG15-DIRECTIONAL-BALANCE-01).\n    \"\"\"\n    m = re.search(r'<!--\\s*SD-PRED-CAL:\\s*(\\[.*?\\])\\s*-->', html, re.S)\n    if not m:\n        return None\n    try:\n        entries = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return None  # prediction_calibration / predstate own the malformed-block signal\n    if not isinstance(entries, list) or len(entries) < 3:\n        return None\n    if _COUNTER_FRAME_RE.search(html):\n        return None\n    return (f'{len(entries)} formal predictions but none tagged counter-frame — '\n            'EDITORIAL-COVERAGE-BALANCE-01 Part A requires >=1 honest counter-frame '\n            'prediction per brief for ledger directional diversity')\n\n\n# BRIEF-GLANCE-DIGEST-01 — the brief emits ONE machine-readable SD-GLANCE digest\n# block (distinct from the rendered glance-panel that check_glance_panel_parity\n# guards). Schema: thesis{en,es} · dual_track{stated_en,stated_es,actual_en,\n# actual_es} (REQUIRED) · headline_prediction{claim_en,claim_es,prob,by}\n# · key_figures[{value,label_en,label_es}] (1-3) · watch{event_en,event_es,date}\n# (OPTIONAL) · takeaway{en,es}.\n_SD_GLANCE_RE = re.compile(r'<!--\\s*SD-GLANCE:\\s*(\\{.*?\\})\\s*-->', re.S)\n_SD_GLANCE_ISO_DATE_RE = re.compile(r'^\\d{4}-\\d{2}-\\d{2}$')\n\n\ndef check_glance_digest(html: str) -> str | None:\n    \"\"\"BRIEF-GLANCE-DIGEST-01 — validate the self-emitted\n    `<!-- SD-GLANCE: {json} -->` machine-readable digest block.\n\n    PRESENCE-CONDITIONAL: returns None (silent) when no SD-GLANCE comment block\n    is present — legacy briefs predating the digest never fire. 
When present:\n\n    (a) the JSON must parse and be an object;\n    (b) it must carry thesis, dual_track, headline_prediction, key_figures,\n        takeaway;\n    (c) thesis + takeaway must be dicts with both `en` and `es`;\n    (d) headline_prediction must carry claim_en, claim_es, prob, by — with\n        `prob` an int in 55-95 and `by` an ISO YYYY-MM-DD date;\n    (e) key_figures must be a non-empty list of dicts each with value,\n        label_en, label_es;\n    (f) dual_track (REQUIRED) must be a dict with non-empty stated_en,\n        stated_es, actual_en, actual_es;\n    (g) watch (OPTIONAL) — silent if absent; if present must be a dict with\n        non-empty event_en, event_es + an ISO YYYY-MM-DD `date`.\n\n    A malformed digest would otherwise pass uncaught, so this is the only gate\n    that surfaces a broken emission at quality-check time.\n\n    WARN-only (calibrate-then-ratchet). Cross-ref the rendered glance-panel\n    guarded by check_glance_panel_parity.\n    \"\"\"\n    m = _SD_GLANCE_RE.search(html)\n    if not m:\n        return None  # no digest → silent (legacy briefs)\n    try:\n        digest = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return 'SD-GLANCE block does not parse as JSON (BRIEF-GLANCE-DIGEST-01)'\n    if not isinstance(digest, dict):\n        return 'SD-GLANCE block is not a JSON object (BRIEF-GLANCE-DIGEST-01)'\n\n    failures = []\n\n    missing = [k for k in ('thesis', 'dual_track', 'headline_prediction',\n                           'key_figures', 'takeaway') if k not in digest]\n    if missing:\n        failures.append('missing required key(s): ' + ', '.join(missing))\n\n    # (c) thesis + takeaway bilingual dicts\n    for key in ('thesis', 'takeaway'):\n        if key in digest:\n            val = digest[key]\n            if not isinstance(val, dict) or 'en' not in val or 'es' not in val:\n                failures.append(f'{key} must be an object with both en and es')\n\n    # (d) 
headline_prediction shape + prob/by validity\n    if 'headline_prediction' in digest:\n        hp = digest['headline_prediction']\n        if not isinstance(hp, dict):\n            failures.append('headline_prediction must be an object')\n        else:\n            hp_missing = [k for k in ('claim_en', 'claim_es', 'prob', 'by')\n                          if k not in hp]\n            if hp_missing:\n                failures.append('headline_prediction missing: '\n                                + ', '.join(hp_missing))\n            if 'prob' in hp:\n                prob = hp['prob']\n                if not isinstance(prob, int) or isinstance(prob, bool) \\\n                        or not (55 <= prob <= 95):\n                    failures.append(\n                        f'headline_prediction.prob must be an int in 55-95 '\n                        f'(got {prob!r})')\n            if 'by' in hp:\n                by = hp['by']\n                if not isinstance(by, str) or not _SD_GLANCE_ISO_DATE_RE.match(by):\n                    failures.append(\n                        f'headline_prediction.by must be ISO YYYY-MM-DD '\n                        f'(got {by!r})')\n\n    # (e) key_figures non-empty list of complete dicts\n    if 'key_figures' in digest:\n        kf = digest['key_figures']\n        if not isinstance(kf, list) or not kf:\n            failures.append('key_figures must be a non-empty list')\n        else:\n            for i, fig in enumerate(kf):\n                if not isinstance(fig, dict) or any(\n                        k not in fig for k in ('value', 'label_en', 'label_es')):\n                    failures.append(\n                        f'key_figures[{i}] must be an object with value, '\n                        f'label_en, label_es')\n\n    # (f) dual_track (REQUIRED) — dict with four non-empty bilingual strings\n    if 'dual_track' in digest:\n        dt = digest['dual_track']\n        if not isinstance(dt, dict):\n            
failures.append('dual_track must be an object')\n        else:\n            dt_missing = [k for k in ('stated_en', 'stated_es', 'actual_en',\n                                      'actual_es')\n                          if not (isinstance(dt.get(k), str) and dt.get(k).strip())]\n            if dt_missing:\n                failures.append('dual_track missing/empty: '\n                                + ', '.join(dt_missing))\n\n    # (g) watch (OPTIONAL) — silent if absent; validated when present\n    if 'watch' in digest:\n        w = digest['watch']\n        if not isinstance(w, dict):\n            failures.append('watch must be an object')\n        else:\n            w_missing = [k for k in ('event_en', 'event_es')\n                         if not (isinstance(w.get(k), str) and w.get(k).strip())]\n            if w_missing:\n                failures.append('watch missing/empty: ' + ', '.join(w_missing))\n            date = w.get('date')\n            if not isinstance(date, str) or not _SD_GLANCE_ISO_DATE_RE.match(date):\n                failures.append(\n                    f'watch.date must be ISO YYYY-MM-DD (got {date!r})')\n\n    if not failures:\n        return None\n    return ('SD-GLANCE digest invalid (BRIEF-GLANCE-DIGEST-01): '\n            + '; '.join(failures))\n\n\n# ── L02: regulatory disclaimer presence (EN + ES) ────────────────────────────\n# Mandates that every published brief carries the bilingual regulatory\n# disclaimer in its footer block. Origin: EDITORIAL-LEGAL-02 L02 — added\n# 2026-05-02 after disclaimer was wired into the global footer template\n# (commit 687d8f8). Acts as a regression guard against accidental template\n# rollback. 
Two anchors required (one per language) to avoid false-pass on\n# briefs that include only one half of the disclaimer.\n\n_L02_EN_RE = re.compile(\n    r'\\b(?:not\\s+investment\\s+advice|editorial\\s+analysis(?:\\s+and\\s+opinion)?|'\n    r'does\\s+not\\s+constitute\\s+investment\\s+advice)\\b',\n    re.IGNORECASE,\n)\n_L02_ES_RE = re.compile(\n    r'\\b(?:no\\s+constituye\\s+asesoramiento|an[áa]lisis\\s+editorial(?:\\s+y\\s+opini[óo]n)?|'\n    r'no\\s+constituye\\s+recomendaci[óo]n)\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_regulatory_disclaimer_present(html: str) -> str | None:\n    \"\"\"Bilingual regulatory disclaimer must be present in the brief.\n\n    Cross-ref: EDITORIAL-LEGAL-02 L02. The footer template renders both\n    EN and ES disclaimers (commit 687d8f8). This check fires if either\n    language anchor is missing — that signals a template regression or a\n    brief produced before the global footer was wired.\n    \"\"\"\n    en_hit = _L02_EN_RE.search(html)\n    es_hit = _L02_ES_RE.search(html)\n    missing = []\n    if not en_hit:\n        missing.append('EN (\"not investment advice\" / \"editorial analysis\")')\n    if not es_hit:\n        missing.append('ES (\"no constituye asesoramiento\" / \"análisis editorial\")')\n    if not missing:\n        return None\n    return (f'regulatory disclaimer missing in: {\", \".join(missing)} — '\n            f'L02 footer template regression (cross-ref EDITORIAL-LEGAL-02)')\n\n\n_FOOTER_DISCIPLINE_FORBIDDEN = (\n    \"WEF Global Risks Report 2026\",\n    \"Eurasia Group Top Risks 2026\",\n    \"Prospect Theory · Loss Aversion\",\n    \"Full Analysis: all rights reserved\",\n    \"Barkow Status Competition\",\n    \"Meadows Leverage Points\",\n)\n\n_FOOTER_REGION_RES = (\n    re.compile(r'<footer\\b[^>]*class=\"[^\"]*\\bsite-footer\\b[^\"]*\"[^>]*>(.*?)</footer>',\n               re.S | re.I),\n    re.compile(r'<div\\b[^>]*class=\"[^\"]*\\bcover-meta\\b[^\"]*\"[^>]*>(.*?)</div>',\n               re.S | 
re.I),\n)\n\n\ndef check_no_internal_backlog_refs(html: str) -> str | None:\n    \"\"\"RUNNER-NO-INTERNAL-BACKLOG-REFS-01 (2026-05-23 PM) — BLOCKER.\n\n    Forbids published brief content from exposing internal backlog\n    references, ticket IDs, or hard-coded brief numbers. Catches:\n    - `[BACKLOG: X-Y-Z]` notation\n    - \"Brief N question\" / \"pending Brief N coverage\" / \"see Brief N\"\n    - Internal naming conventions (TEMA-X, PROC-N when used as references)\n\n    Origin: Brief 8 v4 emitted 6× `[BACKLOG: BRIEF-10]` + 2× hard-coded\n    \"Brief 10 question\" → manually cleaned 2026-05-23 PM. Pairs with\n    EDITORIAL_PROMPT R26 (prevention at generation).\n\n    Severity: BLOCKER. Brief will be blocked at promote-gate if matches.\n    \"\"\"\n    # Extract body content (skip <script>, <style>, JSON-LD)\n    body = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)\n    body = re.sub(r'<style[^>]*>.*?</style>', '', body, flags=re.DOTALL | re.IGNORECASE)\n\n    hits = []\n\n    # Pattern 1: [BACKLOG: ...] 
notation\n    for m in re.finditer(r'\\[BACKLOG:[^\\]]+\\]', body):\n        snippet = body[max(0, m.start()-40):m.end()+40].replace('\\n', ' ').strip()\n        hits.append(f\"`{m.group(0)}` in '...{snippet[:120]}...'\")\n\n    # Pattern 2: \"Brief N question\" / \"Brief N coverage\" / \"pending Brief N\" / \"see Brief N\" / \"in Brief N\"\n    # Allow narrative references like \"Brief 4 Critical Minerals\" or \"as identified in Brief 4\" via title-pattern allowlist\n    # Forbid: bare \"Brief N question\" or \"pending Brief N\" without descriptive title\n    for m in re.finditer(r'(pending|see|that is a|is a|Reading: Brief|to a)\\s+Brief\\s+\\d+\\b(?!\\s+[A-Z])', body, re.IGNORECASE):\n        snippet = body[max(0, m.start()-40):m.end()+40].replace('\\n', ' ').strip()\n        hits.append(f\"'{m.group(0)}' in '...{snippet[:120]}...'\")\n\n    # Pattern 3: BRIEF-NN-TOPIC-NN internal-ticket IDs in body\n    for m in re.finditer(r'BRIEF-\\d+-[A-Z][A-Z0-9-]+-\\d+', body):\n        snippet = body[max(0, m.start()-40):m.end()+40].replace('\\n', ' ').strip()\n        hits.append(f\"`{m.group(0)}` in '...{snippet[:120]}...'\")\n\n    if not hits:\n        return None\n\n    # Show top-3 hits\n    sample = '; '.join(hits[:3])\n    return f'{len(hits)} internal-backlog ref(s) in published content: {sample}. Public-facing alternatives: \"pending dedicated coverage\" / \"forthcoming standalone treatment\" / \"queued as a separate brief\". cf. EDITORIAL_PROMPT R26.'\n\n\ndef check_footer_namedrop_discipline(html: str) -> str | None:\n    \"\"\"Block name-drop strings from <footer.site-footer> / <div.cover-meta>.\n\n    Cross-ref: EDITORIAL-FOOTER-DISCIPLINE-01. 
Defensive guard against\n    silent reintroduction of the legal patch from 2026-04-30 (commit\n    2dddef7), which deleted WEF/Eurasia/Prospect-Theory name-drops from\n    footer + cover-meta after WEF terms (no implied affiliation), Eurasia\n    Group \"Top Risks\" trademark, and nominative-fair-use factor 3.\n\n    Body-level citations of WEF/Eurasia with [SOURCE | DOC | PAGE] are\n    legitimate nominative use and not affected — this check is scoped\n    to footer/cover-meta regions only.\n    \"\"\"\n    regions = []\n    for rgx in _FOOTER_REGION_RES:\n        regions.extend(rgx.findall(html))\n    if not regions:\n        return None\n    hits = []\n    for region in regions:\n        for forbidden in _FOOTER_DISCIPLINE_FORBIDDEN:\n            if forbidden in region:\n                hits.append(forbidden)\n    if not hits:\n        return None\n    unique = sorted(set(hits))\n    return (f\"footer/cover-meta name-drop reintroduced: {unique} — \"\n            f\"EDITORIAL-FOOTER-DISCIPLINE-01 (legal patch 2dddef7 regression)\")\n\n\ndef check_investor_verdict_contradiction(html: str) -> str | None:\n    \"\"\"Investor Verdict line contradicting an upside scenario in the body.\n\n    Origin: Brief 5 (European Defense, 2026-05-01) discussed NATO fracture\n    creating procurement-space upside for European sovereigns, but the Verdict\n    line read \"downside from NATO fracture\" — contradicting its own scenario.\n    \"\"\"\n    has_fracture_upside = bool(re.search(\n        r'fracture[^<]{0,150}(?:upside|benefit|procurement\\s+space)',\n        html, re.I,\n    ))\n    has_downside_verdict = bool(re.search(\n        r'Verdict:[^<]{0,200}downside\\s+from\\s+NATO\\s+fracture',\n        html, re.I,\n    ))\n    if has_fracture_upside and has_downside_verdict:\n        return ('Investor verdict contradicts scenario analysis '\n                '(fracture upside described but verdict says downside)')\n    return None\n\n\ndef check_bare_inference_number(html: str) 
-> str | None:\n    \"\"\"Specific quantitative threshold cited only via fn-3 (analytical inference).\n\n    A number followed immediately by <sup> pointing to fn-3 means the figure\n    has no corpus citation — only analytical inference. Acceptable when the\n    figure is generalized; not acceptable when the brief makes a specific\n    threshold claim (days/weeks).\n    \"\"\"\n    if re.search(\n        r'\\d+\\s*(?:days?|weeks?|d[ií]as?|semanas?)\\s*<sup>\\s*<a\\s+href=[\"\\']#fn-3[\"\\']',\n        html, re.I,\n    ):\n        return ('quantitative threshold (days/weeks) cited only from fn-3 '\n                'inference — needs corpus citation or \"SD estimate\" label')\n    return None\n\n\ndef check_internal_monitor_clarity(html: str) -> str | None:\n    \"\"\"Reference to the internal signal digest without its full form.\n\n    Readers don't know what \"Shadow Dynamics 2026-04-30\" means on its own.\n    Full form on first reference: \"Shadow Dynamics Intelligence Monitor\n    (internal signal digest, YYYY-MM-DD)\".\n    \"\"\"\n    if re.search(\n        r'Shadow\\s+Dynamics\\s+\\d{4}-\\d{2}-\\d{2}(?!\\s*\\()',\n        html, re.I,\n    ):\n        return ('internal signal digest referenced without full form — '\n                'use \"Shadow Dynamics Intelligence Monitor (internal signal '\n                'digest, YYYY-MM-DD)\"')\n    return None\n\n\ndef check_date_arithmetic(html: str) -> str | None:\n    \"\"\"DATE-ARITHMETIC-CHECK-01 (Phase 1).\n\n    Detect \"N units after [date]\" claims whose magnitude contradicts\n    explicit dates within ±300 chars of the claim.\n\n    Origin: Spain Blackout 2026-04-27 shipped \"OP 7.4 approved June 12,\n    2025 — fourteen months after the cascade [April 28, 2025]\". Real\n    gap is ~45 days, not 14 months. Eight occurrences shipped to\n    Substack newsletter + LinkedIn. 
Runner missed it; this catches that\n    pattern class.\n\n    Algorithm: for each `(N) (unit) after` claim, look for ≥2 explicit\n    Month-Day-Year dates in ±300 chars (English + Spanish months,\n    \"April 28, 2025\" or \"28 abril 2025\" forms). If present, compute\n    actual gap (max-min) and flag only when the claim is LARGER than\n    what window dates can support (ratio < 0.8) — i.e., the arithmetic\n    is impossible no matter which two window dates pair with the claim.\n    The reverse direction (ratio > 1.2) is skipped because window dates\n    that are wider than the claim are typically unrelated dates, not a\n    contradiction (the claim's actual anchor may be a named event).\n\n    Skip claims preceded by upper-bound qualifiers (\"less than\",\n    \"fewer than\", \"almost\", \"nearly\", \"menos de\", \"casi\") — those are\n    approximations where a smaller actual gap is correct.\n\n    Warning, not blocker — rhetorical framings without explicit dates\n    (\"decades after the war\", \"months later\") don't fire because they\n    require ≥2 explicit dates in window.\n    \"\"\"\n    UPPER_BOUND_RE = re.compile(\n        r'(less\\s+than|fewer\\s+than|almost|nearly|menos\\s+de|casi|'\n        r'aproximadamente|aprox\\.?|approximately|approx\\.?)\\s*$',\n        re.IGNORECASE,\n    )\n    text = re.sub(r'<[^>]+>', ' ', html)\n\n    def _extract_dates(window: str) -> list:\n        out = []\n        for m in _DATE_MD_Y_RE.finditer(window):\n            mname, day, year = m.group(1).lower(), int(m.group(2)), int(m.group(3))\n            mi = _DATE_MONTH_INDEX.get(mname)\n            if mi and 1 <= day <= 31 and 1900 <= year <= 2100:\n                try:\n                    out.append(date(year, mi, day))\n                except ValueError:\n                    pass\n        for m in _DATE_DM_Y_RE.finditer(window):\n            day, mname, year = int(m.group(1)), m.group(2).lower(), int(m.group(3))\n            mi = _DATE_MONTH_INDEX.get(mname)\n        
    if mi and 1 <= day <= 31 and 1900 <= year <= 2100:\n                try:\n                    out.append(date(year, mi, day))\n                except ValueError:\n                    pass\n        return out\n\n    findings: list[str] = []\n    for cm in _DATE_CLAIM_RE.finditer(text):\n        # Skip approximations where smaller actual is correct.\n        prefix = text[max(0, cm.start() - 40):cm.start()]\n        if UPPER_BOUND_RE.search(prefix):\n            continue\n\n        n_str, unit = cm.group(1), cm.group(2)\n        if n_str.isdigit():\n            n = int(n_str)\n        else:\n            n = _DATE_NUM_WORDS.get(n_str.lower())\n            if n is None:\n                continue\n        unit_days = _DATE_UNIT_DAYS.get(unit.lower())\n        if not unit_days:\n            continue\n        claimed_days = n * unit_days\n\n        start = max(0, cm.start() - 300)\n        end = min(len(text), cm.end() + 300)\n        dates = _extract_dates(text[start:end])\n        if len(dates) < 2:\n            continue\n        actual_gap = (max(dates) - min(dates)).days\n        if actual_gap == 0:\n            continue\n        ratio = actual_gap / claimed_days\n        # Fire only when claim is larger than what window dates can support.\n        # ratio > 1.2 is skipped — typically unrelated dates in the window,\n        # not a real contradiction (the claim's anchor may be a named event).\n        if ratio < 0.8:\n            findings.append(\n                f'\"{cm.group(0)}\" claims ~{claimed_days}d but window dates span only '\n                f'{actual_gap}d ({min(dates).isoformat()}→{max(dates).isoformat()}, '\n                f'ratio={ratio:.2f})'\n            )\n\n    # Phase 2 — duration-to-end-date pattern: \"N-unit ... running to DATE\".\n    # For each match, look in ±300 chars BEFORE for a start anchor (explicit\n    # date or month-year reference). 
Compute span vs claimed; fire when\n    # mismatch in EITHER direction (unlike Phase 1 which skips ratio > 1.2\n    # because window dates are typically unrelated to the claim's anchor).\n    def _parse_iso_or_dmy(s: str) -> \"date | None\":\n        try:\n            if re.match(r'^\\d{4}-\\d{2}-\\d{2}$', s):\n                y, mo, d = s.split('-')\n                return date(int(y), int(mo), int(d))\n            m = re.match(r'^(\\d{1,2})-(\\d{1,2})-(\\d{4})$', s)\n            if m:\n                d, mo, y = int(m.group(1)), int(m.group(2)), int(m.group(3))\n                # ES convention DD-MM-YYYY (the brief 6 ES side uses 10-11-2026\n                # for Nov 10 2026). If MM > 12, swap.\n                if mo > 12 and d <= 12:\n                    d, mo = mo, d\n                return date(y, mo, d)\n        except (ValueError, IndexError):\n            return None\n        return None\n\n    for cm in _DATE_RUNNING_TO_RE.finditer(text):\n        n_str, unit, end_str = cm.group(1), cm.group(2), cm.group(3)\n        if not n_str.isdigit():\n            continue\n        n = int(n_str)\n        unit_days = _DATE_UNIT_DAYS.get(unit.lower())\n        if not unit_days:\n            continue\n        claimed_days = n * unit_days\n        end_date = _parse_iso_or_dmy(end_str)\n        if end_date is None:\n            continue\n        # Search ±300 chars BEFORE the claim for a plausible start anchor.\n        win_start = max(0, cm.start() - 300)\n        win = text[win_start:cm.start()]\n        # Try explicit dates first (most precise).\n        anchors = _extract_dates(win)\n        # Then add month-year references (less precise — use day 15).\n        for mym in _DATE_MONTH_YEAR_RE.finditer(win):\n            mname, year = mym.group(1).lower(), int(mym.group(2))\n            mi = _DATE_MONTH_INDEX.get(mname)\n            if mi:\n                try:\n                    anchors.append(date(year, mi, 15))\n                except ValueError:\n                  
  pass\n        # Filter to anchors strictly before the end date.\n        anchors = [a for a in anchors if a < end_date]\n        if not anchors:\n            continue\n        # Most plausible start = latest anchor before end (closest to claim).\n        candidate_start = max(anchors)\n        actual_days = (end_date - candidate_start).days\n        if actual_days <= 0:\n            continue\n        ratio = actual_days / claimed_days\n        # Fire when claim is meaningfully off in EITHER direction (≥50% drift).\n        # Wider tolerance than Phase 1 because month-year fallback uses day 15\n        # approximation (±15 days slack baked in).\n        if ratio > 1.5 or ratio < 0.67:\n            findings.append(\n                f'\"{cm.group(0)[:80]}\" claims ~{n} {unit} ({claimed_days}d) '\n                f'but {candidate_start.isoformat()}→{end_date.isoformat()} '\n                f'spans {actual_days}d (ratio={ratio:.2f})'\n            )\n\n    if findings:\n        head = '; '.join(findings[:2])\n        more = f' (+{len(findings) - 2} more)' if len(findings) > 2 else ''\n        return f'date-arithmetic vs window-dates mismatch: {head}{more}'\n    return None\n\n\n# ── DEF01-DEF03: defensibility checks (RUNNER-DEFENSIBILITY-01) ────────────────\n# Strategic origin: SD product moat = \"what a generic LLM with web search can't\n# reproduce in 4h\". DEF01-03 are heuristic gates that surface defensibility\n# regressions before publish. Calibration audit 2026-05-02:\n# reports/defensibility-audit_2026-05-02.md.\n#\n# All three start as WARNING. Promote to BLOCKER after stable false-positive\n# baseline across ≥3 newly-generated briefs (mirrors L01/footnote_integrity\n# pattern). Threshold rationale lives in audit-defensibility.py docstring.\n\n# Allowlist — primary-source domains. 
Hits to these \"displace LLM time\" because\n# they require precise citation to a primary document, not summarization.\n# Aggregator domains (Wikipedia, generic news) and self-references do NOT count.\n_DEF03_CANONICAL_DOMAINS: frozenset[str] = frozenset({\n    # Spain / EU primary\n    'boe.es', 'cnmc.es', 'ree.es', 'csn.es', 'ec.europa.eu', 'europa.eu',\n    'eib.org', 'ecb.europa.eu', 'consilium.europa.eu', 'europarl.europa.eu',\n    'eda.europa.eu', 'frontex.europa.eu',\n    # Energy / electricity\n    'entsoe.eu', 'iea.org', 'irena.org',\n    # Defence / nuclear\n    'nato.int', 'sipri.org', 'iiss.org', 'world-nuclear-news.org',\n    # Trade / multilateral\n    'imf.org', 'worldbank.org', 'oecd.org', 'wto.org', 'bis.org',\n    'unctad.org', 'un.org',\n    # US official\n    'sec.gov', 'treasury.gov', 'federalreserve.gov', 'bls.gov', 'bea.gov',\n    'energy.gov', 'state.gov', 'defense.gov',\n    # Sectoral / industry primary\n    'unef.es', 'tsmc.com', 'asml.com', 'smic.com',\n    # Research institutions (curated)\n    'rand.org', 'brookings.edu', 'atlanticcouncil.org', 'csis.org',\n    'cfr.org', 'carnegieendowment.org', 'merics.org', 'bruegel.org',\n    'europeanpolicycentre.eu',\n    # Risk-research\n    'weforum.org', 'eurasiagroup.net',\n    # Maritime / additional gov + think-tanks (RUNNER-THRESHOLD-CALIBRATION-AUDIT-01,\n    # 2026-05-29 — COSCO recognizer gap: legit primary sources absent from allowlist,\n    # not a too-strict ≥5 floor). 
uscc.gov = US-China Economic & Security Review\n    # Commission; gesetze-im-internet.de = German federal law gazette (peer of boe.es);\n    # clingendael.org = Clingendael Institute (peer of RAND/CSIS); usnwc.edu = US Naval\n    # War College (China Maritime Studies Institute).\n    'uscc.gov', 'gesetze-im-internet.de', 'clingendael.org', 'usnwc.edu',\n})\n\n_DEF03_AGGREGATOR_DOMAINS: frozenset[str] = frozenset({\n    'wikipedia.org', 'wikimedia.org', 'creativecommons.org',\n    'shadowdynamics.ai', 'substack.com',\n})\n\n\n# QA-INCONTESTABLE-03 — temporal_window_consistency check.\n# Catches: time-range labels in §VERDICT or §FORMAL PREDICTIONS that\n# don't match prediction-deadline arithmetic ±10%, when no hedge word.\n# Brief 6 issue #2 case: \"6-month window\" claim with explicit\n# (2026-05-08 → 2026-10-31) actual span = 5.78 months. Without hedge\n# word, ratio 0.96 → flag.\n\n_TWC_WINDOW_RE = re.compile(\n    r'(\\d+(?:[\\.,]\\d+)?)\\s*(?:[–-]\\s*(\\d+(?:[\\.,]\\d+)?)\\s*)?'\n    r'[-\\s]?(month|day|week|year|mes|mese[s]?|d[ií]a|semana|año)s?\\s*'\n    r'(?:prediction\\s+)?(?:window|period|ventana|periodo|per[ií]odo)',\n    re.IGNORECASE,\n)\n_TWC_ISO_DATE_RE = re.compile(r'\\b(20\\d{2})-(\\d{2})-(\\d{2})\\b')\n_TWC_HEDGE_RE = re.compile(\n    r'(approximately|roughly|aproximadamente|aprox\\.?|approx\\.?|~|circa|cerca\\s+de|'\n    r'about|around|alrededor|en\\s+torno)',\n    re.IGNORECASE,\n)\n_TWC_UNIT_DAYS = {\n    'day': 1, 'd[ií]a': 1, 'dia': 1, 'día': 1,\n    'week': 7, 'semana': 7,\n    'month': 30.4375, 'mes': 30.4375, 'mese': 30.4375, 'meses': 30.4375,\n    'year': 365.25, 'año': 365.25,\n}\n\n\ndef check_temporal_window_consistency(html: str) -> str | None:\n    \"\"\"Catch time-range labels (e.g. '6-month window') that don't match\n    the actual span between two adjacent ISO dates ±10%, when no hedge\n    word is present.\n\n    Origin: Brief 6 issue #2 ('6-month prediction window' label vs\n    actual 5.78-month span). 
The existing date_arithmetic check catches\n    duration-to-end-date mismatches (e.g. '6-month suspension running\n    to 2026-11-10' from a 2025-10-15 source). This is the\n    prediction-window specialisation: looks for `(N|N-M) (unit) window`\n    + 2 ISO dates in proximity, computes ratio.\n\n    Algorithm:\n      1. For each match of `(N) (unit) window` in the prose:\n      2. Find ≥2 ISO dates within ±200 chars (the implied window endpoints).\n      3. Compute claim_days = N * unit_days. If range form (N-M), use mean.\n      4. Compute actual_days = |date_max - date_min|.\n      5. If both present and ratio outside [0.9, 1.1]:\n         - Skip if hedge word ('approximately', 'roughly', '~') in\n           the 50 chars before the claim.\n         - Otherwise flag.\n\n    WARNING-only. Cost: 0 runtime.\n    \"\"\"\n    text = re.sub(r'<[^>]+>', ' ', html)\n    findings = []\n    for m in _TWC_WINDOW_RE.finditer(text):\n        n_low = m.group(1).replace(',', '.')\n        n_high = m.group(2).replace(',', '.') if m.group(2) else None\n        unit = m.group(3).lower()\n\n        # Hedge in the 50 chars before claim → skip\n        hedge_window = text[max(0, m.start() - 50):m.start()]\n        if _TWC_HEDGE_RE.search(hedge_window):\n            continue\n\n        try:\n            n = (float(n_low) + float(n_high)) / 2 if n_high else float(n_low)\n        except ValueError:\n            continue\n        unit_days = None\n        for k, v in _TWC_UNIT_DAYS.items():\n            if re.match(k, unit):\n                unit_days = v\n                break\n        if unit_days is None:\n            continue\n        claim_days = n * unit_days\n\n        # Find ISO dates in ±200 chars\n        proximity = text[max(0, m.start() - 200):min(len(text), m.end() + 200)]\n        iso_dates = []\n        for d_match in _TWC_ISO_DATE_RE.finditer(proximity):\n            try:\n                y, mo, d = int(d_match.group(1)), int(d_match.group(2)), int(d_match.group(3))\n               
 iso_dates.append(date(y, mo, d))\n            except ValueError:\n                pass\n        if len(iso_dates) < 2:\n            continue\n        actual_days = (max(iso_dates) - min(iso_dates)).days\n        if actual_days <= 0:\n            continue\n        ratio = claim_days / actual_days\n        if 0.9 <= ratio <= 1.1:\n            continue\n        # Direction: claim larger or smaller than reality\n        direction = 'overstates' if ratio > 1.1 else 'understates'\n        findings.append(\n            f'\"{m.group(0).strip()}\" {direction} window: claim ~{claim_days:.0f}d '\n            f'vs actual {actual_days}d between {min(iso_dates)} → {max(iso_dates)} '\n            f'(ratio={ratio:.2f}; threshold ±10%)'\n        )\n\n    if findings:\n        return '; '.join(findings[:3])\n    return None\n\n\ndef _domain_of(url: str) -> str:\n    m = re.match(r'https?://([^/]+)/?', url)\n    if not m:\n        return ''\n    host = m.group(1).lower()\n    if host.startswith('www.'):\n        host = host[4:]\n    parts = host.split('.')\n    if len(parts) >= 3 and parts[-2] in {'gov', 'co', 'com', 'org', 'ac'}:\n        return '.'.join(parts[-3:])\n    return '.'.join(parts[-2:]) if len(parts) >= 2 else host\n\n\n_PREDICTIONS_CACHE: dict[str, list] | None = None\n\n\ndef _load_predictions_yaml() -> dict[str, list]:\n    \"\"\"Returns {brief_filename: [{'id': ..., 'falsifiable_by_date': ...}, ...]}.\n\n    Light parser — extracts brief_source -> id+date dicts without PyYAML\n    dependency. 
Cache persists across calls within the same process.\n    DEF01 only measures len(); D-QA-22 floor#2 width check uses the date.\n    \"\"\"\n    global _PREDICTIONS_CACHE\n    if _PREDICTIONS_CACHE is not None:\n        return _PREDICTIONS_CACHE\n\n    yaml_path = os.path.join(\n        os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'predictions.yaml',\n    )\n    if not os.path.exists(yaml_path):\n        _PREDICTIONS_CACHE = {}\n        return _PREDICTIONS_CACHE\n\n    with open(yaml_path, encoding='utf-8') as f:\n        text = f.read()\n\n    # Strip comment lines so schema-doc examples (`#   - id: PRED-...`) don't\n    # get picked up as phantom entries by the regex below. Real entries live\n    # at column 0 with `  - id:` (indented under `predictions:`); comment\n    # examples in the header start with `#` and trip the regex otherwise.\n    text = '\\n'.join(\n        line for line in text.splitlines()\n        if not line.lstrip().startswith('#')\n    )\n\n    by_brief: dict[str, list] = {}\n    for entry in re.finditer(\n        r'-\\s+id:\\s*(\\S+).*?(?=\\n\\s*-\\s+id:|\\Z)',\n        text, re.DOTALL,\n    ):\n        block = entry.group(0)\n        pid = entry.group(1)\n        bs = re.search(r'brief_source:\\s*(\\S+)', block)\n        if not bs:\n            continue\n        bs_val = bs.group(1).strip().strip('\"').strip(\"'\")\n        fbd = re.search(r'falsifiable_by_date:\\s*(\\S+)', block)\n        fbd_val = fbd.group(1).strip().strip('\"').strip(\"'\") if fbd else ''\n        by_brief.setdefault(bs_val, []).append({\n            'id': pid,\n            'falsifiable_by_date': fbd_val,\n        })\n\n    _PREDICTIONS_CACHE = by_brief\n    return by_brief\n\n\ndef _extract_brief_filename(html: str) -> str | None:\n    \"\"\"Extract SD_*.html filename from og:url or canonical link, if present.\"\"\"\n    for pat in (\n        r'<meta\\s+property=\"og:url\"\\s+content=\"https?://[^\"]+/([^\"/]+\\.html)\"',\n        
r'<link\\s+rel=\"canonical\"\\s+href=\"https?://[^\"]+/([^\"/]+\\.html)\"',\n    ):\n        m = re.search(pat, html)\n        if m:\n            name = m.group(1)\n            if name.startswith('SD_'):\n                return name\n    return None\n\n\n_DEF01_PROB_RE = re.compile(\n    r'(?:we\\s+assess|assess(?:ment)?|estimate|probabilit[yáí]a?|probability|'\n    r'probabilidad)[^.<>]{0,40}?(\\d{2,3})(?:\\s*[-–]\\s*(\\d{2,3}))?\\s*%',\n    re.IGNORECASE,\n)\n_DEF01_FALSIFY_RE = re.compile(\n    r'(?:falsifi(?:able|cation|ed)|verify[:\\s]|verificar[:\\s]|'\n    r'resolution[:\\s]|observable[\\s_]condition|resolution\\s+source)',\n    re.IGNORECASE,\n)\n_DEF01_DATE_RE = re.compile(\n    r'(?:by|before|antes\\s+de|hasta)\\s+(?:\\d{4}-\\d{2}-\\d{2}|'\n    r'(?:Q[1-4]|H[12]|end\\s+of|finales?\\s+de)\\s*20\\d{2}|'\n    r'\\d{1,2}\\s+(?:[A-Z][a-zé]+|[a-zé]+\\s+(?:de|of)?)\\s*20\\d{2})',\n    re.IGNORECASE,\n)\n\n\ndef check_def01_formal_predictions_present(html: str) -> str | None:\n    \"\"\"DEF01 — brief must declare ≥1 formal prediction AND ledger them.\n\n    Three cases:\n      (1) Ledger has ≥1 entry for brief_source=filename → PASS\n      (2) Ledger empty AND body has prob+falsify+date triad → WARN\n          \"ledger gap\" (new 2026-05-10): predictions made but extraction\n          step (`extract-prediction-stubs.py`) was skipped before promote.\n          Brief 6 Critical Minerals 2026-05-10 was the canonical instance.\n      (3) Ledger empty AND no triad → WARN \"defensibility gap\": brief is\n          matchable by a generic LLM with web search; the track-record\n          moat requires verifiable forecasts.\n\n    WARNING level — false positives expected on early briefs where the\n    predictions ledger lacks a backfill entry. 
Promote to BLOCKER (in\n    runner) and to brief-saver promote-time gate after corpus stabilisation\n    + Brief 7 calibration confirms 0 false-positive rate.\n\n    Strategic origin: a brief without verifiable forecasts is matchable by\n    a generic LLM with web search. The track-record moat is built by the\n    predictions ledger (see docs/content-planning/PREDICTION_DESIGN.md).\n    \"\"\"\n    fname = _extract_brief_filename(html)\n    ledger_count = 0\n    if fname:\n        ledger = _load_predictions_yaml()\n        ledger_count = len(ledger.get(fname, []))\n\n    has_prob = bool(_DEF01_PROB_RE.search(html))\n    has_falsify = bool(_DEF01_FALSIFY_RE.search(html))\n    has_date = bool(_DEF01_DATE_RE.search(html))\n    has_triad = has_prob and has_falsify and has_date\n\n    # Case 1: ledger has entries — pass.\n    if ledger_count >= 1:\n        return None\n\n    # Case 2: HTML has triad but ledger is empty — extraction was skipped.\n    # This is the Brief 6 2026-05-10 defect class. Pre-2026-05-10 the OR\n    # fallback returned None here, allowing the brief to promote with an\n    # empty ledger (audit-trail-as-moat momentarily false).\n    if has_triad:\n        return (\n            f'DEF01 ledger gap: HTML contains formal-prediction triad '\n            f'(probability+verification+falsifiable date) but predictions.yaml '\n            f'has 0 entries with brief_source={fname or \"(unknown)\"}. '\n            f'Run: python3 scripts/extract-prediction-stubs.py '\n            f'briefs/{fname or \"<filename>\"} → review notes → commit. '\n            f'cf. 
feedback_pre_promote_coherence_audit.md'\n        )\n\n    # Case 3: ledger empty AND no triad — defensibility gap.\n    missing: list[str] = []\n    if not has_prob:\n        missing.append('probability number')\n    if not has_falsify:\n        missing.append('verification/falsification recipe')\n    if not has_date:\n        missing.append('falsifiable date/window')\n\n    return (\n        f'DEF01 defensibility gap: no entries in data/predictions.yaml for '\n        f'this brief AND body lacks formal-prediction triad '\n        f'(missing: {\", \".join(missing) if missing else \"all three\"}); '\n        f'cf. docs/content-planning/PREDICTION_DESIGN.md'\n    )\n\n\n_DEF02_MONITOR_RE = re.compile(\n    r'Shadow\\s+Dynamics\\s+Intelligence\\s+Monitor', re.IGNORECASE,\n)\n_DEF02_PATTERN_RECOGNITION_RE = re.compile(\n    r'<h2[^>]*>[^<]*?'\n    r'(?:PATTERN\\s+RECOGNITION|RECONOCIMIENTO\\s+DE\\s+PATR[OÓ]N)'\n    r'[^<]*?</h2>',\n    re.IGNORECASE,\n)\n\n\ndef check_def02_primary_contribution_present(html: str) -> str | None:\n    \"\"\"DEF02 — brief must declare its primary contribution.\n\n    Pass if EITHER:\n      (a) Body references \"Shadow Dynamics Intelligence Monitor\" (internal\n          signal corpus), OR\n      (b) Brief contains a \"PATTERN RECOGNITION\" / \"RECONOCIMIENTO DE PATRÓN\"\n          h2 section (cross-brief synthesis).\n\n    Without one of these markers, the brief reads as repackaged public\n    sources — defensibility-equivalent to LLM-with-web-search output.\n    \"\"\"\n    if _DEF02_MONITOR_RE.search(html):\n        return None\n    if _DEF02_PATTERN_RECOGNITION_RE.search(html):\n        return None\n    return (\n        'DEF02 defensibility gap: no primary-contribution marker — neither '\n        'Shadow Dynamics Intelligence Monitor reference nor Pattern '\n        'Recognition / Reconocimiento de Patrón section detected. 
Add '\n        'internal-corpus reference or cross-brief synthesis section.'\n    )\n\n\ndef check_def03_canonical_url_floor(html: str) -> str | None:\n    \"\"\"DEF03 — sources block must have ≥5 distinct canonical primary-source URLs.\n\n    Canonical = primary-source domain allowlist (BOE, ENTSO-E, EIB, SEC, IMF,\n    OECD, ECB, NATO, etc.). Aggregators (Wikipedia) and self-references\n    (shadowdynamics.ai, substack.com) do not count.\n\n    No-op when neither <section class=\"sources\"> nor <ol id=\"footnotes\">\n    exists (legacy briefs); URLs are counted across both when present.\n\n    WARNING (not BLOCKER) — false positives expected on briefs whose primary\n    sources lack public canonical URLs (REE prospective studies, CSN licences).\n    Threshold ≥5 calibrated against published corpus 2026-05-02.\n    \"\"\"\n    sources_match = re.search(\n        r'<section class=\"sources\"[^>]*>.*?</section>',\n        html, re.DOTALL,\n    )\n    # 2026-06-01 FP-precision (Brief-9 Cluster 5): the canonical source URLs live\n    # in the footnote list <ol id=\"footnotes\"> (the citation mechanism), while\n    # <section class=\"sources\"> is a short label/intro block with 0 hrefs.\n    # Scanning only the latter falsely reported \"0 primary URLs\" on briefs that\n    # actually carry many (1932: 32 total / 6 canonical in the footnotes). Count\n    # URLs across BOTH the sources section AND the footnotes list.\n    footnotes_match = re.search(r'<ol id=\"footnotes\".*?</ol>', html, re.DOTALL)\n    if not sources_match and not footnotes_match:\n        return None  # no sources block — other checks handle this\n\n    sources_block = ((sources_match.group(0) if sources_match else '')\n                     + (footnotes_match.group(0) if footnotes_match else ''))\n    # Recursive URL extraction (extended 2026-05-15 PM per Brief 7 audit class E7):\n    # accept both <a href=\"URL\"> AND raw URLs in text (e.g. 
<li>...Source: https://...</li>).\n    # Raw-URL pattern intentionally narrowed: must follow whitespace or `>` (HTML-text\n    # context); excludes trailing punctuation/quotes/brackets common in prose.\n    urls = set(re.findall(r'href=\"(https?://[^\"]+)\"', sources_block))\n    for m in re.finditer(r'(?:^|[\\s>])(https?://[^\\s<>\"\\)\\]\\}]+)', sources_block):\n        urls.add(m.group(1).rstrip('.,;:'))\n\n    canonical = 0\n    for u in urls:\n        d = _domain_of(u)\n        if d in _DEF03_AGGREGATOR_DOMAINS:\n            continue\n        if d in _DEF03_CANONICAL_DOMAINS:\n            canonical += 1\n        elif any(d.endswith('.' + dom) for dom in _DEF03_CANONICAL_DOMAINS):\n            canonical += 1\n\n    if canonical >= 5:\n        return None\n    return (\n        f'DEF03 defensibility gap: only {canonical} canonical primary-source '\n        f'URL(s) in <section class=\"sources\"> (target ≥5; aggregator/self URLs '\n        f'excluded). Strengthen with primary-source links '\n        f'(BOE, ENTSO-E, SEC, IMF, ECB, official institutional).'\n    )\n\n\n# ── DOUBLE TRACK GAP legend missing (added 2026-05-15 PM per Brief 7 audit C1) ───\n# When a <table> contains a `GAP 0-10` (or \"BRECHA 0-10\") column header without\n# an explicit directional legend nearby, readers can misinterpret HIGH score as\n# consonance instead of incoherence. Fire WARN to prompt operator to add legend.\n\n_GAP_HEADER_RE = re.compile(\n    # Accept ASCII hyphen, en-dash (U+2013), em-dash (U+2014). Brief 7\n    # 1353 used en-dash → silent false-negative on prior regex (ASCII-only)\n    # while legend was still missing. 
A14a fix 2026-05-16.\n    r'<th[^>]*>\\s*(?:GAP|BRECHA)\\s*0[-–—]10\\s*</th>',\n    re.IGNORECASE,\n)\n_GAP_LEGEND_PATTERNS = (\n    'higher = greater',\n    'higher = more',\n    'rhetoric-reality divergence',\n    'incoherence',\n    'mayor = mayor',\n    'discordancia',\n    'incoherencia',\n    'divergencia retórica',\n)\n\n\ndef check_double_track_gap_legend(html: str) -> str | None:\n    \"\"\"Flag DOUBLE TRACK tables that declare GAP 0-10 column without\n    directional legend. Score direction (HIGH = more incoherence) is\n    non-obvious to readers without explicit framing.\"\"\"\n    hits = list(_GAP_HEADER_RE.finditer(html))\n    if not hits:\n        return None\n    missing = []\n    for m in hits:\n        # A14c 2026-05-16: extend window through end of table + 800 chars.\n        # Prior 800-char window after <th> missed legends placed after\n        # </table> when tables are >800 chars (Brief 7 1353 GAP tables\n        # are 3-4 KB). Dynamic extension captures legends placed right\n        # after </table> by Format Output A14b auto-injection.\n        window_start = max(0, m.start() - 200)\n        table_close = html.find('</table>', m.end())\n        if table_close == -1:\n            window_end = min(len(html), m.end() + 800)\n        else:\n            window_end = min(len(html), table_close + 800)\n        window = html[window_start:window_end].lower()\n        if not any(pat.lower() in window for pat in _GAP_LEGEND_PATTERNS):\n            missing.append(f'at char {m.start()}')\n    if not missing:\n        return None\n    return (f'{len(missing)} DOUBLE TRACK table(s) with `GAP 0-10` column header '\n            f'lack directional legend (HIGH = greater rhetoric-reality divergence '\n            f'/ incoherence). Reader may misread high score as consonance. '\n            f'Add legend under table: \"GAP 0-10: higher = greater rhetoric-reality '\n            f'divergence (incoherence). 
0 = aligned; 10 = maximally over-claimed.\" '\n            f'Hits: {\", \".join(missing[:3])}')\n\n\n# D-QA-22 floor #2 width — operator-adopted 2026-05-04 (Y).\n# Tied to STRATEGY.md v0.6 §Q2 hybrid kill criterion deadline 2026-10-31.\n# When kill criterion reformulates, update this constant + the message.\n_FLOOR2_DEADLINE = '2026-10-31'\n\n\n# D-QA-22 adoption date: from 2026-05-04 onward, Briefs 6/7/8+ must carry\n# ≥2 sub-deadline predictions. Briefs published before this date are exempt\n# (lenient regime — fire only on ≥3 total + 0 sub-deadline as audit trail).\n_DQA22_ADOPTION_DATE = '2026-05-04'\n\n\ndef check_predictions_floor2_width(html: str) -> str | None:\n    \"\"\"D-QA-22 — flag briefs missing required sub-deadline prediction count.\n\n    Per operator decision 2026-05-04 (Y), briefs published on/after that\n    adoption date must include ≥2 predictions with falsifiable_by_date ≤\n    2026-10-31 to widen kill-criterion floor #2 base from N=3 to N=9.\n\n    Two extraction paths (ledger preferred when populated):\n\n    1. `predictions.yaml` — populated post-promote, authoritative.\n    2. HTML regex `Falsifiable by YYYY-MM-DD` — used pre-promote when\n       ledger has no entries for this brief yet. Origin: Brief 6 Critical\n       Minerals pre-promote audit 2026-05-08 caught 0 sub-deadline\n       contribution; original ledger-only path no-op'd in staging because\n       predictions extract to YAML at promote-time, not at save-time.\n\n    Two scoping regimes by publication date (filename-derived):\n\n    - Pre-2026-05-04 (legacy): lenient — fire only when ≥3 total entries\n      AND 0 sub-deadline. Preserves audit-trail for legacy briefs that\n      flagged the gap motivating D-QA-22.\n    - 2026-05-04+ (post-adoption): strict — fire whenever <2 sub-deadline\n      and brief has any formal predictions. 
Catches the contractual-default\n      class invisible to the legacy threshold.\n\n    No-op when:\n    - Brief has no formal predictions (no `Falsifiable by` markers AND no\n      ledger entries) — early/legacy brief without prediction section.\n    - Brief filename not detectable (preview HTML, etc.).\n    \"\"\"\n    fname = _extract_brief_filename(html)\n    if not fname:\n        return None\n\n    # Regime by publication date\n    m_date = re.search(r'SD_(\\d{4})(\\d{2})(\\d{2})_', fname)\n    if m_date:\n        file_date = f'{m_date.group(1)}-{m_date.group(2)}-{m_date.group(3)}'\n        post_adoption = file_date >= _DQA22_ADOPTION_DATE\n    else:\n        post_adoption = False  # unparseable filename → default lenient\n\n    # Source of truth: ledger first, fall back to HTML pre-promote\n    ledger = _load_predictions_yaml()\n    entries = ledger.get(fname, [])\n\n    if entries:\n        total = len(entries)\n        sub_deadline_count = sum(\n            1 for e in entries\n            if e.get('falsifiable_by_date')\n            and str(e['falsifiable_by_date']) <= _FLOOR2_DEADLINE\n        )\n        source = 'ledger'\n    else:\n        # HTML extraction: count distinct `Falsifiable by YYYY-MM-DD` markers.\n        # EN-side only (ES translation duplicates same predictions).\n        en_dates = re.findall(\n            r'Falsifiable\\s+by\\s+(\\d{4}-\\d{2}-\\d{2})', html, re.IGNORECASE\n        )\n        if not en_dates:\n            return None  # no formal predictions section\n        total = len(en_dates)\n        sub_deadline_count = sum(1 for d in en_dates if d <= _FLOOR2_DEADLINE)\n        source = 'HTML pre-promote'\n\n    if post_adoption:\n        # Strict: ≥2 sub-deadline required per D-QA-22 Brief 6/7/8 contract\n        if sub_deadline_count >= 2:\n            return None\n        return (\n            f'D-QA-22 floor#2 width ({source}): brief has {total} formal '\n            f'prediction(s) but only {sub_deadline_count} sub-deadline '\n 
           f'(falsifiable_by_date ≤ {_FLOOR2_DEADLINE}). Post-{_DQA22_ADOPTION_DATE} '\n            f'briefs require ≥2 sub-deadline per D-QA-22; add sub-deadline '\n            f'prediction(s) before promote.'\n        )\n    # Lenient legacy: ≥3 total + 0 sub-deadline → fire (audit-trail honesty)\n    if total < 3 or sub_deadline_count >= 1:\n        return None\n    return (\n        f'D-QA-22 floor#2 width ({source}): legacy brief has {total} '\n        f'predictions but 0 with falsifiable_by_date ≤ {_FLOOR2_DEADLINE}. '\n        f'Pre-D-QA-22 brief; flagged honestly per the gap that motivated '\n        f'the rule.'\n    )\n\n\ndef check_toc_anchor_integrity(html: str) -> str | None:\n    \"\"\"TOC ↔ body section-anchor integrity check.\n\n    Brief 6 (commit cb3ac78) shipped with the ES TOC missing the ACRÓNIMOS\n    section link that existed in the EN TOC, and the en-sec-9 (Section IX\n    Sector Exposure) entry was missing MOFCOM. Each was caught only by\n    post-promote human inspection. Pre-promote runner check prevents\n    recurrence by enforcing that every TOC `<a href=\"#sec-id\">` resolves\n    to a body `id=\"sec-id\"`, and (optionally, audit-only) every body\n    `<section>`/`<h2>` with a sec-id is referenced in the TOC.\n\n    Two failure classes:\n      (a) DANGLING TOC LINK — `<a href=\"#X\">` in TOC but no `id=\"X\"` in body.\n          Result: clicking the TOC entry goes nowhere. Hard structural bug.\n      (b) ORPHAN SECTION — body has `id=\"X\"` matching the section-id pattern\n          but no TOC entry. Section exists but isn't discoverable from TOC.\n\n    Scope: only `#en-sec-N` / `#es-sec-N` / `#en-sec-acronyms` / `#es-sec-acronyms`\n    patterns. Excludes footnote anchors (`#fn-N`) and arbitrary in-body anchors\n    which have their own integrity (footnote_reference_integrity).\n\n    WARNING-class. 
Promote to BLOCKER after one clean Brief 7+ window per\n    feedback_runner_calibrate_then_ratchet.\n    \"\"\"\n    sec_anchor_re = re.compile(\n        r'<a\\s+href=\"#((?:en-sec|es-sec)-[a-z0-9_-]+)\"',\n        re.IGNORECASE,\n    )\n    sec_id_re = re.compile(\n        r'\\bid=\"((?:en-sec|es-sec)-[a-z0-9_-]+)\"',\n        re.IGNORECASE,\n    )\n\n    toc_targets = {m.group(1).lower() for m in sec_anchor_re.finditer(html)}\n    body_ids = {m.group(1).lower() for m in sec_id_re.finditer(html)}\n\n    if not toc_targets and not body_ids:\n        # legacy brief without sec-id pattern — skip\n        return None\n\n    # The brief title heading carries a sec-id (canonical sec-1 slot, e.g.\n    # <h2 id=\"en-sec-1\">Brief 8 — …</h2>) but is NOT a navigable TOC entry —\n    # a TOC never links the document's own title. Exclude title headings from\n    # orphan detection (2026-05-29 FP fix; was flagging en-sec-1/es-sec-1).\n    title_ids = {\n        m.group(1).lower() for m in re.finditer(\n            r'<h2[^>]*\\bid=\"((?:en-sec|es-sec)-[a-z0-9_-]+)\"[^>]*>\\s*'\n            r'(?:Brief|Informe)\\s+\\d+\\s*[—–-]', html, re.IGNORECASE)\n    }\n    dangling = sorted(toc_targets - body_ids)\n    orphan = sorted(body_ids - toc_targets - title_ids)\n\n    msgs = []\n    if dangling:\n        msgs.append(\n            f\"{len(dangling)} dangling TOC link(s) — `<a href=\\\"#X\\\">` \"\n            f\"with no matching body id=\\\"X\\\": {dangling[:5]}\"\n            + (f' (+{len(dangling)-5} more)' if len(dangling) > 5 else '')\n        )\n    if orphan:\n        msgs.append(\n            f\"{len(orphan)} orphan section(s) — body id=\\\"X\\\" with no TOC \"\n            f\"entry: {orphan[:5]}\"\n            + (f' (+{len(orphan)-5} more)' if len(orphan) > 5 else '')\n        )\n\n    if not msgs:\n        return None\n\n    return (\n        \"TOC ↔ body section-anchor mismatch: \" + \"; \".join(msgs) +\n        \" — every TOC link must resolve to a body section, and every 
body \"\n        \"section with a sec-id should appear in the TOC.\"\n    )\n\n\n# ── RUNNER-COMPANION-BRIEF-CONSISTENCY-01 (2026-05-26) ───────────────────────\n# Companion files (_linkedin.txt, _substack.md) are generated from a brief HTML\n# snapshot. When a brief is edited post-generation (band widening, fact\n# correction, framing change) and companions are not regenerated, they fall\n# silently stale. This check compares numerical claims at the headline-tier\n# (KPI grid + cover verdict-box) against companion text and flags mismatches.\n#\n# Canonical instance: Brief 8 (SD_20260522_1354_Payments) promote 2026-05-26 —\n# companions generated 13:01 on 2026-05-25 + brief edited 5× during evening\n# band-widening pass; 9 stale companion items caught only by manual sweep at\n# promote. v1 scope is numeric-only (entity anchor + value pair); thesis-frame\n# drift and date-only drift deferred to v2.\n\ndef _normalize_numeric_token(token: str) -> str:\n    \"\"\"Reduce a numeric token to a comparable canonical string.\n\n    Strips currency/locale/unit decoration so '60%', '60 %', '~60%',\n    '$200B', '~$200B', '200 billion' all compare on the digit portion.\n    Returns the first digit-bearing chunk normalised: dot-decimal,\n    leading-zero stripped where unambiguous. 
Used for proximity-based\n    same-vs-different value comparison only — NOT for unit conversion.\n    \"\"\"\n    s = token.strip().lower()\n    s = s.replace(',', '.').replace('–', '-').replace('—', '-')\n    # Drop currency / approximation prefix\n    s = re.sub(r'^[~≈€$£¥]+', '', s)\n    # Pull first numeric run\n    m = re.search(r'\\d+(?:\\.\\d+)?', s)\n    if not m:\n        return ''\n    return m.group(0)\n\n\ndef _normalize_numeric_tokens_all(token: str) -> set[str]:\n    \"\"\"Like _normalize_numeric_token but returns ALL numeric runs as a set.\n\n    Handles compound brief values like '5 vs 7', '15-9', '12-24' where the\n    KPI represents a comparison or range — for drift detection, presence of\n    EITHER value in companion proximity counts as clean.\n    \"\"\"\n    s = token.strip().lower()\n    s = s.replace(',', '.').replace('–', '-').replace('—', '-')\n    s = re.sub(r'^[~≈€$£¥]+', '', s)\n    return {m.group(0) for m in re.finditer(r'\\d+(?:\\.\\d+)?', s)}\n\n\n# Regex: a single \"headline\" numeric value worth comparing across companion.\n# Tighter than NUMERICAL_CLAIM_RE — must look like an editorial-claim number,\n# not a date or anchor. Allows %, $/€/£/¥ amounts, bare integers ≥2 digits,\n# 'N billion/million/trillion' words.\n#\n# 2026-05-26 INTERNAL-BASELINE-CONSISTENCY-01 patch: added T (trillion suffix)\n# to unit-letter group `[BMK]` → `[BMKT]`. Without T, the regex truncates\n# `$1.5T` to `$1` (no decimal point match since `.5T` doesn't satisfy `\\b`)\n# and fails `$2T` entirely. 
Canonical Brief 8 $2T/$3T/$1.5T claims previously\n# rendered as `$1` truncations in companion check matching — latent defect\n# fixed here while enabling intra-brief detection of the same metric class.\n_COMPANION_NUMBER_RE = re.compile(\n    r'(?:'\n    r'[~≈]?\\d+(?:[.,]\\d+)?\\s*%'                                          # 60%, ~60%\n    r'|[~≈]?[€$£¥]\\s*\\d+(?:[.,]\\d+)?\\s*(?:[BMKT]|billion|million|trillion|bn|mn|tn)?\\b'  # $200B, $2T\n    r'|\\b\\d+(?:[.,]\\d+)?\\s*(?:billion|million|trillion)\\b'               # 200 billion\n    r')',\n    re.IGNORECASE,\n)\n\n\n# RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01 (2026-05-29) — companion drift FP fix.\n# The 2026-05-29 audit found all companion_brief_consistency firings were FPs\n# driven by NUMBER-SEMANTICS, not the ±250 window: a % anchor was \"drifting\"\n# against a $ figure or a count near the same entity, and bill numbers were read\n# as metrics (\"5 gates vs HR 3633\", \"48% vs $864B\", \"70% vs €400M\", \"19 CTPPs vs\n# 70%\"). 
A genuine drift of a value is same-unit by definition, so only same-unit\n# companion numbers are drift candidates; identifier numbers are never metrics.\ndef _value_unit_type(raw: str) -> str:\n    \"\"\"Classify a numeric token's unit family: 'pct' | 'cur' | 'int'.\"\"\"\n    if '%' in raw:\n        return 'pct'\n    if re.search(r'[€$£¥]', raw) or re.search(\n        r'\\b(?:billion|million|trillion|bn|mn|tn)\\b', raw, re.IGNORECASE\n    ) or re.search(r'\\d\\s*[BMKT]\\b', raw):\n        return 'cur'\n    return 'int'\n\n\n# Identifier prefixes that make a following number a reference/label, not a\n# metric: HR 3633, Notice 61, Order 834, Article 35, WP 1270, Section 5, §3, etc.\n_COMPANION_IDENTIFIER_PRE_RE = re.compile(\n    r'(?:\\b(?:HR|H\\.R\\.|No|No\\.|Notice|Order|Article|Art|Art\\.|Section|Sec|Sec\\.'\n    r'|Chapter|WP|Reg|Regulation|EO|Directive|Resolution)\\s*[-#]?\\s*|§\\s*)$',\n    re.IGNORECASE,\n)\n\n# Same-metric last-mile (RUNNER-STRUCTURAL-FP-PER-CHECK-AUDIT-01, 2026-05-29):\n# even when the anchor matches the right entity, the nearby same-unit number may\n# be a DIFFERENT metric/state, not a drift of the brief value. Three principled\n# guards distinguish a genuine drift from an incidental neighbour number.\n#\n# (a) Forward-looking target/projection qualifier — the number is a goal/target,\n# not a current-state value (\"spending accelerating toward 3.5% GDP\" vs the\n# brief's current 2.1%). 
Same metric, different temporal state → not drift.\n_COMPANION_TARGET_QUALIFIER_RE = re.compile(\n    r'(?:toward|towards|hacia|up\\s+to|hasta|rising\\s+to|climbing\\s+to|'\n    r'target(?:ing|ed)?(?:\\s+of)?|objetivo\\s+de|goal\\s+of|meta\\s+de|'\n    r'projected(?:\\s+to)?|proyectad[oa]s?|by\\s+20\\d\\d|para\\s+(?:el\\s+)?20\\d\\d|'\n    r'reach(?:ing|es)?|alcanzar?)\\s*$',\n    re.IGNORECASE,\n)\n# (b) Date day-numbers + legal-citation numbers (\"18 November 2025\", \"18 U.S.C.\")\n# and (c) hyphenated compound counts (\"60-vote cloture\") are bound to their\n# construct, not the anchor metric.\n_COMPANION_MONTHS = (\n    r'January|February|March|April|May|June|July|August|September|October|'\n    r'November|December|enero|febrero|marzo|abril|mayo|junio|julio|agosto|'\n    r'septiembre|setiembre|octubre|noviembre|diciembre'\n)\n_COMPANION_DATE_STATUTE_FOLLOW_RE = re.compile(\n    r'\\s+(?:' + _COMPANION_MONTHS + r')\\b|\\s+U\\.?\\s?S\\.?\\s?C\\.?\\b', re.IGNORECASE)\n_COMPANION_MONTH_PRE_RE = re.compile(\n    r'\\b(?:' + _COMPANION_MONTHS + r')\\s+$', re.IGNORECASE)\n_COMPANION_COMPOUND_FOLLOW_RE = re.compile(r'-[A-Za-z]')\n\n\ndef _extract_headline_anchors(html: str) -> list[tuple[str, str, str]]:\n    \"\"\"Extract (anchor_label, numeric_value, source_tag) tuples worth comparing.\n\n    Two sources:\n      1. KPI grid: `<div class=\"kpi-val\">VAL</div><div class=\"kpi-label\">\n         <span class=\"lang-en\">LABEL</span>...` — value is the headline metric,\n         label is the entity anchor.\n      2. 
Verdict-box: `<div class=\"verdict lang-en\">PROSE</div>` — extract\n         each number with ~60 chars of preceding text as anchor.\n\n    Returns: list of (label, value, source) — empty list if nothing extracted.\n    \"\"\"\n    anchors: list[tuple[str, str, str]] = []\n\n    # KPI grid: pair val with EN label\n    kpi_re = re.compile(\n        r'<div class=\"kpi-val\">([^<]+)</div>\\s*'\n        r'<div class=\"kpi-label\">\\s*'\n        r'<span class=\"lang-en\">([^<]+)</span>',\n        re.IGNORECASE,\n    )\n    for m in kpi_re.finditer(html):\n        val = m.group(1).strip()\n        label = m.group(2).strip()\n        anchors.append((label, val, 'KPI'))\n\n    # Verdict box (lang-en variant — Substack/LinkedIn are English-only)\n    verdict_re = re.compile(\n        r'<div class=\"verdict lang-en\">(.*?)</div>',\n        re.DOTALL | re.IGNORECASE,\n    )\n    for m in verdict_re.finditer(html):\n        verdict_text = re.sub(r'<[^>]+>', ' ', m.group(1))\n        verdict_text = re.sub(r'\\s+', ' ', verdict_text).strip()\n        for num_m in _COMPANION_NUMBER_RE.finditer(verdict_text):\n            val = num_m.group(0).strip()\n            # Anchor = 60-char window left of the number, last meaningful phrase\n            left = verdict_text[max(0, num_m.start() - 60):num_m.start()].strip()\n            # Trim to last 4-7 words to act as fuzzy-match anchor\n            words = re.findall(r'\\b[A-Za-z][\\w.+-]*\\b', left)\n            if not words:\n                continue\n            anchor = ' '.join(words[-5:])\n            anchors.append((anchor, val, 'VERDICT'))\n\n    return anchors\n\n\n_STOP_WORDS = frozenset({\n    'with', 'from', 'this', 'that', 'they', 'their', 'about', 'into', 'across',\n    'these', 'those', 'while', 'where', 'shall', 'after', 'before', 'than',\n    'then', 'between', 'system', 'systems', 'have', 'been', 'will', 'were',\n    'assess', 'full', 'remaining', 'range', 'holdings', 'testing',\n})\n\n\ndef 
_extract_anchor_terms(anchor: str) -> list[str]:\n    \"\"\"Pick distinctive search seeds (stems) from a KPI label / verdict phrase.\n\n    Returns up to 2 stems: longest non-stop tokens reduced to 7-char prefix\n    to tolerate morphological variation (Bifurcation ↔ bifurcated, Stablecoin\n    ↔ stablecoins). Min stem length 5 for specificity.\n\n    Multiple seeds let us require BOTH to be near the same companion site —\n    cuts false positives versus single-stem matching on common terms.\n    \"\"\"\n    tokens = re.findall(r'\\b[A-Za-z][\\w-]+\\b', anchor)\n    salient = [t for t in tokens if len(t) >= 5 and t.lower() not in _STOP_WORDS]\n    if not salient:\n        salient = [t for t in tokens if len(t) >= 4]\n    if not salient:\n        return []\n    # Sort longest first — distinctiveness proxy\n    salient.sort(key=len, reverse=True)\n    stems = []\n    for tok in salient[:2]:\n        stem = tok[:7] if len(tok) >= 7 else tok\n        if len(stem) >= 5:\n            stems.append(stem)\n    return stems\n\n\ndef _find_anchor_in_companion(\n    anchor: str, companion: str, min_stems: int = 1,\n) -> int | None:\n    \"\"\"Locate companion site that anchors `anchor` semantically. Returns char index or None.\n\n    Match policy:\n      - 1 distinctive stem present → its first occurrence, ONLY if the stem is\n        unambiguous in the companion (≤2 occurrences); a stem occurring many\n        times (e.g. \"China\", \"Scenario\") anchors to an arbitrary site and the\n        drift comparison then picks a wrong-metric number nearby\n      - 2 distinctive stems present → first occurrence of the primary stem that\n        has the second stem within ±200 chars; if no such site exists, falls\n        back to the primary stem's first occurrence\n      - fewer than `min_stems` stems → return None (anchor too vague to check)\n\n    `min_stems` raises the bar for noisy anchor sources: verdict-box anchors\n    (5 words before a number, often carrying source acronyms + stopwords) pass\n    min_stems=2 so a single common stem can't false-match; curated KPI labels\n    keep min_stems=1. 
(Anchor-precision fix, RUNNER-STRUCTURAL-FP-PER-CHECK-\n    AUDIT-01 2026-05-29 — the loose single-common-stem match was the root FP\n    driver behind \"48% vs 98%\" / \"70% vs 29%\" residuals.)\n\n    Tolerates morphology (Bifurcation ↔ bifurcated, stablecoin ↔ stablecoins)\n    via 7-char prefix seeding. Requires word boundary before stem to avoid\n    mid-word false positives.\n    \"\"\"\n    stems = _extract_anchor_terms(anchor)\n    if len(stems) < min_stems:\n        return None\n    if not stems:\n        return None\n    stem_res = [re.compile(r'\\b' + re.escape(s), re.IGNORECASE) for s in stems]\n    primary_hits = list(stem_res[0].finditer(companion))\n    if not primary_hits:\n        return None\n    if len(stems) == 1:\n        # Single-stem match is reliable only when the stem is unambiguous.\n        if len(primary_hits) > 2:\n            return None\n        return primary_hits[0].start()\n    # Two stems: prefer a primary hit with the secondary within ±200 chars\n    secondary = stem_res[1]\n    for m in primary_hits:\n        win_start = max(0, m.start() - 200)\n        win_end = min(len(companion), m.end() + 200)\n        if secondary.search(companion[win_start:win_end]):\n            return m.start()\n    # Fall back to primary-only when secondary missing — still informative\n    return primary_hits[0].start()\n\n\ndef check_companion_brief_consistency(\n    html: str,\n    brand: str | None = None,\n    briefs_dir: str | None = None,\n    filename: str | None = None,\n) -> str | None:\n    \"\"\"Compare numerical claims in brief HTML vs companion files (linkedin + substack).\n\n    For each headline-tier numeric anchor extracted from the HTML (KPI grid +\n    verdict-box), search each companion file for the same entity anchor\n    (stem-prefix match via _find_anchor_in_companion). 
If the entity is found in the companion\n    and a DIFFERENT numeric value appears within ±250 chars, flag the mismatch.\n\n    No-op (return None) when:\n      - briefs_dir or filename not provided (CLI dispatch only)\n      - neither companion file exists on disk\n      - HTML has no extractable headline anchors\n\n    WARN-tier first instance (calibrate before promoting to BLOCKER) per\n    [[feedback_runner_calibrate_then_ratchet]]. Conditional no-op preserves\n    back-compat for legacy briefs without standard KPI/verdict markup.\n    \"\"\"\n    if not briefs_dir or not filename:\n        return None\n    stem = os.path.basename(filename)\n    if stem.endswith('.html'):\n        stem = stem[:-5]\n    linkedin_path = os.path.join(briefs_dir, stem + '_linkedin.txt')\n    substack_path = os.path.join(briefs_dir, stem + '_substack.md')\n\n    companion_texts: list[tuple[str, str]] = []  # (surface, text)\n    for surface, path in (('linkedin', linkedin_path), ('substack', substack_path)):\n        if os.path.exists(path):\n            try:\n                with open(path, encoding='utf-8') as f:\n                    companion_texts.append((surface, f.read()))\n            except OSError:\n                continue\n    if not companion_texts:\n        return None\n\n    anchors = _extract_headline_anchors(html)\n    if not anchors:\n        return None\n\n    hits: list[str] = []\n    for label, value, source_tag in anchors:\n        canonical_set = _normalize_numeric_tokens_all(value)\n        if not canonical_set:\n            continue\n        for surface, comp_text in companion_texts:\n            # Verdict-box anchors are noisy (5 words before a number) → require\n            # 2 distinctive stems; curated KPI labels are reliable at 1.\n            idx = _find_anchor_in_companion(\n                label, comp_text, min_stems=2 if source_tag == 'VERDICT' else 1,\n            )\n            if idx is None:\n                continue\n            # Pull ±250 char 
proximity window around the matched anchor site.\n            # Wider than the spec's 100 because LinkedIn/Substack often place\n            # the entity term in one clause and its numeric value in the next\n            # sentence; 100 char clipped legitimate same-sentence pairings.\n            win_start = max(0, idx - 250)\n            win_end = min(len(comp_text), idx + len(label) + 250)\n            window = comp_text[win_start:win_end]\n            # Skip if the matched site sits in boilerplate footer (price tags,\n            # subscription CTAs). Avoids FP on '$19/month', '$149 Founding'.\n            _BOILERPLATE_MARKERS = (\n                'Founding Member', '$19/month', '$99/year', 'Subscribe to Shadow',\n                'Subscribe → ', 'Read the full analysis',\n            )\n            if any(marker in window for marker in _BOILERPLATE_MARKERS):\n                continue\n            window_nums = list(_COMPANION_NUMBER_RE.finditer(window))\n            # Unit-type gate (2026-05-29 audit): only companion numbers of the\n            # SAME unit family as the brief value can be a genuine drift of it.\n            # A % anchor doesn't drift against a $ figure or a count near the\n            # same entity — that mismatch was the entire FP class.\n            brief_unit = _value_unit_type(value)\n            window_canon = set()\n            for n in window_nums:\n                if _value_unit_type(n.group(0)) != brief_unit:\n                    continue\n                # Same-metric guard (a): skip target/projection-qualified numbers\n                # — a forward-looking goal isn't a drift of a current-state KPI.\n                if _COMPANION_TARGET_QUALIFIER_RE.search(\n                        window[max(0, n.start() - 30):n.start()]):\n                    continue\n                c = _normalize_numeric_token(n.group(0))\n                if c:\n                    window_canon.add(c)\n            window_canon.discard('')\n            # If brief 
value is a bare integer (no %, no currency, no unit-word),\n            # also accept word-boundary integer matches in window. Otherwise the\n            # check would false-positive on KPIs like '5 vs 7' that compare\n            # against a companion containing other percentages but the literal\n            # '5 participant central banks' nearby.\n            is_bare_int = brief_unit == 'int' and re.fullmatch(\n                r'\\d+(?:\\s*(?:vs|–|—|-)\\s*\\d+)?', value.strip()) is not None\n            if is_bare_int:\n                # Pull bare integers (≤4 digits, not part of dates/decimals), but\n                # skip ones that are actually %/currency-decorated (different unit)\n                # or identifier numbers (HR 3633, Notice 61, Art 35 — not metrics).\n                for bm in re.finditer(r'(?<![.\\d-])\\b\\d{1,4}\\b(?![\\d.])', window):\n                    nxt = window[bm.end():bm.end() + 16]\n                    if nxt[:1] == '%':\n                        continue\n                    # (c) hyphenated compound count (\"60-vote\"), (b) date/statute\n                    # day-number (\"18 November\", \"18 U.S.C.\") → bound to construct.\n                    if _COMPANION_COMPOUND_FOLLOW_RE.match(nxt):\n                        continue\n                    if _COMPANION_DATE_STATUTE_FOLLOW_RE.match(nxt):\n                        continue\n                    pre = window[max(0, bm.start() - 30):bm.start()]\n                    if re.search(r'[€$£¥]\\s*$', pre):\n                        continue\n                    if _COMPANION_IDENTIFIER_PRE_RE.search(pre):\n                        continue\n                    # (b) \"November 18\" — day-number preceded by a month name.\n                    if _COMPANION_MONTH_PRE_RE.search(pre):\n                        continue\n                    # (a) target/projection-qualified count.\n                    if _COMPANION_TARGET_QUALIFIER_RE.search(pre):\n                        continue\n                   
 window_canon.add(bm.group(0))\n            if not window_canon:\n                # Anchor exists in companion but no nearby number — not flagged\n                # (companion may legitimately drop the metric). v2 might warn.\n                continue\n            if canonical_set & window_canon:\n                continue  # same number present nearby — clean (any compound match)\n            # Drift: companion has a different number near same anchor.\n            # Skip date-like values (4-digit years 1900-2099) and single-digit\n            # values that arise from regex window-edge truncation (e.g., '$1'\n            # from cut-off '$196'). Prefer the largest remaining candidate as\n            # the most-likely stale headline metric.\n            other_candidates = [\n                v for v in window_canon\n                if not (len(v) == 4 and v.startswith(('19', '20')))\n                and not (len(v) == 1 and v not in canonical_set)\n            ]\n            if not other_candidates:\n                continue\n            try:\n                other = max(other_candidates, key=lambda x: float(x))\n            except ValueError:\n                other = other_candidates[0]\n            hits.append(\n                f'{source_tag} \"{label[:50]}{\"…\" if len(label) > 50 else \"\"}\": '\n                f'{value} in brief but {other} in {surface}'\n            )\n            break  # one surface report per anchor is enough\n        # Cap noise — don't spam more than 12 hits\n        if len(hits) >= 12:\n            break\n\n    if not hits:\n        return None\n    return f'{len(hits)} companion stale claim(s) flagged: {\" || \".join(hits[:3])}'\n\n\n# ── RUNNER-INTERNAL-BASELINE-CONSISTENCY-01 (2026-05-26) ─────────────────────\n# Sibling to check_companion_brief_consistency above: same stem-fuzzy-anchor +\n# numeric-proximity pattern, scope = brief-INTERNAL contradictions (intra-brief\n# sections carrying different numeric values for the same 
named-entity claim).\n#\n# Canonical instance: Brief 8 §II Scenario A used stale \"$2T per Bessent Jun\n# 2025\" while §I + §III + §VI + OPERATOR ESSENTIALS used revised \"$3T by 2030\n# Nov 2025\" — same entity, same metric, different numeric value across\n# sections. Resolved evening 2026-05-25 via revision-pair disclosure inside\n# every Bessent mention (\"~$3T by 2030 (Nov 2025 revision from his $2T-by-2028\n# baseline cited Jun 2025)\"). v1 scope = numeric-only intra-brief; thesis-frame\n# drift and date-only drift deferred to v2. Per BRIEF-8-CRITICAL-REVIEW-\n# FOLLOWUP-01 Phase D. WARN tier per [[feedback_runner_calibrate_then_ratchet]].\n\n# Sections to scope for intra-brief consistency. Each entry maps a marker (an\n# anchor id, h2-text marker, or class selector) to a label used in flag output.\n# We use anchor IDs as primary scope boundaries since they survive the lang-en/\n# lang-es duplication cleanly. The check matches across ALL these scopes within\n# a single brief HTML; cross-language drift (en-sec-3 vs es-sec-3 same num\n# different) is intentionally OUT OF SCOPE here (covered by\n# cross_lingual_topic_consistency + bilingual_extended_parity).\n_INTERNAL_SECTION_ANCHORS = (\n    # Format: (anchor_id_substring, label)\n    ('en-sec-3',          '§I EXEC SUMMARY'),\n    ('en-sec-10',         '§II SCENARIOS'),\n    ('en-sec-15',         '§III OPERATIONAL TRAPS'),\n    ('en-sec-17',         '§VI GEOPOLITICAL SCENARIOS'),\n    ('en-sec-implications', '§XV OPERATOR IMPLICATIONS'),\n    ('en-sec-2',          '§XII VERDICT'),\n    ('en-sec-12',         '§XIII CONCLUSION'),\n    ('en-sec-14',         '§XIV FORMAL PREDICTIONS'),\n)\n\n\ndef _extract_brief_sections(html: str) -> list[tuple[str, str]]:\n    \"\"\"Carve `html` into (section_label, section_text) tuples for intra-brief\n    consistency comparison. 
Includes:\n      - cover KPI/verdict chunk + tab-brief headline chunk (pre-scenarios)\n      - each §-anchored h2 region from _INTERNAL_SECTION_ANCHORS (EN scope\n        only — cross-lang covered by other checks)\n\n    Section text is HTML-stripped to plain prose. Returns empty list when\n    no recognisable scaffold (degrades gracefully on legacy briefs).\n    \"\"\"\n    sections: list[tuple[str, str]] = []\n\n    # Cover verdict + KPI grid (above tab-brief). Narrow scope keeps the\n    # headline-tier contradiction surface tight: cover holds the brief's most\n    # prominent thesis numbers (KPI grid + verdict-box copy).\n    cover_m = re.search(r'<div class=\"cover-inner\">(.*?)</div>\\s*</div>', html, re.DOTALL | re.IGNORECASE)\n    if cover_m:\n        cov_text = re.sub(r'<[^>]+>', ' ', cover_m.group(1))\n        cov_text = re.sub(r'\\s+', ' ', cov_text).strip()\n        if cov_text:\n            sections.append(('cover (verdict-box + KPI)', cov_text))\n\n    # tab-brief HEADLINE region — only the brief-tab content BEFORE its\n    # SCENARIOS h3 (Signal paragraphs / KPI repetitions). Excluding the brief-\n    # tab scenarios chunk keeps this scope narrow enough that intra-tab co-\n    # disclosure doesn't mask cross-§ contradictions. 
Best-effort: cuts at\n    # first <h3 containing \"Scenario\"; falls through to full tab-brief on\n    # legacy briefs without that scaffold.\n    tab_brief_m = re.search(\n        r'<div id=\"tab-brief\"[^>]*>(.*?)(?=<div id=\"tab-full\"|<footer)',\n        html, re.DOTALL | re.IGNORECASE,\n    )\n    if tab_brief_m:\n        tb_inner = tab_brief_m.group(1)\n        # Cut at first brief-tab \"Scenario\" h3 / strong / heading to exclude\n        # the brief-tab scenarios block (which duplicates §II content).\n        scenario_cut = re.search(\n            r'<(?:h2|h3|p)[^>]*>\\s*(?:<strong>\\s*)?Scenario\\s+[ABCD]',\n            tb_inner, re.IGNORECASE,\n        )\n        if scenario_cut:\n            tb_inner = tb_inner[:scenario_cut.start()]\n        tb_text = re.sub(r'<[^>]+>', ' ', tb_inner)\n        tb_text = re.sub(r'\\s+', ' ', tb_text).strip()\n        if tb_text:\n            sections.append(('tab-brief headline (Signals/KPI)', tb_text))\n\n    # Per-section EN anchors\n    for anchor_substr, label in _INTERNAL_SECTION_ANCHORS:\n        # Find h2 with id matching this anchor\n        pat = re.compile(\n            rf'<h2[^>]*id=\"{re.escape(anchor_substr)}\"[^>]*>(.*?)(?=<h2[^>]*id=\"(?:en-sec-|es-sec-)|</div>)',\n            re.DOTALL | re.IGNORECASE,\n        )\n        m = pat.search(html)\n        if not m:\n            continue\n        sec_text = re.sub(r'<[^>]+>', ' ', m.group(1))\n        sec_text = re.sub(r'\\s+', ' ', sec_text).strip()\n        if sec_text:\n            sections.append((label, sec_text))\n\n    return sections\n\n\n# Boilerplate markers to suppress (subscription CTAs, price tags in footer,\n# copyright/license boilerplate). 
Numbers inside these are not editorial claims.\n_INTERNAL_BOILERPLATE_MARKERS = (\n    'Founding Member', '$19/month', '$99/year', 'Subscribe to Shadow',\n    'Subscribe → ', 'Read the full analysis', 'Creative Commons',\n    'CC BY-NC-SA', 'Shadow Dynamics is a publication',\n)\n\n\ndef _extract_entity_key_for_number(\n    text: str, num_start: int, num_end: int, max_left: int = 80,\n) -> tuple[str, str] | None:\n    \"\"\"Extract a tight (entity_stem, unit_class) key for a numeric match.\n\n    Designed for intra-brief consistency: each metric anchored to ONE named-\n    entity owner (the proper noun whose claim it is) + a unit-class disambig-\n    uator so we don't conflate \"Bessent's $3T projection\" with \"Standard\n    Chartered's 50% reserve share\" or \"$1.5T derived UST exposure\".\n\n    Strategy:\n      1. Scan ≤`max_left` chars LEFT of the number for the nearest possessive\n         proper-noun pattern (`X's` or `by X` or `X —` or `X target`) — that's\n         the claim owner.\n      2. Compute unit-class from the matched number text: '%', 'T' (trillion),\n         'B' (billion), 'M' (million), 'bare' (digit-only).\n      3. 
Return (entity_stem_lower, unit_class) or None when no clean owner.\n\n    Tight design intentionally LOSES recall to gain precision: synthesised\n    intra-brief anchoring can't match KPI-label precision and must pay\n    conservatism to keep false-positive rate low (calibrate-then-ratchet).\n    \"\"\"\n    if num_start < 0 or num_end > len(text):\n        return None\n    surface = text[num_start:num_end]\n    # Unit class — keyed off suffix letter / % presence / \"billion\"/\"trillion\"/\"million\" word\n    low_surface = surface.lower()\n    if '%' in surface:\n        unit = '%'\n    elif re.search(r'\\btrillion\\b', low_surface) or low_surface.rstrip().endswith('t'):\n        unit = 'T'\n    elif re.search(r'\\bbillion\\b', low_surface) or low_surface.rstrip().endswith('b'):\n        unit = 'B'\n    elif re.search(r'\\bmillion\\b', low_surface) or low_surface.rstrip().endswith('m'):\n        unit = 'M'\n    else:\n        unit = 'bare'\n\n    left = text[max(0, num_start - max_left):num_start]\n    # Patterns ranked by specificity. We deliberately DROP the generic \"last\n    # capitalised word\" fallback — that fallback inflates FP rate by attaching\n    # ambient proper nouns (SWIFT, NATO, EU) to numbers that share no\n    # editorial-claim provenance with them. 
Explicit attribution is required.\n    pn_patterns = [\n        # \"Bessent's …$3T\" or \"Bessent's projection (~$3T)\" — possessive direct\n        re.compile(r\"\\b([A-Z][a-zA-Z]{3,})['’]s\\b\"),\n        # \"by Bessent — stablecoin market reaching ~$3T\" — dashed attribution\n        re.compile(r'\\bby\\s+([A-Z][a-zA-Z]{3,})\\s+[—\\-]'),\n        # \"Bessent target Aug 2026\" — bare proper-noun preceding claim-noun\n        re.compile(r'\\b([A-Z][a-zA-Z]{3,})\\s+(?:target|projection|estimate|forecast|claim|figure)\\b', re.IGNORECASE),\n        # \"per Bessent\" / \"according to Bessent\" attribution\n        re.compile(r'\\b(?:per|according\\s+to)\\s+([A-Z][a-zA-Z]{3,})\\b'),\n    ]\n    # Universal-skip proper nouns that are NOT entity-owners (months / generics)\n    _PN_SKIP = frozenset({\n        'january','february','march','april','may','june','july','august',\n        'september','october','november','december',\n        'monday','tuesday','wednesday','thursday','friday','saturday','sunday',\n        'spring','summer','autumn','winter','fall',\n        'sector','exposure','scenario','signal','alert','source','threshold',\n        'detail','full','analysis','sources','impact','impacto','escenario',\n        'western','eastern','northern','southern','global','western-aligned',\n        'european','american','asian','gulf','asean','treasury','senate',\n    })\n    for pat in pn_patterns:\n        # Find the LAST occurrence in `left` (closest to the number)\n        last = None\n        for m in pat.finditer(left):\n            last = m\n        if last is None:\n            continue\n        pn = last.group(1)\n        if pn.lower() in _PN_SKIP:\n            continue\n        # Stem to 7-char prefix for morphology tolerance\n        stem = pn[:7].lower() if len(pn) >= 7 else pn.lower()\n        return (stem, unit)\n    return None\n\n\ndef _values_equivalent(a: str, b: str, tol: float = 0.05) -> bool:\n    \"\"\"Return True if numeric tokens `a` and `b` are 
equivalent within `tol`\n    (default ±5% per spec) — handles rounding-form pairs like '$2T' vs '$2.1T'.\n\n    Returns False on un-parseable inputs (defensive).\n    \"\"\"\n    try:\n        fa, fb = float(a), float(b)\n    except (TypeError, ValueError):\n        return False\n    if fa == 0 and fb == 0:\n        return True\n    if fa == 0 or fb == 0:\n        return False\n    return abs(fa - fb) / max(abs(fa), abs(fb)) <= tol\n\n\ndef check_internal_baseline_consistency(html: str, brand: str | None = None) -> str | None:\n    \"\"\"Detect intra-brief metric contradictions on the same named-entity claim.\n\n    Algorithm:\n      1. Carve brief into key sections (KPI/verdict cover, tab-brief headline,\n         §I/II/III/VI/XII/XIII/XIV/XV anchors).\n      2. For each `_COMPANION_NUMBER_RE` value, extract a tight (entity_stem,\n         unit_class) key via `_extract_entity_key_for_number`. Strict explicit-\n         attribution patterns only: `X's N`, `by X — N`, `X target/projection N`,\n         `per X N`. No generic-capitalised-word fallback (keeps precision high).\n      3. Group all tuples by (entity_stem, unit_class) key.\n      4. A section ASSERTS a cluster iff it contains values from EXACTLY one\n         cluster for that entity-key (no revision-pair co-disclosure). A\n         section that co-discloses ≥2 clusters in tight proximity (e.g., \"$3T\n         (revision from $2T baseline)\") is compatible with all clusters and\n         does NOT trigger a flag.\n      5. Contradiction iff ≥2 different clusters each have ≥1 asserting section.\n      6. 
Tolerance: ±5% per `_values_equivalent` collapses '$2T' ↔ '$2.1T'.\n\n    No-op (return None) when:\n      - HTML has no recognisable section scaffold (legacy briefs)\n      - No entity-anchor group has cross-section drift\n\n    Returns a string describing N issues with up to 3 samples on failure.\n\n    WARN-tier first instance per [[feedback_runner_calibrate_then_ratchet]].\n    Sibling to `check_companion_brief_consistency` (same stem-fuzzy-anchor +\n    numeric-proximity pattern, scope = brief-INTERNAL).\n    \"\"\"\n    sections = _extract_brief_sections(html)\n    if len(sections) < 2:\n        return None  # not enough scaffold to compare cross-section\n\n    # Build ((entity_stem, unit_class), canonical, section_label, surface)\n    # tuples + per-section co-disclosure index for revision-pair tolerance.\n    tuples: list[tuple[tuple[str, str], str, str, str]] = []\n    # (section_label, entity_key) → set of canonical values present in that\n    # section for that entity-key (revision-pair: a section co-disclosing\n    # both old + new value within tight proximity legitimises both).\n    cofingerprint: dict[tuple[str, tuple[str, str]], set[str]] = {}\n\n    for sec_label, sec_text in sections:\n        # Strip section-level boilerplate in-place (preserve offsets via\n        # blank-fill so numeric proximity windows stay aligned).\n        for marker in _INTERNAL_BOILERPLATE_MARKERS:\n            sec_text = sec_text.replace(marker, ' ' * len(marker))\n        # Find all numeric values in this section\n        num_matches = list(_COMPANION_NUMBER_RE.finditer(sec_text))\n        if not num_matches:\n            continue\n        # For each numeric value, extract tight (entity_stem, unit_class) key.\n        for num_m in num_matches:\n            val = num_m.group(0).strip()\n            canonical = _normalize_numeric_token(val)\n            if not canonical:\n                continue\n            key = _extract_entity_key_for_number(\n                
sec_text, num_m.start(), num_m.end(),\n            )\n            if key is None:\n                continue  # no clean entity-owner — skip to keep precision high\n            tuples.append((key, canonical, sec_label, val))\n            cofingerprint.setdefault((sec_label, key), set()).add(canonical)\n\n    if not tuples:\n        return None\n\n    # Group by (entity_stem, unit_class) key\n    by_key: dict[tuple[str, str], list[tuple[str, str, str]]] = {}\n    for key, canon, sec_label, surface in tuples:\n        by_key.setdefault(key, []).append((canon, sec_label, surface))\n\n    hits: list[str] = []\n    for key, entries in by_key.items():\n        # Distinct sections × distinct canonical values\n        sections_seen: dict[str, set[str]] = {}\n        surface_by_canon: dict[str, str] = {}\n        for canon, sec_label, surface in entries:\n            sections_seen.setdefault(sec_label, set()).add(canon)\n            surface_by_canon.setdefault(canon, surface)\n\n        # Need at least 2 distinct sections AND 2+ distinct canonical values\n        if len(sections_seen) < 2:\n            continue\n        all_canon = set().union(*sections_seen.values())\n        if len(all_canon) < 2:\n            continue\n\n        # Collapse equivalent values (±5% tolerance) into clusters\n        sorted_canon = sorted(all_canon, key=lambda x: float(x) if x.replace('.', '').isdigit() else 0)\n        clusters: list[set[str]] = []\n        for c in sorted_canon:\n            placed = False\n            for cluster in clusters:\n                if any(_values_equivalent(c, m) for m in cluster):\n                    cluster.add(c)\n                    placed = True\n                    break\n            if not placed:\n                clusters.append({c})\n\n        if len(clusters) < 2:\n            continue  # all values within ±5% — same baseline\n\n        # Revision-pair tolerance: a section \"co-discloses\" values when it\n        # contains ≥2 different cluster values 
for the same entity (the\n        # documented-revision pattern, e.g., \"$3T (revision from $2T baseline)\").\n        # Such a section is compatible with EITHER cluster — its presence does\n        # NOT excuse a section that asserts only ONE conflicting value.\n        # Build per-section cluster membership.\n        sec_clusters: dict[str, set[int]] = {}  # sec_label → set(cluster_idx)\n        for sec_label, canon_set in sections_seen.items():\n            for ci, cluster in enumerate(clusters):\n                if cluster & canon_set:\n                    sec_clusters.setdefault(sec_label, set()).add(ci)\n\n        # A section ASSERTS a cluster iff it contains that cluster AND does NOT\n        # contain any other cluster within ±200 chars of the same stem — checked\n        # via cofingerprint (per-section per-stem co-values). If cofingerprint\n        # for the section spans ≥2 clusters, the section is a revision-disclosure\n        # site (compatible with all); if it covers exactly 1 cluster, the section\n        # ASSERTS that cluster.\n        asserting: dict[int, set[str]] = {}  # cluster_idx → set(sec_label)\n        for sec_label, cluster_idxs in sec_clusters.items():\n            co_vals = cofingerprint.get((sec_label, key), set())\n            co_clusters_hit = {ci for ci, cluster in enumerate(clusters) if cluster & co_vals}\n            if len(co_clusters_hit) >= 2:\n                continue  # revision-disclosure site — compatible with all\n            # Single-cluster assertion site (or no co-fingerprint at all)\n            for ci in cluster_idxs:\n                asserting.setdefault(ci, set()).add(sec_label)\n\n        # Contradiction iff ≥2 different clusters each have ≥1 asserting section\n        asserting_clusters = [ci for ci, s in asserting.items() if s]\n        if len(asserting_clusters) < 2:\n            continue\n\n        # Genuine drift: report each asserting cluster + section provenance\n        cluster_reports: list[str] = []\n    
    for ci in asserting_clusters:\n            cluster = clusters[ci]\n            sects_for_cluster = sorted(asserting[ci])\n            sample_canon = next(iter(cluster))\n            surface = surface_by_canon.get(sample_canon, sample_canon)\n            cluster_reports.append(\n                f'{surface} in {\"+\".join(sects_for_cluster[:2])}'\n                f'{\"…\" if len(sects_for_cluster) > 2 else \"\"}'\n            )\n        if len(cluster_reports) < 2:\n            continue\n        # Reconstruct a readable anchor label from the (entity, unit) key\n        entity_stem, unit_class = key\n        anchor_label = f'{entity_stem} [{unit_class}]'\n        hits.append(\n            f'\"{anchor_label}\": '\n            + ' vs '.join(cluster_reports[:3])\n            + ' — internal baseline contradiction'\n        )\n        if len(hits) >= 8:\n            break\n\n    if not hits:\n        return None\n    return (\n        f'{len(hits)} intra-brief baseline contradiction(s) flagged: '\n        f'{\" || \".join(hits[:3])}'\n    )\n\n\ndef _lang_scope_at(html: str, pos: int) -> str | None:\n    \"\"\"Return the innermost still-open lang-en/lang-es <div> scope at byte\n    `pos`, by walking <div>/</div> from the document start and tracking which\n    open divs carry a lang-en / lang-es class. Returns 'en', 'es', or None\n    (no lang div currently open). 
Depth-aware — unlike the nearest-preceding\n    rfind heuristic used elsewhere, it correctly accounts for a lang wrapper\n    that has already CLOSED before `pos` (the exact Brief 8 failure mode).\n    \"\"\"\n    stack: list[str | None] = []\n    for m in re.finditer(r'<div\\b[^>]*>|</div\\s*>', html[:pos]):\n        tok = m.group(0)\n        if tok.startswith('</'):\n            if stack:\n                stack.pop()\n        elif re.search(r'class=\"[^\"]*\\blang-es\\b', tok):\n            stack.append('es')\n        elif re.search(r'class=\"[^\"]*\\blang-en\\b', tok):\n            stack.append('en')\n        else:\n            stack.append(None)\n    for lang in reversed(stack):\n        if lang in ('en', 'es'):\n            return lang\n    return None\n\n\ndef check_lang_scope_section_leak(html: str) -> str | None:\n    \"\"\"RUNNER-LANG-SCOPE-SECTION-LEAK-01 (2026-05-29) — detect a\n    language-specific <section> (own heading id en-sec-* / es-sec-*) that is\n    neither tagged with the matching lang-en/lang-es class nor nested inside a\n    still-open matching lang <div>. Such a section renders in BOTH language\n    views, because the in-page setLang() toggle + the `.lang-es{display:none}`\n    CSS default only act on elements carrying a lang-en/lang-es class.\n\n    Canonical instance: Brief 8 (SD_20260522_1354_Payments) — both the English\n    SOURCES and the Spanish FUENTES `<section class=\"sources\">` blocks sat\n    OUTSIDE the lang wrapper divs (which had already closed before them), so the\n    Spanish sources + footnotes rendered in the English view. All L1-L5 +\n    L3 sub-gates missed it; caught only by a reader. 
Fixed 2026-05-29 by tagging\n    each section with its lang class (`sources lang-en` / `sources lang-es`).\n\n    Both shipped structural patterns pass: lang class on the <section> itself\n    (Brief 8 post-fix) and <section> nested inside <div class=\"lang-XX\"> (the\n    Critical Minerals / DORA pattern).\n\n    BLOCKER-class (operator directive 2026-05-29 — \"avoid such a mistake in\n    future briefs\"). Wired straight to BLOCKER rather than the usual\n    calibrate-then-ratchet WARN window because the defect is deterministic with\n    a zero false-positive surface — a section carrying an es-sec-*/en-sec-*\n    heading is by definition language-specific, so rendering it in the other\n    language view is always wrong — and the check is already clean across the\n    full 8-brief corpus (both shipped valid patterns whitelisted). Mirrors the\n    no_internal_backlog_refs precedent (BLOCKER from inception, operator call).\n    \"\"\"\n    leaks: list[str] = []\n    for m in re.finditer(r'<section\\b[^>]*>', html):\n        sec_tag = m.group(0)\n        end = html.find('</section>', m.end())\n        body = html[m.end():end if end != -1 else m.end() + 2000]\n        h = re.search(r'<h2\\b[^>]*id=\"(en|es)-sec-[^\"]*\"', body)\n        if not h:\n            continue\n        lang = h.group(1)\n        cls_m = re.search(r'class=\"([^\"]*)\"', sec_tag)\n        cls_tokens = (cls_m.group(1).split() if cls_m else [])\n        if f'lang-{lang}' in cls_tokens:\n            continue  # tagged on the <section> itself\n        if _lang_scope_at(html, m.start()) == lang:\n            continue  # nested inside a matching open lang <div>\n        hdr = re.search(r'<h2\\b[^>]*>(.*?)</h2>', body)\n        label = (re.sub(r'<[^>]+>', '', hdr.group(1)).strip()[:40]\n                 if hdr else f'{lang}-sec')\n        leaks.append(f'{lang}: \"{label}\"')\n    if leaks:\n        return (\n            f'language-scoped <section> outside matching lang scope — '\n            
f'{len(leaks)} section(s) render in BOTH language views: '\n            f'{leaks[:6]}{\"...\" if len(leaks) > 6 else \"\"}. Add the matching '\n            f'lang-en/lang-es class to the <section> (or nest it inside the '\n            f'lang wrapper div). Canonical: Brief 8 SOURCES+FUENTES leak, '\n            f'fixed 2026-05-29 (RUNNER-LANG-SCOPE-SECTION-LEAK-01).'\n        )\n    return None\n\n\n# ── runner ───────────────────────────────────────────────────────────────────\n\n@dataclass\nclass CheckResult:\n    name: str\n    severity: str       # \"blocker\" | \"warning\"\n    passed: bool\n    detail: str | None  # failure reason, or None on pass\n\n\nBLOCKERS: list[tuple[str, Callable[[str], str | None]]] = [\n    ('scaffold_pseudo_citations',  check_scaffold_pseudo_citations),\n    ('result_label_duplicate',     check_result_label_duplicate),\n    ('preflight_section_leak',     check_preflight_section_leak),\n    ('event_date_leak',            check_event_date_leak),\n    ('inline_paywall',             check_inline_paywall),\n    ('old_branding',               check_old_branding),\n    ('about_scaffold_label_list',  check_about_scaffold_label_list),\n    ('hero_scaffold_pill',         check_hero_scaffold_pill),\n    ('section_count_parity',       check_section_count_parity),\n    ('unbalanced_tables',          check_unbalanced_tables),\n    # EDITORIAL-FOOTNOTE-01 F5 — promoted from WARNING to BLOCKER 2026-04-28 noche.\n    # The check is conditional: if a brief has zero <sup> and zero <li id=\"fn-N\">\n    # it returns None (no-op). 
Legacy briefs without footnote markup pass\n    # trivially; new briefs (post-F3 commit 8235112) emit the pattern and any\n    # malformed footnote graph (orphan refs, gaps, duplicates) blocks publish.\n    ('footnote_integrity',         check_footnote_integrity),\n    # EDITORIAL-LEGAL-02 L02 — bilingual regulatory disclaimer regression guard.\n    # Wired as BLOCKER from inception: the disclaimer is rendered by the\n    # global footer template (commit 687d8f8) so all 5 published briefs pass\n    # by construction. Failure = template rollback, not editorial drift.\n    ('regulatory_disclaimer_present', check_regulatory_disclaimer_present),\n    # EDITORIAL-LEGAL-02 P02 — bilingual structural parity beyond h2.\n    # Extends section_count_parity to h3/table/p. Tolerances calibrated\n    # against the 5-brief corpus on 2026-05-02; Brief 5 (Δ4 on 63 p tags)\n    # passes with margin under p tolerance max(8, 20%×larger).\n    ('bilingual_extended_parity',  check_bilingual_extended_parity),\n    # EDITORIAL-LEGAL-02 S01 — inference-citation ratio guard. Pairs with the\n    # EDITORIAL-PROMPT INFERENCE SOURCE TRANSPARENCY rule. Threshold ≥4 sup\n    # refs + ratio > 25% to avoid noise on light-footnote briefs.\n    ('inference_citation_ratio',   check_inference_citation_ratio),\n    # EDITORIAL-FOOTER-DISCIPLINE-01 — defensive regression guard scoped to\n    # <footer.site-footer> + <div.cover-meta>. Closes the gap left by the\n    # 2026-04-30 legal patch (commit 2dddef7): the name-drop was deleted\n    # but nothing prevents a future Forecaster Format Output edit from\n    # silently reintroducing it. 
Body-level citations of WEF/Eurasia\n    # remain legitimate nominative use (different scope).\n    ('footer_namedrop_discipline', check_footer_namedrop_discipline),\n    # RUNNER-NO-INTERNAL-BACKLOG-REFS-01 (2026-05-23 PM) — pairs with EDITORIAL_PROMPT R26.\n    # Forbids [BACKLOG: X] / \"Brief N question\" / internal ticket IDs in published content.\n    # Origin: Brief 8 v4 emitted 6× [BACKLOG: BRIEF-10] + 2× \"Brief 10 question\" hardcoded,\n    # manually cleaned 2026-05-23 PM. BLOCKER from inception (operator decision).\n    ('no_internal_backlog_refs',   check_no_internal_backlog_refs),\n    # RUNNER-LANG-SCOPE-SECTION-LEAK-01 (2026-05-29) — a language-scoped\n    # <section> (heading id en-sec-*/es-sec-*) neither tagged with the matching\n    # lang class nor nested inside an open lang div renders in BOTH language\n    # views. Canonical: Brief 8 SOURCES+FUENTES + Brief 7 DORA §XII FUENTES\n    # leaked Spanish into the EN view (fixed by tagging each section). Wired to\n    # BLOCKER from inception (operator directive 2026-05-29): deterministic,\n    # zero false-positive surface, clean across the full corpus, both shipped\n    # valid patterns (class-on-section + nested-in-lang-div) whitelisted.\n    ('lang_scope_section_leak',    check_lang_scope_section_leak),\n]\n\ndef check_empty_styled_elements(html: str) -> str | None:\n    \"\"\"EMPTY-ELEMENT-RUNNER-CHECK-01: detect CSS-classed empty elements\n    that survive editorial surgery as orphans.\n\n    Canonical instance 2026-05-17 PM (commit fa1faa2): Brief 7 2151 had\n    <div class=\"cover-meta\"></div> orphaned at line 47 (badges above\n    already carried the meta). Caught only by careful editorial review.\n\n    Pattern: <(div|span|section|nav) ... 
class=\"...\" ...></\\\\1> with\n    no inner content (only optional whitespace).\n\n    Whitelist: legitimate-empty elements used as JS-populated containers\n    can opt-out via adjacent <!-- intentionally-empty --> comment marker\n    (placed on the line BEFORE the empty element).\n\n    WARNING-class. Cosmetic + DOM bloat, not promote-blocking.\n    \"\"\"\n    pattern = re.compile(\n        r'<(div|span|section|nav)\\s+[^>]*class=\"[^\"]*\"[^>]*>\\s*</\\1>',\n        re.IGNORECASE,\n    )\n    whitelist_marker = '<!-- intentionally-empty -->'\n    hits = []\n    for m in pattern.finditer(html):\n        # Check if preceded by whitelist marker within 100 chars\n        ctx_before = html[max(0, m.start() - 100):m.start()]\n        if whitelist_marker in ctx_before:\n            continue\n        elem = m.group(0)\n        # 2026-06-01 FP-precision: an empty element carrying an `id` is an\n        # ANCHOR/REFERENCE TARGET (legitimately empty by purpose, e.g. the\n        # `<span id=\"<lang>-sec-predictions\" class=\"sec-alias\">` that\n        # Format Output's _ensurePredictionsAnchor injects so the TOC link\n        # resolves after _injectH2Ids renumbers the h2). Anchor targets are\n        # not \"invisible bloat\" — skip them (they have a structural reason\n        # to exist and to be empty).\n        if re.search(r'\\sid=\"[^\"]+\"', elem):\n            continue\n        # Extract class for reporting (first class only)\n        class_match = re.search(r'class=\"([^\"\\s]+)', elem)\n        cls = class_match.group(1) if class_match else 'unknown'\n        tag = m.group(1).lower()\n        hits.append(f'<{tag} class=\"{cls}\">')\n    if hits:\n        return (\n            f'empty CSS-classed element(s) — {len(hits)} orphan(s): '\n            f'{hits[:5]}{\"...\" if len(hits) > 5 else \"\"}. '\n            f'These render invisibly + bloat DOM. 
Either remove the empty '\n            f'element OR add <!-- intentionally-empty --> marker on the '\n            f'preceding line if the element is a JS-populated container. '\n            f'Canonical instance: Brief 7 2151 cover-meta orphan (fixed '\n            f'commit fa1faa2).'\n        )\n    return None\n\n\n# RUNNER-FIRST-USE-EXPANSION-CHECK-LF20-01 (2026-05-17 PM): defensive guard\n# mechanizing ACRONYM-FIRST-USE-EXPANSION-CANONICAL-01 (Thread B canon shipped\n# commit 582583b). 16 first-use omissions in Brief 7 pre-retrofit → operator\n# catch threshold exceeded per [[feedback_operator_catch_threshold_triggers_mechanization_2026-05-17]].\n# Parses ACRONYMS/ACRÓNIMOS table from doc, extracts terms + expansions,\n# verifies each non-exception term's first occurrence is preceded by expansion\n# within ±200 chars. Universal-exception list = common acronyms reader assumed\n# to know without expansion.\n_ACRONYM_UNIVERSAL_EXCEPTIONS = frozenset({\n    'EU', 'US', 'UK', 'USA', 'AWS', 'GDP', 'PIB', 'IMF', 'FMI',\n    'NATO', 'OTAN', 'OECD', 'OCDE', 'WTO', 'OMC', 'UN', 'ONU',\n    'ECB', 'BCE', 'FED', 'CC', 'PDF', 'HTML', 'CSS', 'URL', 'API',\n    'AI', 'IA', 'CEO', 'CFO', 'CTO', 'CIO', 'PM',\n    # 2026-05-31 density FP-audit: 'PRED' is a structural prediction-block\n    # label (parallels SIGNAL/SCENARIO), not an editorial acronym; 'T-bill(s)'\n    # is a near-universal finance term. 
Whitelisting clears recognizer FPs.\n    'PRED', 'T-bill', 'T-bills', 'T-Bill', 'T-Bills',\n})\n\n\ndef check_acronym_first_use_expansion(html: str) -> str | None:\n    \"\"\"Flag acronyms in ACRONYMS table whose first body occurrence lacks\n    long-form expansion within ±200 chars.\n\n    Scope: parses EN ACRONYMS table (en-sec-14); checks against body content\n    BEFORE the ACRONYMS section (since the table itself is the definition).\n    \"\"\"\n    # Find EN ACRONYMS table\n    table_match = re.search(\n        r'<h2[^>]*id=\"en-sec-14\"[^>]*>[^<]*ACRONYMS[^<]*</h2>\\s*<table[^>]*>(.*?)</table>',\n        html, re.DOTALL | re.IGNORECASE,\n    )\n    if not table_match:\n        return None  # no ACRONYMS section — no-op\n    table_html = table_match.group(1)\n    table_pos = table_match.start()\n    # Scope to <body> content only (avoid <head> meta tags polluting checks)\n    body_start = html.find('<body')\n    body_start = body_start if body_start >= 0 else 0\n    body_before_table = html[body_start:table_pos]\n\n    # Parse rows: each <tr> has <td>Term</td><td>Expansion</td>...\n    rows = re.findall(r'<tr>(.*?)</tr>', table_html, re.DOTALL)\n    if len(rows) < 2:\n        return None  # no data rows\n    missing = []\n    for row in rows[1:]:  # skip header row\n        cells = re.findall(r'<t[dh][^>]*>(.*?)</t[dh]>', row, re.DOTALL)\n        if len(cells) < 2:\n            continue\n        term_cell = re.sub(r'<[^>]+>', '', cells[0]).strip()\n        expansion_cell = re.sub(r'<[^>]+>', '', cells[1]).strip()\n        # Term may be \"EBA / ABE\" — take FIRST form (EN)\n        term = term_cell.split('/')[0].strip()\n        if not term or term in _ACRONYM_UNIVERSAL_EXCEPTIONS:\n            continue\n        # Expansion may be \"Long EN / Long ES\" — take FIRST form (EN)\n        expansion_en = expansion_cell.split('/')[0].strip()\n        if not expansion_en or len(expansion_en) < 5:\n            continue\n        # Find first occurrence of term in 
body_before_table (word boundary)\n        term_pattern = re.compile(r'\\b' + re.escape(term) + r'\\b')\n        m = term_pattern.search(body_before_table)\n        if not m:\n            continue  # term not in body before its definition — skip (likely abbreviated in body but defined for completeness)\n        # Check ±200 char window for the genuine long-form expansion.\n        # 2026-05-31 density FP-audit + adversarial-audit follow-up: the\n        # expansion cell may be long-form-first (\"e-money token (MiCA gloss)\")\n        # OR acronym-first (\"EMT (e-money token under MiCA)\"), and the body may\n        # use a shorter / re-ordered form than the table. A brittle 25-char\n        # prefix caused BOTH false positives (the table author's parenthetical\n        # gloss polluting the key — EMT/UST) AND, after a naive gloss-strip, a\n        # false negative (acronym-first cells self-matching the acronym). So\n        # derive candidate long-form phrases — the part before '(' and the part\n        # inside '(...)' — drop any that is just the acronym, and require the\n        # body window to contain the first two words of SOME candidate.\n        term_norm = term.replace('-', '').replace(' ', '').upper()\n        cand_parts = [expansion_en.split('(')[0]]\n        _paren = re.search(r'\\(([^)]*)\\)', expansion_en)\n        if _paren:\n            cand_parts.append(_paren.group(1))\n        keys = []\n        for part in cand_parts:\n            words = [w for w in re.split(r'\\s+', part.strip()) if w]\n            if len(words) >= 2 and ''.join(words).replace('-', '').upper() != term_norm:\n                keys.append(' '.join(words[:2]).lower())\n        if not keys:  # single-word expansion — fall back to a short prefix\n            keys = [expansion_en[:20].lower()]\n        window_start = max(0, m.start() - 200)\n        window_end = min(len(body_before_table), m.end() + 200)\n        window = body_before_table[window_start:window_end].lower()\n        if 
not any(k in window for k in keys):\n            missing.append(f'{term} ({expansion_en[:40]}{\"…\" if len(expansion_en)>40 else \"\"})')\n    if not missing:\n        return None\n    sample = missing[:5]\n    more = f' (+{len(missing)-5} more)' if len(missing) > 5 else ''\n    return (\n        f'ACRONYM first-use expansion gap: {len(missing)} acronym(s) appear '\n        f'in body before ACRONYMS table without long-form expansion in ±200 chars. '\n        f'Samples: {\", \".join(sample)}{more}. Per ACRONYM-FIRST-USE-EXPANSION-CANONICAL-01, '\n        f'each non-universal acronym must be expanded on first occurrence '\n        f'(e.g., \"Critical ICT Third-Party Provider (CTPP)\").'\n    )\n\n\n# MERMAID-PER-FEEDBACK-LOOP-RUNNER-CHECK-01 (2026-05-17 PM): defensive guard\n# pairing EDITORIAL-PROMPT-FEEDBACK-LOOPS-DIAGRAM-PER-LOOP-01 prompt rule.\n# Per [[feedback_sonnet_ignores_section_mandates_2026-05-16]], prompt mandates\n# alone are unreliable — runner catches misses at quality gate.\n# Pattern: count [SELF-REINFORCING] / [AUTO-REFORZANTE] prose markers vs\n# <pre class=\"mermaid\"> blocks within §3-quater FEEDBACK LOOPS section bounds\n# (h2 id=\"en-sec-feedback\" or \"es-sec-feedback\" to next h2).\n# Canonical instance: 2151 §3-quater had 3 loop prose, 1 mermaid (operator-caught\n# 2026-05-17 PM; 2 missing diagrams added in commit ddbd347).\n_FEEDBACK_SECTION_HEADINGS = (\n    'en-sec-feedback',\n    'es-sec-feedback',\n)\n_LOOP_PROSE_PATTERNS = (\n    '[SELF-REINFORCING]',\n    '[AUTO-REFORZANTE]',\n    '[AUTOREFORZANTE]',  # accent-strip tolerance\n)\n\n\ndef check_mermaid_per_feedback_loop(html: str) -> str | None:\n    \"\"\"Flag FEEDBACK LOOPS sections where Mermaid diagram count < loop prose count.\n\n    Each named self-reinforcing loop in §3-quater must be accompanied by a\n    Mermaid flowchart per EDITORIAL-PROMPT-FEEDBACK-LOOPS-DIAGRAM-PER-LOOP-01.\n    Bilingual: checks EN and ES sections independently (each language's\n    diagrams live inside 
its lang scope).\n    \"\"\"\n    issues = []\n    for anchor in _FEEDBACK_SECTION_HEADINGS:\n        # Find h2 with this anchor\n        anchor_marker = f'id=\"{anchor}\"'\n        start = html.find(anchor_marker)\n        if start < 0:\n            continue  # section not present in this brief\n        # Find next <h2 after the section opens\n        section_end = html.find('<h2', start + len(anchor_marker))\n        if section_end < 0:\n            section_end = len(html)\n        section = html[start:section_end]\n        # Count loop prose markers\n        loop_count = sum(section.count(pat) for pat in _LOOP_PROSE_PATTERNS)\n        # Count Mermaid diagram blocks\n        mermaid_count = section.count('<pre class=\"mermaid\">')\n        if loop_count == 0:\n            continue  # no loops declared — skip (graceful for briefs without this section)\n        if mermaid_count < loop_count:\n            lang_label = 'EN' if 'en-' in anchor else 'ES'\n            issues.append(\n                f'{lang_label} §3-quater FEEDBACK LOOPS: {loop_count} loop(s) '\n                f'declared but only {mermaid_count} Mermaid diagram(s)'\n            )\n    if not issues:\n        return None\n    return (\n        f'FEEDBACK LOOPS diagram-per-loop gap: {\"; \".join(issues)}. '\n        f'Per EDITORIAL-PROMPT-FEEDBACK-LOOPS-DIAGRAM-PER-LOOP-01, each '\n        f'self-reinforcing loop must be accompanied by a Mermaid flowchart '\n        f'(canonical pattern: chart-wrap > pre.mermaid > flowchart LR with '\n        f'terminal node back-edge to N1). Canonical instance: 2151 §3-quater '\n        f'(operator-caught 2026-05-17 PM; commit ddbd347 added missing diagrams).'\n    )\n\n\ndef _canonical_diff_findings(html: str):\n    \"\"\"Shared canonical-diff codepath for the structure WARN and the MUST-FIX\n    promote gate. Returns the findings dict, or None for a no-op (legacy\n    single-tab brief, or missing spec/tooling). 
Raises on a genuine diff error.\"\"\"\n    if 'id=\"tab-brief\"' not in html or 'id=\"tab-full\"' not in html:\n        return None\n    spec_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),\n                              'data', 'canonical_brief_structure.yaml')\n    if not os.path.exists(spec_path):\n        return None\n    import yaml as _yaml\n    with open(spec_path) as f:\n        spec = _yaml.safe_load(f)\n    import importlib.util\n    diff_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),\n                                'canonical-brief-diff.py')\n    if not os.path.exists(diff_script):\n        return None\n    spec_mod = importlib.util.spec_from_file_location('canonical_brief_diff', diff_script)\n    cbd = importlib.util.module_from_spec(spec_mod)\n    spec_mod.loader.exec_module(cbd)\n    return cbd.diff_brief_against_spec(html, spec)\n\n\ndef check_canonical_brief_must_fix(html: str) -> str | None:\n    \"\"\"RUNNER-CANONICAL-MUST-FIX-GATE-01 (2026-05-30) — MUST-FIX-only sibling of\n    check_canonical_brief_structure, intended for elevation to a promote blocker\n    via brief-saver PROMOTE_BLOCKERS. Fires (non-None) ONLY when the canonical\n    diff reports >=1 MUST-FIX, so SHOULD-FIX-only briefs (e.g. Brief 8) still\n    promote and the historical corpus is unaffected. Closes the Brief 9 RCA gap\n    (2026-05-30) where a structural regression — dropped mermaids, thin\n    brief-tab, missing disclaimer — passed promote at 0 blockers. Fail-open: a\n    diff-tooling error never blocks promote.\"\"\"\n    try:\n        findings = _canonical_diff_findings(html)\n    except Exception:\n        return None\n    if not findings:\n        return None\n    must_fix = findings.get('must_fix', [])\n    if not must_fix:\n        return None\n    return (\n        f'{len(must_fix)} canonical MUST-FIX item(s) (sample: '\n        f'{\" | \".join(must_fix[:3])}). 
Run '\n        '`python3 scripts/canonical-brief-diff.py <brief>` for the full report.'\n    )\n\n\ndef check_single_glance_coverage(html: str) -> str | None:\n    \"\"\"RUNNER-SINGLE-DIGEST-01 (2026-05-30) — EDITORIAL-GLANCE-DIGEST-01 and\n    EDITORIAL-COVERAGE-BALANCE-01 require EXACTLY ONE SD-GLANCE and ONE\n    SD-COVERAGE comment block on the cover. Flags duplicates (Brief 9 emitted\n    each 3x pre-R27). Presence-conditional: silent when zero (legacy briefs).\"\"\"\n    import re as _re\n    counts = {\n        'SD-GLANCE': len(_re.findall(r'SD-GLANCE\\s*:', html)),\n        'SD-COVERAGE': len(_re.findall(r'SD-COVERAGE\\s*:', html)),\n        'SD-SLUG': len(_re.findall(r'SD-SLUG\\s*:', html)),\n        'SD-PRED-CAL': len(_re.findall(r'SD-PRED-CAL\\s*:', html)),\n    }\n    parts = [f'{n} {name} blocks (expected 1)' for name, n in counts.items() if n > 1]\n    if not parts:\n        return None\n    return ('duplicate cover digest blocks: ' + '; '.join(parts) +\n            ' — emit exactly one SD-GLANCE and one SD-COVERAGE on the cover')\n\n\n# ── EDITORIAL-SELF-SLUG-01 — slug_topic_consistency (2026-05-30) ─────────────\n# The brief self-declares its filing identity on the cover via\n#   <!-- SD-SLUG: {\"en\":\"...\",\"es\":\"...\",\"slug\":\"...\"} -->\n# (Format Output PREFERS this over the keyword map; FORMAT-OUTPUT-PREFER-SELF-SLUG-01).\n# This check confirms the SD-SLUG.en / <title> agree with the brief's OWN\n# \"BRIEF N — TOPIC\" header — the gate that would have surfaced the Brief 9\n# Stablecoins -> Payments_Infra mis-name at quality-check time. 
WARN-only.\n_SD_SLUG_RE = re.compile(r'<!--\\s*SD-SLUG:\\s*(\\{.*?\\})\\s*-->', re.S)\n_SD_COVERAGE_TITLE_RE = re.compile(r'<!--\\s*SD-COVERAGE:\\s*(\\{.*?\\})\\s*-->', re.S)\n_TITLE_RE = re.compile(r'<title>(.*?)</title>', re.S | re.I)\n_H1_BRIEF_TOPIC_RE = re.compile(\n    r'<(?:h1|h2|li|a)[^>]*>\\s*(?:BRIEF|INFORME)\\s+\\d+\\s*[—–\\-]\\s*(.+?)</(?:h1|h2|li|a)>',\n    re.S | re.I,\n)\n_TITLE_PREFIX_RE = re.compile(r'^\\s*shadow dynamics\\s*:?\\s*', re.I)\n_TITLE_SUFFIX_RE = re.compile(r'\\s*(?:·\\s*shadow dynamics.*|20\\d{2})\\s*$', re.I)\n\n\ndef _normalise_topic(s: str) -> str:\n    if not s:\n        return ''\n    s = re.sub(r'<[^>]+>', ' ', s)\n    s = re.split(r'[:—–]', s, 1)[0]\n    s = s.lower()\n    for a, b in (('á', 'a'), ('é', 'e'), ('í', 'i'), ('ó', 'o'),\n                 ('ú', 'u'), ('ñ', 'n')):\n        s = s.replace(a, b)\n    s = re.sub(r'[^a-z0-9 ]+', ' ', s)\n    return re.sub(r'\\s+', ' ', s).strip()\n\n\ndef _content_words(s: str) -> set:\n    stop = {'the', 'and', 'for', 'with', 'una', 'los', 'las', 'del', 'via',\n            'de', 'la', 'el', 'en', 'shadow', 'dynamics', 'brief', 'informe',\n            '2026', '2027'}\n    return {w for w in _normalise_topic(s).split() if len(w) >= 4 and w not in stop}\n\n\ndef check_slug_topic_consistency(html: str) -> str | None:\n    \"\"\"EDITORIAL-SELF-SLUG-01 — warn when the self-declared slug/title disagrees\n    with the brief's own \"BRIEF N — TOPIC\" header. 
Presence-conditional, WARN-only.\"\"\"\n    failures = []\n    self_topic = None\n    m = _SD_SLUG_RE.search(html)\n    if m:\n        try:\n            sd = json.loads(m.group(1))\n        except (ValueError, TypeError):\n            return 'SD-SLUG block does not parse as JSON (EDITORIAL-SELF-SLUG-01)'\n        if not isinstance(sd, dict):\n            return 'SD-SLUG block is not a JSON object (EDITORIAL-SELF-SLUG-01)'\n        missing = [k for k in ('en', 'es', 'slug')\n                   if not (isinstance(sd.get(k), str) and sd.get(k).strip())]\n        if missing:\n            failures.append('SD-SLUG missing/empty: ' + ', '.join(missing))\n        slug = str(sd.get('slug', ''))\n        if slug and not re.fullmatch(r'[A-Za-z0-9]+(?:_[A-Za-z0-9]+)*', slug):\n            failures.append(\n                f'SD-SLUG.slug must be Title_Case_Underscored ASCII (got {slug!r})')\n        self_topic = sd.get('en') if isinstance(sd.get('en'), str) else None\n    if not self_topic:\n        tm = _TITLE_RE.search(html)\n        if tm:\n            t = _TITLE_SUFFIX_RE.sub('', _TITLE_PREFIX_RE.sub('', tm.group(1))).strip()\n            self_topic = t or None\n    hm = _H1_BRIEF_TOPIC_RE.search(html)\n    h1_topic = hm.group(1).strip() if hm else None\n    if not h1_topic:\n        cm = _SD_COVERAGE_TITLE_RE.search(html)\n        if cm:\n            try:\n                cov = json.loads(cm.group(1))\n                ct = cov.get('title') if isinstance(cov, dict) else None\n                if isinstance(ct, str) and ct.strip():\n                    h1_topic = re.sub(\n                        r'^\\s*(?:brief|informe)\\s+\\d+\\s*[—–\\-]\\s*', '',\n                        ct.strip(), flags=re.I) or ct.strip()\n            except (ValueError, TypeError):\n                pass\n    if self_topic and h1_topic:\n        sw = _content_words(self_topic)\n        hw = _content_words(h1_topic)\n        if sw and hw and not (sw & hw):\n            failures.append(\n                
'cover topic vs H1 mismatch: self-declared '\n                + repr(_normalise_topic(self_topic))\n                + ' shares no content word with brief H1 '\n                + repr(_normalise_topic(h1_topic))\n                + ' (mis-slug class — e.g. Brief 9 Stablecoins named Payments_Infra)')\n    if not failures:\n        return None\n    return ('SLUG/topic inconsistency (EDITORIAL-SELF-SLUG-01): '\n            + '; '.join(failures))\n\n\ndef check_prediction_calibration(html: str) -> str | None:\n    \"\"\"EDITORIAL-PREDICTION-CALIBRATION-SELFDECL-01 (2026-05-30) — automated\n    prediction-calibration gate. Parses the cover SD-PRED-CAL block (the model's\n    machine-readable base-rate anchors for each formal prediction) and runs:\n    (a) deterministic NOT-ALREADY-TRUE — current_value already crosses the\n    threshold (catches the Brief 9 top-20-already-true class at zero LLM cost);\n    (b) calibration via evaluate-prediction-exante — data-derived q vs stated_p,\n    flagging over-confident predictions. Presence-conditional (silent if no\n    SD-PRED-CAL — legacy briefs). 
WARN here; elevated to a promote blocker via\n    PROMOTE_BLOCKERS so an already-true / over-confident prediction cannot ship.\n    This is the automated replacement for the manual spec->exante->inject dance.\"\"\"\n    m = re.search(r'<!--\\s*SD-PRED-CAL:\\s*(\\[.*?\\])\\s*-->', html, re.S)\n    if not m:\n        return None\n    try:\n        preds = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return ('SD-PRED-CAL block does not parse as JSON '\n                '(EDITORIAL-PREDICTION-CALIBRATION-SELFDECL-01)')\n    if not isinstance(preds, list):\n        return 'SD-PRED-CAL block is not a JSON array'\n    exante = None\n    try:\n        import importlib.util as _ilu\n        _p = os.path.join(os.path.dirname(os.path.abspath(__file__)),\n                          'evaluate-prediction-exante.py')\n        _spec = _ilu.spec_from_file_location('_exante_engine', _p)\n        exante = _ilu.module_from_spec(_spec)\n        _spec.loader.exec_module(exante)\n    except Exception:\n        exante = None\n    failures = []\n    for p in preds:\n        if not isinstance(p, dict):\n            continue\n        pid = p.get('id', '?')\n        cv, thr, d = p.get('current_value'), p.get('threshold'), p.get('direction')\n        # (a) deterministic NOT-ALREADY-TRUE / NOT-ALREADY-CONTRADICTED.\n        # \"above\" asserts the value WILL EXCEED thr -> already true if it already does.\n        # \"below\" asserts the value STAYS UNDER thr (a persistence claim) -> current\n        # being under thr is the PREMISE, not an already-resolved outcome; it is only\n        # broken if the value already EXCEEDS thr. 
Flagging a \"stays below\" claim whose\n        # current value is under the threshold was a false-positive that blocked\n        # legitimate persistence predictions (Brief 9 PRED 2 \"EUR stays below 5%\",\n        # current 0.8% — 2026-05-30).\n        if isinstance(cv, (int, float)) and isinstance(thr, (int, float)):\n            if d == 'above' and cv >= thr:\n                failures.append(\n                    f'{pid}: ALREADY TRUE at issue (current {cv} already exceeds {thr}) '\n                    '— not a forecast; re-scope threshold/horizon')\n                continue\n            if d == 'below' and cv > thr:\n                failures.append(\n                    f'{pid}: ALREADY CONTRADICTED at issue (current {cv} already exceeds '\n                    f'{thr}; a \"stays below\" claim is already violated) — re-scope')\n                continue\n        # (b) calibration via the exante engine (needs a trajectory)\n        sp = p.get('stated_p')\n        series = p.get('series')\n        if exante and isinstance(series, list) and len(series) >= 3 and isinstance(sp, (int, float)):\n            try:\n                spec = {\n                    'id': pid, 'claim': p.get('claim', ''),\n                    'stated_p': sp / 100.0 if sp > 1 else sp,\n                    'direction': d, 'threshold': thr,\n                    'resolve_date': p.get('resolve_date'), 'series': series,\n                }\n                # PRED-MARKET-Q-ANCHOR-01 (2026-05-30): where the entry carries a\n                # liquid-market implied probability, pass it through (normalised\n                # to 0-1) so the data-derived q blends the market anchor.\n                mq = p.get('market_q')\n                if isinstance(mq, (int, float)):\n                    spec['market_q'] = mq / 100.0 if mq > 1 else mq\n                r = exante.evaluate(spec)\n                if 'OVER' in r.get('verdict', ''):\n                    failures.append(\n                        f\"{pid}: 
OVER-CONFIDENT (stated {sp} vs data-derived \"\n                        f\"q~={round(r['q_blend'] * 100)})\")\n            except Exception:\n                pass\n    if not failures:\n        return None\n    return ('prediction calibration (EDITORIAL-PREDICTION-CALIBRATION-SELFDECL-01): '\n            + '; '.join(failures))\n\n\ndef _load_exante_engine():\n    \"\"\"Load the ex-ante prediction engine (PRED-EXANTE-LOGLINEAR-01). Returns the\n    module or None if unavailable. Mirrors the inline loader in\n    check_prediction_calibration; shared by prediction_calibration_targets.\"\"\"\n    try:\n        import importlib.util as _ilu\n        _p = os.path.join(os.path.dirname(os.path.abspath(__file__)),\n                          'evaluate-prediction-exante.py')\n        _spec = _ilu.spec_from_file_location('_exante_engine', _p)\n        mod = _ilu.module_from_spec(_spec)\n        _spec.loader.exec_module(mod)\n        return mod\n    except Exception:\n        return None\n\n\ndef prediction_calibration_targets(html: str) -> list:\n    \"\"\"Structured per-prediction calibration targets (STAGE2-GENCORE-CALIBRATION-LOOP-01).\n\n    Machine-readable companion to check_prediction_calibration: instead of a prose\n    WARN string, returns the data the gen-core calibration loop (or an operator\n    re-fire) injects back into generation. For each formal prediction carrying a\n    usable >=3-point trajectory + numeric stated_p, returns:\n        {id, stated_p (0-100), q_blend (0-100, honest data-derived q),\n         direction, threshold, unit, suggested_threshold (re-scoped so q~=0.55),\n         miscalibrated (|stated_p/100 - q_blend| > 0.12)}\n    Reuses the exante engine (geometric-aware q via PRED-EXANTE-LOGLINEAR-01). The\n    suggested_threshold is the value on a 0.3x-1.2x grid whose data-derived q is\n    closest to 0.55 — a defensible, differentiated call. 
Empty list if no\n    SD-PRED-CAL block, it doesn't parse, or the engine is unavailable.\"\"\"\n    m = re.search(r'<!--\\s*SD-PRED-CAL:\\s*(\\[.*?\\])\\s*-->', html, re.S)\n    if not m:\n        return []\n    try:\n        preds = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return []\n    if not isinstance(preds, list):\n        return []\n    exante = _load_exante_engine()\n    if exante is None:\n        return []\n    out = []\n    for p in preds:\n        if not isinstance(p, dict):\n            continue\n        sp, series = p.get('stated_p'), p.get('series')\n        d, thr = p.get('direction'), p.get('threshold')\n        if not (isinstance(series, list) and len(series) >= 3\n                and isinstance(sp, (int, float)) and isinstance(thr, (int, float))):\n            continue\n        spn = sp / 100.0 if sp > 1 else sp\n        base = {'id': p.get('id', '?'), 'claim': p.get('claim', ''),\n                'direction': d, 'resolve_date': p.get('resolve_date'), 'series': series}\n        try:\n            q = exante.evaluate({**base, 'stated_p': spn, 'threshold': float(thr)})['q_blend']\n        except Exception:\n            continue\n        suggested = None\n        try:\n            best = None\n            for f in (0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2):\n                cand = float(thr) * f\n                qq = exante.evaluate({**base, 'stated_p': 0.5, 'threshold': cand})['q_blend']\n                if best is None or abs(qq - 0.55) < abs(best[1] - 0.55):\n                    best = (cand, qq)\n            suggested = round(best[0], 2) if best else None\n        except Exception:\n            suggested = None\n        out.append({\n            'id': p.get('id', '?'), 'stated_p': round(spn * 100),\n            'q_blend': round(q * 100), 'direction': d, 'threshold': thr,\n            'unit': p.get('unit'), 'suggested_threshold': suggested,\n            'miscalibrated': abs(spn - q) > 0.12,\n        })\n    
return out\n\n\ndef check_market_divergence(html: str) -> str | None:\n    \"\"\"EDITORIAL-PREDICTION-MARKET-ANCHOR-01 (2026-05-30) — liquid-market\n    divergence flag. Parses the cover SD-PRED-CAL block; for any entry that\n    carries an optional `market_q` (the implied YES probability of a LIQUID\n    Polymarket/Kalshi market for the same outcome — accepted as 0-1 or 0-100,\n    normalised to a percentage) alongside `stated_p`, fires when SD's stated\n    probability diverges from the market by more than 20 points AND the entry\n    carries no non-empty `market_divergence_reason`.\n\n    Strategic frame: a material gap from a deep market is alpha-or-error worth\n    scrutiny — SD must either anchor to the market or declare an explicit\n    structural-mechanism reason for diverging. Presence-conditional (silent if\n    no SD-PRED-CAL, or if no entry carries market_q). WARN-ONLY — NOT a promote\n    blocker, since a declared, reasoned divergence may be legitimate alpha.\n    Sibling fetcher: scripts/fetch-market-q.py.\"\"\"\n    m = re.search(r'<!--\\s*SD-PRED-CAL:\\s*(\\[.*?\\])\\s*-->', html, re.S)\n    if not m:\n        return None\n    try:\n        preds = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return None  # parse error already surfaced by check_prediction_calibration\n    if not isinstance(preds, list):\n        return None\n    flags = []\n    for p in preds:\n        if not isinstance(p, dict):\n            continue\n        mq, sp = p.get('market_q'), p.get('stated_p')\n        if not isinstance(mq, (int, float)) or not isinstance(sp, (int, float)):\n            continue\n        market_pct = mq * 100.0 if mq <= 1 else float(mq)\n        stated_pct = sp * 100.0 if sp <= 1 else float(sp)\n        if abs(stated_pct - market_pct) <= 20:\n            continue\n        reason = p.get('market_divergence_reason')\n        if isinstance(reason, str) and reason.strip():\n            continue  # declared structural-mechanism reason 
— legitimate alpha\n        pid = p.get('id', '?')\n        flags.append(\n            f\"{pid}: stated {stated_pct:.0f} vs liquid-market {market_pct:.0f} \"\n            f\"(gap {abs(stated_pct - market_pct):.0f}pts), no divergence reason\")\n    if not flags:\n        return None\n    return ('market-q divergence (EDITORIAL-PREDICTION-MARKET-ANCHOR-01): '\n            + '; '.join(flags)\n            + ' — anchor to the market or declare a structural market_divergence_reason')\n\n\ndef check_market_differentiation(html: str) -> str | None:\n    \"\"\"EDITORIAL-PREDICTION-DIFFERENTIATION-01 (2026-05-31) — flags formal\n    predictions that don't earn a place in SD's niche. SD cannot compete with\n    liquid prediction markets (Polymarket/Kalshi/Metaculus) on questions they\n    already price, nor add value with near-certain calls readable off a public\n    tracker. Fires per-prediction when ANY of:\n      (1) MARKET-COVERED — entry carries market_q AND |stated_p - market_q| <= 20\n          (SD repricing a number a liquid market already owns). This is the\n          inverse of check_market_divergence (which flags the >20 case).\n      (2) NEAR-CERTAIN — data-derived q_blend >= 0.90 or <= 0.10 (exante engine);\n          trivially readable off the public trajectory.\n      (3) NO-EDGE — exante composition NEGATIVE / WEAK-positive (no structural\n          skill over base-rate / coin-flip).\n    Escape valve: a non-empty market_divergence_reason (a declared structural\n    mechanism the market hasn't absorbed) marks the prediction differentiated ->\n    silent. Presence-conditional, WARN-ONLY (advisory; calibrate before any\n    blocking). 
Sibling generation rule E6 self-censors at generation time.\"\"\"\n    m = re.search(r'<!--\\s*SD-PRED-CAL:\\s*(\\[.*?\\])\\s*-->', html, re.S)\n    if not m:\n        return None\n    try:\n        preds = json.loads(m.group(1))\n    except (ValueError, TypeError):\n        return None  # parse error already surfaced by check_prediction_calibration\n    if not isinstance(preds, list):\n        return None\n    exante = None\n    try:\n        import importlib.util as _ilu\n        _p = os.path.join(os.path.dirname(os.path.abspath(__file__)),\n                          'evaluate-prediction-exante.py')\n        _spec = _ilu.spec_from_file_location('_exante_engine_md', _p)\n        exante = _ilu.module_from_spec(_spec)\n        _spec.loader.exec_module(exante)\n    except Exception:\n        exante = None\n    flags = []\n    for p in preds:\n        if not isinstance(p, dict):\n            continue\n        pid = p.get('id', '?')\n        reason = p.get('market_divergence_reason')\n        if isinstance(reason, str) and reason.strip():\n            continue  # declared structural mechanism — differentiated, silent\n        sp = p.get('stated_p')\n        mq = p.get('market_q')\n        # (1) MARKET-COVERED — within 20pts of a liquid market for the same outcome\n        if isinstance(mq, (int, float)) and isinstance(sp, (int, float)):\n            market_pct = mq * 100.0 if mq <= 1 else float(mq)\n            stated_pct = sp * 100.0 if sp <= 1 else float(sp)\n            if abs(stated_pct - market_pct) <= 20:\n                flags.append(\n                    f\"{pid}: MARKET-COVERED (stated {stated_pct:.0f} vs liquid-market \"\n                    f\"{market_pct:.0f}, gap {abs(stated_pct - market_pct):.0f}pts — \"\n                    \"repricing a number a liquid market already owns)\")\n                continue\n        # (2)/(3) NEAR-CERTAIN / NO-EDGE via the exante engine (needs a trajectory)\n        series = p.get('series')\n        if exante and 
isinstance(series, list) and len(series) >= 3 and isinstance(sp, (int, float)):\n            try:\n                spec = {\n                    'id': pid, 'claim': p.get('claim', ''),\n                    'stated_p': sp / 100.0 if sp > 1 else sp,\n                    'direction': p.get('direction'), 'threshold': p.get('threshold'),\n                    'resolve_date': p.get('resolve_date'), 'series': series,\n                }\n                if isinstance(mq, (int, float)):\n                    spec['market_q'] = mq / 100.0 if mq > 1 else mq\n                r = exante.evaluate(spec)\n                qb = r.get('q_blend')\n                comp = r.get('composition', '')\n                sp_norm = sp / 100.0 if sp > 1 else sp\n                # NEAR-CERTAIN only when SD AGREES with an extreme data reading\n                # (low-information echo of the trajectory). A large stated_p-vs-q\n                # gap is a CONTRARIAN/structural call (or over-confidence) —\n                # that's the calibration gate's domain, not differentiation.\n                if isinstance(qb, (int, float)) and (\n                        (qb >= 0.90 and sp_norm >= 0.80) or (qb <= 0.10 and sp_norm <= 0.20)):\n                    flags.append(\n                        f\"{pid}: NEAR-CERTAIN (data-derived q~={round(qb * 100)}, \"\n                        f\"stated {round(sp_norm * 100)} agrees — trivially readable \"\n                        \"off the public trajectory)\")\n                # NO-EDGE = WEAK-positive only (near-base-rate, no structural skill).\n                # NEGATIVE composition = miscalibration -> calibration gate, not here.\n                elif comp.startswith('WEAK-positive'):\n                    flags.append(\n                        f\"{pid}: NO-EDGE ({comp} — no structural skill over base-rate)\")\n            except Exception:\n                pass\n    if not flags:\n        return None\n    return ('market differentiation 
(EDITORIAL-PREDICTION-DIFFERENTIATION-01): '\n            + '; '.join(flags)\n            + ' — replace with a differentiated structural call (name the mechanism '\n            'in market_divergence_reason) or drop; do not compete with liquid markets')\n\n\ndef check_banned_es_calcos(html: str) -> str | None:\n    \"\"\"RUNNER-ES-CALCO-GATE-01 (2026-05-31) — deterministic backstop for banned\n    Spanish calcos/jargon. The translator glossary (e.g. ratchet→NOT \"trinquete\")\n    is a prompt instruction that degrades under prompt-overload; this runner\n    check can't be dropped by the model. Reads data/banned_es_terms.yaml (terms\n    are Spanish-only tokens → document-wide scan is effectively ES-scoped), fires\n    on any un-negated match. Presence-conditional, WARN-only (calibrate-then-\n    ratchet). Sibling prompt rule: the translator VOCABULARIO 'JAMÁS uses' clause.\"\"\"\n    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),\n                        '..', 'data', 'banned_es_terms.yaml')\n    if not os.path.exists(path):\n        return None\n    try:\n        import yaml\n        with open(path, encoding='utf-8') as f:\n            doc = yaml.safe_load(f) or {}\n    except Exception:\n        return None\n    terms = doc.get('terms', []) if isinstance(doc, dict) else []\n    flags = []\n    for t in terms:\n        if not isinstance(t, dict) or not t.get('pattern'):\n            continue\n        m = re.search(t['pattern'], html, re.IGNORECASE)\n        if not m:\n            continue\n        neg = t.get('context_negate')\n        if neg:\n            window = html[max(0, m.start() - 120):m.end() + 120]\n            if re.search(neg, window, re.IGNORECASE):\n                continue\n        gloss = t.get('gloss', '')\n        flags.append(f\"'{m.group(0)}' (use: {gloss})\")\n    if not flags:\n        return None\n    return ('banned ES calco (RUNNER-ES-CALCO-GATE-01): '\n            + '; '.join(flags)\n            + ' — translator glossary 
def check_es_predcal_claim_parity(html: str) -> str | None:
    """RUNNER-ES-PREDCAL-PARITY-01 (2026-05-31) — bilingual-parity backstop for
    the brief-tab FORMAL PREDICTIONS summary.

    `_briefTabFormalPredictions` (Format Output) renders each prediction's
    `claim` from the English-only SD-PRED-CAL block for BOTH languages, so the
    ES summary shows English claims. Detection: every
    `<strong>PRED n — claim</strong>` heading is collected per prediction id;
    when the first two renders of an id carry the same non-trivial
    (> 20 chars) claim string, the ES claim was never translated.

    The dash between id and claim may be an em dash, en dash or hyphen, and
    "PRED1" / "PRED 1" are treated as the same id, so spacing or dash drift
    between the two language renders cannot hide a duplicate.

    WARN-only (advisory; the durable fix is `claim_es` in SD-PRED-CAL + FO
    rendering — EDITORIAL-PREDCAL-CLAIM-ES-01).

    Returns:
        None when no duplicated claim is found, else a one-line warning.
    """
    rendered = re.findall(
        r'<strong>\s*PRED\s*(\d+)\s*[—–\-]\s*([^<]+?)\s*</strong>', html)
    by_id: dict[str, list[str]] = {}
    for number, claim in rendered:
        # Canonical "PRED n" key regardless of how the id was spaced in the HTML.
        by_id.setdefault(f'PRED {number}', []).append(claim.strip())
    flags = []
    for pid, claims in by_id.items():
        # len > 20 keeps trivially short headings from firing.
        if len(claims) >= 2 and len(claims[0]) > 20 and claims[0] == claims[1]:
            flags.append(f"{pid}: brief-tab claim identical in EN+ES (ES claim untranslated)")
    if not flags:
        return None
    return ('ES prediction-claim parity (RUNNER-ES-PREDCAL-PARITY-01): '
            + '; '.join(flags)
            + ' — add claim_es to SD-PRED-CAL + render it in the ES brief-tab summary')
def _load_forward_events() -> list:
    """Load the `events` list from ../data/forward_events.yaml (relative to
    this script).

    The file is re-read on every call (nothing is cached here). Returns []
    when the file is missing, PyYAML is not importable, parsing fails, or the
    document does not carry a list under `events`.
    """
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        '..', 'data', 'forward_events.yaml')
    if not os.path.exists(path):
        return []
    try:
        import yaml
        with open(path, encoding='utf-8') as f:
            doc = yaml.safe_load(f) or {}
    except Exception:
        return []
    events = doc.get('events') if isinstance(doc, dict) else None
    # A null or scalar `events:` key must still honour the `-> list` contract.
    return events if isinstance(events, list) else []


def check_forward_event_date_consistency(html: str,
                                         events: list | None = None) -> str | None:
    """FORWARD-EVENT-DATE-REGISTRY-01 (2026-06-05) — deterministic guard against
    binding a load-bearing forward-event milestone to a date that contradicts the
    canonical timeline in data/forward_events.yaml.

    Motivation: Brief 9 / 2218 asserted the ECB "Pontes … Q1 2028 go-live"; Pontes
    actually goes live Q3 2026 — the Q1 2028 date belongs to the SEPARATE Appia
    track. The L4 Tavily judge flagged it (CONTRADICTED) but only as a
    NON-DETERMINISTIC warning that shipped. This converts that error class into a
    deterministic finding.

    Each registry event may carry `forbidden_bindings`: regexes representing a
    WRONG pairing of the event name with a contradicting date token (bilingual).
    A single regex given as a bare string is accepted as a one-element list.
    Patterns are matched case-insensitively against TAG-STRIPPED,
    whitespace-collapsed text so glossary/table bindings are visible. Patterns
    that are not valid regex strings are skipped. WARN-only
    (calibrate-then-ratchet); presence-conditional (no-op when no binding
    matches).

    Args:
        html: full brief HTML.
        events: optional pre-loaded registry (list of event dicts). When None
            (the default, which is what the runner passes) the registry is read
            via `_load_forward_events()`.

    Returns:
        None when the registry is empty or nothing matches, else a one-line
        warning with at most one finding per event.
    """
    if events is None:
        events = _load_forward_events()
    if not events:
        return None
    # Drop the correction/EoC meta-sections first: a `<section id="revisions">`
    # or `<section id="concern">` legitimately RESTATES the already-fixed error,
    # so scanning it would re-flag the very date the erratum documents as
    # corrected. The closing quote is part of the pattern so only the exact ids
    # are exempted, not an id="revisions-foo" prefix family.
    html = re.sub(r'<section id="(?:revisions|concern)"[^>]*>.*?</section>', ' ',
                  html, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', html)
    text = re.sub(r'\s+', ' ', text)
    flags = []
    for ev in events:
        if not isinstance(ev, dict):
            continue
        canonical = ev.get('canonical', '?')
        name = ev.get('name', ev.get('id', '?'))
        patterns = ev.get('forbidden_bindings') or []
        if isinstance(patterns, str):
            # YAML scalar instead of a list: treat it as ONE regex rather than
            # iterating it character by character.
            patterns = [patterns]
        elif not isinstance(patterns, (list, tuple)):
            continue
        for pat in patterns:
            try:
                m = re.search(pat, text, re.IGNORECASE)
            except (re.error, TypeError):
                # Invalid or non-string registry entry: ignore that pattern only.
                continue
            if m:
                snippet = re.sub(r'\s+', ' ', m.group(0)).strip()
                flags.append(f"{name}: '{snippet}' contradicts canonical {canonical}")
                break  # one finding per event is enough
    if not flags:
        return None
    return ('forward_event_date_consistency (FORWARD-EVENT-DATE-REGISTRY-01): '
            + '; '.join(flags)
            + ' — verify the brief against data/forward_events.yaml + the cited ECB source')
def _figure_appears(figure: str, text: str) -> bool:
    """True when `figure` occurs in `text` as a standalone number, i.e. not as
    a slice of a longer number ("12" inside "2012", "1,250" or "12.5")."""
    pattern = r'(?<!\d)(?<!\d[.,])' + re.escape(figure) + r'(?![.,]?\d)'
    return re.search(pattern, text) is not None


def check_prediction_selfdecl_grounding(html: str) -> str | None:
    """SD-PRED-CAL current_value grounding (2026-05-30).

    The not-already-true gate TRUSTS the model's self-declared current_value,
    so a fabricated/stale one slips past (Brief 9 PRED1 declared GENIUS
    current_value:0 though already enacted). For every entry of the
    `<!-- SD-PRED-CAL: [...] -->` block with a numeric (non-bool)
    current_value this verifies that it is:

      (a) CONSISTENT with its own series — the second element of the last
          series point must be within 25% of current_value (relative to
          max(|last|, 1)). Series points that are not indexable pairs with a
          numeric second element are skipped, not treated as errors.
      (b) GROUNDED in the brief body when |current_value| >= 2 — the figure
          must appear in the comment- and tag-stripped text as a standalone
          number, either verbatim or after removing commas/whitespace
          (so "1,250" grounds 1250, but "2012" does not ground 12).

    Binary/event current_values (abs < 2) are not independently verifiable
    here and rely on facts.yaml / L4 Tavily. Presence-conditional, WARN-only.

    Returns:
        None when the block is absent, unparsable, not a list, or every entry
        passes; otherwise a one-line warning joining all failures.
    """
    m = re.search(r'<!--\s*SD-PRED-CAL:\s*(\[.*?\])\s*-->', html, re.S)
    if not m:
        return None
    try:
        preds = json.loads(m.group(1))
    except (ValueError, TypeError):
        return None
    if not isinstance(preds, list):
        return None
    # body text with comments + tags stripped (so we don't match the block itself)
    body_text = re.sub(r'<[^>]+>', ' ', re.sub(r'<!--.*?-->', ' ', html, flags=re.S))
    body_nospace = re.sub(r'[,\s]', '', body_text)
    failures = []
    for p in preds:
        if not isinstance(p, dict):
            continue
        pid = p.get('id', '?')
        cv = p.get('current_value')
        # bool is an int subclass; a true/false current_value is not a figure.
        if not isinstance(cv, (int, float)) or isinstance(cv, bool):
            continue
        # (a) series consistency
        series = p.get('series')
        if isinstance(series, list) and series:
            try:
                last = float(series[-1][1])
            except (ValueError, TypeError, IndexError, KeyError):
                # KeyError: dict-shaped points ({"date": ..., "value": ...})
                # cannot be indexed positionally — skip instead of crashing.
                last = None
            if last is not None and abs(cv - last) / max(abs(last), 1.0) > 0.25:
                failures.append(
                    f'{pid}: current_value {cv} inconsistent with its own series '
                    f'(which ends at {last})')
        # (b) body grounding for non-trivial numeric values
        if abs(cv) >= 2:
            s = str(int(cv)) if float(cv).is_integer() else str(cv)
            if not _figure_appears(s, body_text) and not _figure_appears(s, body_nospace):
                failures.append(
                    f'{pid}: current_value {cv} not found in the brief body '
                    '(ungrounded — verify it is a real, fire-time figure)')
    if not failures:
        return None
    return ('SD-PRED-CAL current_value grounding '
            '(EDITORIAL-PREDICTION-CALIBRATION-SELFDECL-01): ' + '; '.join(failures))
# Completed-event wording inside a prediction claim ("enacted", "took effect", …).
_PRED_DONE_VERB = re.compile(
    r'\b(enacted|signed into law|passed into law|ratified|took effect|'
    r'came into (?:force|effect)|entered into force|in (?:force|effect)|'
    r'went live)\b', re.I)
# Future framing that turns the wording above back into a forecast.
_PRED_FUTURE_MARK = re.compile(
    r'\b(will|shall|would|expected to|projected to|set to|by 20\d\d|likely to|'
    r'on track to|forecast)\b', re.I)


def check_prediction_claim_state_consistency(html: str) -> str | None:
    """Prediction already-resolved signal (2026-05-30).

    Free, deterministic complement to the L4 Tavily event-verification
    backstop. Looks at every SD-PRED-CAL entry declared as NOT-YET-reached
    (direction == 'above' and numeric current_value < numeric threshold) and
    flags it when the claim text contains completed-event wording
    (`_PRED_DONE_VERB`) with no future marker (`_PRED_FUTURE_MARK`) in the
    window spanning 40 characters before to 10 characters after the first such
    wording. That is an internal contradiction with the current_value
    (Brief 9 PRED1: claim 'GENIUS Act enacted ...' with current_value:0): an
    already-resolved event is not a forecast. WARN-only; presence-conditional.

    Returns:
        None when the block is absent, unparsable, not a list, or nothing is
        flagged; otherwise a one-line warning joining all findings.
    """
    block = re.search(r'<!--\s*SD-PRED-CAL:\s*(\[.*?\])\s*-->', html, re.S)
    if block is None:
        return None
    try:
        entries = json.loads(block.group(1))
    except (ValueError, TypeError):
        return None
    if not isinstance(entries, list):
        return None

    numeric = (int, float)
    problems = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        current = entry.get('current_value')
        threshold = entry.get('threshold')
        direction = entry.get('direction')
        text = str(entry.get('claim', ''))
        if not isinstance(current, numeric) or not isinstance(threshold, numeric):
            continue
        # Only "not yet above the threshold" declarations are in scope.
        if direction != 'above' or not (current < threshold):
            continue
        done = _PRED_DONE_VERB.search(text)
        if done is None:
            continue
        context = text[max(0, done.start() - 40):done.end() + 10]
        if _PRED_FUTURE_MARK.search(context):
            continue
        problems.append(
            f"{entry.get('id', '?')}: claim asserts a completed event "
            f"('{done.group(0)}') but current_value {current} < threshold {threshold} "
            "treats it as not-yet — an already-resolved event is not a forecast")

    if problems:
        return ('prediction already-resolved signal '
                '(claim-state vs current_value): ' + '; '.join(problems))
    return None
def check_canonical_brief_structure(html: str) -> str | None:
    """RUNNER-CANONICAL-STRUCTURE-DIFF-01 (2026-05-22 PM) — programmatic gate
    against the data/canonical_brief_structure.yaml spec.

    Delegates the comparison to `_canonical_diff_findings(html)` and condenses
    its `must_fix` / `should_fix` lists into a single warning line showing the
    count and the first three items of each non-empty severity. Origin: the
    L7 narrative-only canonical review missed ~7 structural gaps in the
    Brief 8 v4 session.

    Memory: feedback_canonical_must_be_machine_readable_target_2026-05-22
    Spec: data/canonical_brief_structure.yaml
    Sibling tooling: scripts/canonical-brief-diff.py (CLI version)

    WARN-only Brief 8-10 calibration; ratchet BLOCKER Brief 11+ per
    [[feedback_runner_calibrate_then_ratchet]].

    Returns:
        None when the helper returns None or both lists are empty; a
        'diff failed' message when the helper raises; otherwise the summary.
    """
    try:
        findings = _canonical_diff_findings(html)
    except Exception as e:
        return f'canonical_brief_structure diff failed: {type(e).__name__}: {str(e)[:100]}'
    if findings is None:
        return None

    # Compose the WARN summary, MUST-FIX first; top-3 sample per severity.
    summary = []
    for label, key in (('MUST-FIX', 'must_fix'), ('SHOULD-FIX', 'should_fix')):
        items = findings.get(key, [])
        if items:
            summary.append(f'{len(items)} {label} (sample: {" | ".join(items[:3])})')
    if not summary:
        return None
    return (
        'canonical-structure diff vs Brief 7 spec: ' + '; '.join(summary) +
        '. Run `python3 scripts/canonical-brief-diff.py <brief>` for full report.'
    )
Origin: Brief 6\n    # (commit cb3ac78) ES TOC missing ACRÓNIMOS + en-sec-9 missing MOFCOM;\n    # caught only by post-promote human inspection. WARNING-class; promote\n    # to BLOCKER after one clean Brief 7+ window per calibrate-then-ratchet.\n    ('toc_anchor_integrity',            check_toc_anchor_integrity),\n    # EMPTY-ELEMENT-RUNNER-CHECK-01 (2026-05-17 PM) — detect CSS-classed\n    # empty elements orphaned by editorial surgery. Canonical: Brief 7 2151\n    # cover-meta empty div (fixed commit fa1faa2). Whitelist via\n    # <!-- intentionally-empty --> marker on preceding line.\n    ('empty_styled_elements',           check_empty_styled_elements),\n    ('investment_recommendation_patterns', check_investment_recommendation_patterns),\n    # EDITORIAL-LEGAL-06 A4 — cross-lingual topic self-reference detector\n    # (B5-PATTERN-EN bug class, commit 9cec1a0). Heuristic; may have false\n    # positives — pre-publish reviewer confirms.\n    ('cross_lingual_topic_consistency',    check_cross_lingual_topic_consistency),\n    # EDITORIAL-LEGAL-06 A5 / EL-04 D5.5 — sources density floor (≥10\n    # distinct URLs in footnotes + sources block; target 12-13). Conditional\n    # no-op on legacy briefs without footnote markup or sources block.\n    ('sources_density_minimum',            check_sources_density_minimum),\n    # EDITORIAL-LEGAL-02 D01 — anchor-number → footnote-URL gate. WARNING for\n    # baseline week (Brief 5 baseline; promote to BLOCKER after 1 week clean,\n    # following the L01/footnote_integrity precedent). Conditional: brief\n    # with 0 <li id=\"fn-N\"> entries → no-op.\n    ('anchor_number_footnote_url',         check_anchor_number_footnote_url),\n    # Bilingual sources presence — catches Pattern C (EN-only sources block on\n    # a brief that has body fn-N refs). Origin: Briefs 3-5 erratum 2026-05-08\n    # (commit 7d87415). 
WARNING; promote to BLOCKER after a stable corpus window.\n    # Cross-ref project_sd_briefs_sources_block_patterns.md.\n    ('bilingual_sources_present',          check_bilingual_sources_present),\n    # RUNNER-CHECK-FOOTNOTE-SCOPE-MISMATCH-01 (2026-05-25) — detects footnotes\n    # anchoring claims from different dates without source-scope disambiguation.\n    # Canonical: Brief 8 fn-8 (US Treasury sb0197 dated 18 July 2025) anchored\n    # Nov 2025 + Jun 2025 claims. Fixed via Source-scope note + [SD-attribution]\n    # in fn-8 body (commit aa08039). WARN tier initially; promote to BLOCKER\n    # after 2 briefs stable per calibrate-then-ratchet. Auto-suppresses on\n    # footnote bodies containing Source-scope note / [SD-attribution] /\n    # [SD-aggregate] / [SD-estimate] / [SD-inference] markers.\n    ('footnote_scope_date_mismatch',       check_footnote_scope_date_mismatch),\n    # EDITORIAL-LEGAL-02 L03 — listed-entity density × financial-metric proximity.\n    # WARNING (not BLOCKER) because legitimate sectoral discussion may name\n    # companies. Catalogue at data/listed-entities.yaml; chat-Claude proposes\n    # additions per EDITORIAL-LEGAL-06 role 2.\n    ('listed_entity_density',              check_listed_entity_density),\n    ('placeholder_template_failure',       check_placeholder_template_failure),\n    # EDITORIAL-LEGAL-02 P03 partial — false-friend \"billón/trillón\" detector\n    # in ES scope. WARNING (not BLOCKER) — promotion attempted 2026-05-27 PM\n    # was reverted same-session after Brief 8 W22 false-positive cluster\n    # surfaced: all 59 hits were legitimate 10^12 translations (Brief 8 EN\n    # uses \"T\" shorthand `$6.5T`/`¥175.49T` which the original context-negate\n    # didn't recognize). Context-negate regex extended same commit to handle\n    # T-shorthand; severity kept at WARN to preserve human-review surface on\n    # any future genuine slip without gating publish. 
Full P03 numerical-\n    # equivalence sweep still deferred.\n    ('cross_lingual_false_friend',  check_cross_lingual_false_friend),\n    # RUNNER-DEFENSIBILITY-01 DEF01-DEF03 — defensibility heuristics\n    # 2026-05-02. Calibrated against 5-brief published corpus\n    # (reports/defensibility-audit_2026-05-02.md). All WARNING in first\n    # round; promote to BLOCKER after stable false-positive baseline.\n    ('def01_formal_predictions_present',  check_def01_formal_predictions_present),\n    ('def02_primary_contribution_present', check_def02_primary_contribution_present),\n    ('def03_canonical_url_floor',          check_def03_canonical_url_floor),\n    # Added 2026-05-15 PM per Brief 7 audit C1: DOUBLE TRACK tables with\n    # `GAP 0-10` column must have directional legend; without it readers\n    # misread HIGH = consonance instead of incoherence. WARNING; calibrate\n    # against Brief 8+ before considering BLOCKER promotion.\n    ('double_track_gap_legend',            check_double_track_gap_legend),\n    # D-QA-22 — kill-criterion floor #2 width. Operator-adopted 2026-05-04 (Y):\n    # ≥2 predictions with falsifiable_by_date ≤ 2026-10-31 per Brief 6/7/8\n    # to widen base from 3 → 9. WARNING (heuristic guidance, not enforcing);\n    # legacy briefs honestly flag the gap that motivated the rule.\n    ('predictions_floor2_width',           check_predictions_floor2_width),\n    # Brief 7 prep priorities #8 item 4 — structural-malformation WARN\n    # consolidates h3-inside-p + dark-bg-bare-strong + toc-sync-broken +\n    # sections-inside-tab-full. Calibrate against Brief 7+8; ratchet to\n    # BLOCKER Brief 9+ per feedback_runner_calibrate_then_ratchet.\n    ('html_structural_integrity',          check_html_structural_integrity),\n    # FORECASTING-DISCIPLINE-LESSONS-2026-05-12-01 sub-item 3: posterior-predictive\n    # checks per Gelman pattern. 
Validate Tier A E3/E5/E9 emission discipline by\n    # encoding what brief output SHOULD look like and measuring vs prediction.\n    # WARN-only; calibrate against Brief 7+8 emission under Tier A; ratchet to\n    # BLOCKER Brief 9+ per feedback_runner_calibrate_then_ratchet.\n    ('prediction_e3_cluster_id_reference', check_prediction_e3_cluster_id_reference),\n    ('prediction_e5_ternary_sum_100',      check_prediction_e5_ternary_sum_100),\n    ('prediction_e9_pos_threshold_numeric',check_prediction_e9_pos_threshold_numeric),\n    # A2 PREDICTION-EMISSION-VALIDATOR-01 (2026-05-15 PM) — extends existing\n    # E3/E5/E9 per-emission format checks with E1/E2/E4 per-prediction semantic\n    # validation. WARN-only during Brief 7-8 calibration; promote to BLOCKER\n    # Brief 9+ if FP rate ≤1/brief per feedback_runner_calibrate_then_ratchet.\n    ('prediction_e1_base_rate_sentence',     check_prediction_e1_base_rate_sentence),\n    ('prediction_e2_named_verification_doc', check_prediction_e2_named_verification_doc),\n    ('prediction_e4_class_label',            check_prediction_e4_class_label),\n    # RUNNER-PREDICTION-DISCIPLINE-CHECKS-01 (2026-05-18) — brier-retro-W20 §6.2+§6.1\n    # supplements closing class-distribution + compound-structure gaps. WARN-only\n    # Brief 7-8 calibration; ratchet BLOCKER Brief 9+ per\n    # [[feedback_runner_calibrate_then_ratchet]].\n    ('prediction_class_distribution',        check_prediction_class_distribution),\n    ('prediction_compound_structure',        check_prediction_compound_structure),\n    # PREDICTIONS-SUB-DEADLINE-LADDER-DISTRIBUTION-01 (2026-05-18) — brier-retro\n    # §3 anti-clustering + horizon distribution. WARN-only Brief 7-8; ratchet\n    # BLOCKER Brief 9+ per [[feedback_runner_calibrate_then_ratchet]].\n    ('prediction_deadline_distribution',     check_prediction_deadline_distribution),\n    # Extensions B + D + G + A5 (2026-05-18 PM bundle) — spec §4 closure +\n    # net-new long-horizon disclosure. 
WARN-only Brief 8+ calibration;\n    # ratchet BLOCKER Brief 10+ per [[feedback_runner_calibrate_then_ratchet]].\n    ('prediction_adversarial_premortem',     check_prediction_adversarial_premortem),\n    ('prediction_multi_path_elicitation',    check_prediction_multi_path_elicitation),\n    ('brief_type_zero_catalog',              check_brief_type_zero_catalog),\n    ('prediction_long_horizon_disclosure',   check_prediction_long_horizon_disclosure),\n    # MERMAID-PER-FEEDBACK-LOOP-RUNNER-CHECK-01 (2026-05-17 PM) — defensive\n    # guard for EDITORIAL-PROMPT-FEEDBACK-LOOPS-DIAGRAM-PER-LOOP-01 prompt\n    # rule. WARN-only Brief 7-8 calibration; ratchet BLOCKER Brief 9+ per\n    # [[feedback_runner_floor_with_nudge_high_leverage_2026-05-17]].\n    ('mermaid_per_feedback_loop',           check_mermaid_per_feedback_loop),\n    # RUNNER-FIRST-USE-EXPANSION-CHECK-LF20-01 (2026-05-17 PM) — Thread B\n    # acronym canon mechanization. 2+ operator catches in Brief 7 retrofit\n    # → mechanize per [[feedback_operator_catch_threshold_triggers_mechanization_2026-05-17]].\n    # WARN-only Brief 7-8 calibration; ratchet BLOCKER Brief 9+.\n    ('acronym_first_use_expansion',         check_acronym_first_use_expansion),\n    # RUNNER-CANONICAL-STRUCTURE-DIFF-01 (2026-05-22 PM) — programmatic gate\n    # against data/canonical_brief_structure.yaml. Catches L7-narrative-missed\n    # structural gaps. 
WARN-only Brief 8-10 calibration; ratchet BLOCKER\n    # Brief 11+ per [[feedback_runner_calibrate_then_ratchet]].\n    ('canonical_brief_structure',           check_canonical_brief_structure),\n    ('canonical_brief_must_fix',            check_canonical_brief_must_fix),\n    ('single_glance_coverage',              check_single_glance_coverage),\n    ('slug_topic_consistency',              check_slug_topic_consistency),\n    ('prediction_calibration',              check_prediction_calibration),\n    # EDITORIAL-PREDICTION-MARKET-ANCHOR-01 (2026-05-30) — flags a material gap\n    # (>20pts) between SD's stated_p and a declared liquid-market market_q when\n    # no market_divergence_reason is given (alpha-or-error worth scrutiny).\n    # WARN-ONLY — NOT a promote blocker (reasoned divergence may be legit alpha).\n    # Sibling fetcher: scripts/fetch-market-q.py.\n    ('market_divergence',                   check_market_divergence),\n    # EDITORIAL-PREDICTION-DIFFERENTIATION-01 (2026-05-31) — inverse of\n    # market_divergence: flags predictions that are MARKET-COVERED (within 20pts\n    # of a liquid market), NEAR-CERTAIN (q_blend>=.9/<=.1), or NO-EDGE\n    # (WEAK/NEGATIVE composition). SD's niche is differentiated structural calls,\n    # not competing with Polymarket/Kalshi/Metaculus. WARN-ONLY (advisory).\n    ('market_differentiation',              check_market_differentiation),\n    # RUNNER-ES-CALCO-GATE-01 (2026-05-31) — deterministic backstop for banned\n    # Spanish calcos (data/banned_es_terms.yaml; trinquete/weaponizar/…). The\n    # durable guard the translator glossary prompt-line can't be. WARN-only.\n    ('banned_es_calcos',                    check_banned_es_calcos),\n    # FORWARD-EVENT-DATE-REGISTRY-01 (2026-06-05) — deterministic guard that a\n    # load-bearing forward-event milestone (ECB Pontes/Appia/digital-euro, …) isn't\n    # bound to a date contradicting data/forward_events.yaml. 
Origin: Brief 9 / 2218\n    # \"Pontes … Q1 2028\" (real go-live Q3 2026; Q1 2028 = Appia) — caught only by a\n    # non-deterministic L4 Tavily warning. WARN-only (calibrate-then-ratchet).\n    ('forward_event_date_consistency',      check_forward_event_date_consistency),\n    # RUNNER-ES-PREDCAL-PARITY-01 (2026-05-31) — brief-tab FORMAL PREDICTIONS\n    # bilingual-parity backstop: flags a prediction claim rendered byte-identical\n    # in EN+ES (ES untranslated, from English-only SD-PRED-CAL.claim). WARN-only.\n    ('es_predcal_claim_parity',             check_es_predcal_claim_parity),\n    # SD-PRED-CAL current_value grounding (2026-05-30) — verifies the model's\n    # self-declared current_value is series-consistent + grounded in the brief\n    # body, so the not-already-true gate can't trust a fabricated/stale number.\n    ('prediction_selfdecl_grounding',       check_prediction_selfdecl_grounding),\n    # Prediction already-resolved signal (2026-05-30) — free deterministic\n    # complement to L4 Tavily: claim asserts a completed event but current_value\n    # treats it as not-yet (Brief 9 PRED1 GENIUS 'enacted' + current_value:0).\n    ('prediction_claim_state_consistency',  check_prediction_claim_state_consistency),\n    # RUNNER-INTERNAL-BASELINE-CONSISTENCY-01 (2026-05-26) — sibling to\n    # check_companion_brief_consistency, scope = brief-INTERNAL contradictions.\n    # Canonical: Brief 8 §II Scenario A stale \"$2T per Bessent Jun 2025\" vs\n    # §I+§III+§VI revised \"$3T by 2030 Nov 2025\" — same entity, same metric,\n    # different value across sections. Per BRIEF-8-CRITICAL-REVIEW-FOLLOWUP-01\n    # Phase D. WARN-tier first instance per\n    # [[feedback_runner_calibrate_then_ratchet]]. 
Revision-pair tolerance:\n    # sections that co-disclose both values within ±200 chars (e.g.,\n    # \"$3T (revision from $2T baseline)\") are NOT flagged.\n    ('internal_baseline_consistency',       check_internal_baseline_consistency),\n    # BRIEF-INFOGRAPHIC-PANEL-01 (2026-05-30) — glance_panel_parity. Guards the\n    # \"Brief at a Glance\" infographic panel deployed today into the Forecaster\n    # (renders Brief 9+ only). PRESENCE-CONDITIONAL: silent when no glance-panel\n    # element exists, so the 8 pre-glance briefs never fire. When present:\n    # bilingual lang-en/lang-es parity within the panel + cover↔panel base-case\n    # % consistency. WARN-only; ratchet BLOCKER after a clean Brief 9-11 window\n    # per [[feedback_runner_calibrate_then_ratchet]].\n    ('glance_panel_parity',                 check_glance_panel_parity),\n    # EDITORIAL-COVERAGE-BALANCE-01 Part B (2026-05-30) — coverage_balance_metadata.\n    # Validates the self-emitted `<!-- SD-COVERAGE: {...} -->` block that\n    # brief-saver.py:ingest_coverage_tags parses into data/coverage_tags.json on\n    # promote. Brief 9 is the FIRST brief to emit it (rule live in the Forecaster\n    # since 2026-05-29). PRESENCE-CONDITIONAL: silent when no SD-COVERAGE block\n    # exists, so the 8 legacy briefs predating the rule never fire. 
When present:\n    # WARN if the JSON does not parse OR is missing a required key (epistemic_stance,\n    # horizon, frame, sector, regions, title) — the only gate that catches a\n    # malformed/incomplete emission (ingest is best-effort + silently no-ops).\n    # WARN-only; ratchet BLOCKER after a clean Brief 9-11 window per\n    # [[feedback_runner_calibrate_then_ratchet]].\n    ('coverage_balance_metadata',           check_coverage_balance_metadata),\n    # EDITORIAL-COVERAGE-BALANCE-01 Part A (2026-06-05) — coverage_counter_frame_present.\n    # Ledger directional diversity: a brief with >=3 formal predictions MUST carry\n    # >=1 honest direction:counter-frame prediction (detected via the [COUNTER-FRAME]\n    # / \"Direction: counter-frame\" brief-tab marker). PRESENCE-CONDITIONAL (silent\n    # unless >=3 SD-PRED-CAL entries). WARN on the corpus runner (legacy briefs\n    # self-document as gaps); name added to brief-saver PROMOTE_BLOCKERS so a NEW\n    # brief cannot ship one-directional. Fixes the \"good Brier reads as luck\" risk\n    # for the 2026-08-15 / 2026-10-31 first-resolution batches.\n    ('coverage_counter_frame_present',      check_coverage_counter_frame_present),\n    # BRIEF-GLANCE-DIGEST-01 (2026-05-30) — glance_digest. Validates the\n    # self-emitted `<!-- SD-GLANCE: {...} -->` machine-readable digest block\n    # (thesis/headline_prediction/key_figures/takeaway). Distinct from the\n    # rendered glance-panel guarded by glance_panel_parity. PRESENCE-CONDITIONAL:\n    # silent when no SD-GLANCE block exists, so legacy briefs never fire. When\n    # present: WARN if JSON does not parse / wrong shape / prob out of 55-95 /\n    # by not ISO / key_figures incomplete. 
WARN-only; ratchet BLOCKER after a\n    # clean window per [[feedback_runner_calibrate_then_ratchet]].\n    ('glance_digest',                       check_glance_digest),\n]\n\n\ndef _derive_brand_from_filename(filename: str | None) -> str | None:\n    \"\"\"Derive brand from brief filename prefix:\n    - SD_*       → 'shadow-dynamics'\n    - CL_* / CLAVE_* → 'clave'\n    - other / None → None (no brand filter; full registry applies)\n\n    FORK-PREP-GAP-1-DATA-BRAND-AWARE-01: brand-aware facts/listed-entities\n    filtering keyed off brief filename. SD_*.html prefix is canonical per\n    existing convention; CL_*/CLAVE_* reserved for first Clave brief naming.\n    \"\"\"\n    if not filename:\n        return None\n    base = os.path.basename(filename).upper()\n    if base.startswith('SD_'):\n        return 'shadow-dynamics'\n    if base.startswith('CL_') or base.startswith('CLAVE_'):\n        return 'clave'\n    return None\n\n\n# Checks that accept an optional brand context. Run via dispatch in run_checks\n# rather than the BLOCKERS/WARNINGS Callable[[str], ...] 
# Checks that accept an optional brand context; run_checks passes `brand=` to them.
_BRAND_AWARE_CHECKS = frozenset({'critical_facts', 'listed_entity_density'})

# Checks that accept the optional `sample_mode` kwarg (Approach A plumb).
# Adding a check name here is enough to route `--sample N` through; the check
# is responsible for honoring `sample_mode=int|None` per its docstring.
_SAMPLE_AWARE_CHECKS = frozenset({
    'anchor_number_footnote_url',
    'cross_lingual_false_friend',
})


def run_checks(html: str, briefs_dir: str | None = None,
               check_urls: bool = False,
               filename: str | None = None,
               sample_mode: int | None = None) -> list[CheckResult]:
    """Run every registered check against `html` and return the CheckResults.

    Order of the returned list: all BLOCKERS, all WARNINGS, then the
    special-case checks (local_link_integrity when `briefs_dir` is given,
    pattern_citations, companion_brief_consistency, and url_integrity when
    `check_urls` is set). A result is `passed` exactly when the check
    returned None; otherwise its return value is stored as `detail`.

    Args:
        html: brief HTML to check.
        briefs_dir: briefs directory; enables the local link-integrity check
            and is forwarded to the pattern-citation and companion-brief checks.
        check_urls: when True, enables network-dependent URL HEAD probes (off
            by default — CI shouldn't depend on outbound network).
        filename: feeds brand derivation for brand-aware checks
            (`_BRAND_AWARE_CHECKS`) and the companion-brief lookup. When None
            those checks run with brand=None (back-compat: full registry).
        sample_mode: when an integer, it is forwarded as `sample_mode=` to the
            checks named in `_SAMPLE_AWARE_CHECKS`; off by default. Origin:
            2026-05-27 PM per
            [[feedback_w_report_raw_count_vs_sampled_population]].
    """
    brand = _derive_brand_from_filename(filename)
    results: list[CheckResult] = []

    def record(name: str, severity: str, detail) -> None:
        results.append(CheckResult(
            name=name, severity=severity,
            passed=detail is None, detail=detail,
        ))

    for severity, registry in (('blocker', BLOCKERS), ('warning', WARNINGS)):
        for name, fn in registry:
            extra = {}
            if name in _BRAND_AWARE_CHECKS:
                extra['brand'] = brand
            if sample_mode is not None and name in _SAMPLE_AWARE_CHECKS:
                extra['sample_mode'] = sample_mode
            record(name, severity, fn(html, **extra))

    # Special-case: link integrity needs filesystem context
    if briefs_dir:
        record('local_link_integrity', 'warning',
               check_local_link_integrity(html, briefs_dir))

    # Special-case: pattern citations check optionally consults briefs_dir
    record('pattern_citations', 'warning',
           check_pattern_citations(html, briefs_dir=briefs_dir))

    # RUNNER-COMPANION-BRIEF-CONSISTENCY-01 (2026-05-26) — special-case dispatch
    # because the check needs BOTH briefs_dir + filename to locate companion
    # files. It is always invoked; both may be None for CLI/HTTP callers that
    # don't pass filesystem context.
    record('companion_brief_consistency', 'warning',
           check_companion_brief_consistency(
               html, brand=brand, briefs_dir=briefs_dir, filename=filename,
           ))

    # Network-dependent: cited URL integrity
    if check_urls:
        record('url_integrity', 'warning',
               check_url_integrity(html, network=True))

    return results
WARN-tier first instance per [[feedback_runner_calibrate_then_ratchet]].\n    # No-op when either arg missing or companions absent — preserves CLI/HTTP\n    # callers that don't pass filesystem context.\n    detail = check_companion_brief_consistency(\n        html, brand=brand, briefs_dir=briefs_dir, filename=filename,\n    )\n    results.append(CheckResult(\n        name='companion_brief_consistency', severity='warning',\n        passed=detail is None, detail=detail,\n    ))\n\n    # Network-dependent: cited URL integrity\n    if check_urls:\n        detail = check_url_integrity(html, network=True)\n        results.append(CheckResult(\n            name='url_integrity', severity='warning',\n            passed=detail is None, detail=detail,\n        ))\n\n    return results\n\n\ndef summarise(results: list[CheckResult]) -> dict:\n    blockers = [r for r in results if r.severity == 'blocker' and not r.passed]\n    warnings = [r for r in results if r.severity == 'warning' and not r.passed]\n    return {\n        'pass': len(blockers) == 0,\n        'blockers': [asdict(r) for r in blockers],\n        'warnings': [asdict(r) for r in warnings],\n        'total_checks': len(results),\n    }\n\n\ndef log_failure(filename: str, summary: dict, log_path: str = '/var/log/sd-quality-failures.log') -> None:\n    \"\"\"Append failed checks to a log for retrospective analysis.\"\"\"\n    if summary['pass'] and not summary['warnings']:\n        return  # nothing worth logging\n    try:\n        with open(log_path, 'a') as f:\n            entry = {\n                'ts': datetime.now(timezone.utc).isoformat(),\n                'file': filename,\n                **summary,\n            }\n            f.write(json.dumps(entry, ensure_ascii=False) + '\\n')\n    except OSError:\n        # Failure to log is not failure of the check; just stderr it\n        sys.stderr.write(f'[check-briefs] could not write {log_path}\\n')\n\n\n# ── CLI 
──────────────────────────────────────────────────────────────────────\n\n_BRIEFS_DIR_BY_BRAND = {\n    'shadowdynamics': '/root/n8n/local-files/briefs',\n    'clave':          '/root/n8n/local-files/briefs-clave',\n}\n\n\ndef main(argv: list[str] | None = None) -> int:\n    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])\n    parser.add_argument('paths', nargs='*',\n                        help='HTML files or globs (default: scan brand briefs dir)')\n    parser.add_argument('--brand', choices=['shadowdynamics', 'clave'],\n                        default='shadowdynamics',\n                        help='Brand context — affects default --briefs-dir and default scan path')\n    parser.add_argument('--briefs-dir', default=None,\n                        help='Briefs directory for link-integrity check (default: brand-specific)')\n    parser.add_argument('--json', action='store_true', help='Emit JSON output')\n    parser.add_argument('--no-log', action='store_true',\n                        help='Do not append failures to log file')\n    parser.add_argument('--blockers-only', action='store_true',\n                        help='Exit non-zero only on blocker failures')\n    parser.add_argument('--check-urls', action='store_true',\n                        help='Enable network-dependent URL HEAD probes')\n    # `--sample [N]` with optional int (default 20 when flag passed bare; off\n    # when flag absent). Sample-aware checks (D01 + P03 today; extensible via\n    # `_SAMPLE_AWARE_CHECKS`) attach a categorized indented sample block under\n    # their warning line. Origin 2026-05-27 PM per\n    # [[feedback_w_report_raw_count_vs_sampled_population]].\n    #\n    # Implementation note: avoid `nargs='?'+type=int` because it greedily\n    # consumes the next argv token and dies on non-int positionals (paths).\n    # Instead pre-scan argv: if `--sample` followed by digits, capture N; else\n    # default 20. 
Strip from argv before argparse so the positional is clean.\n    sample_mode: int | None = None\n    if argv is None:\n        _argv = list(sys.argv[1:])\n    else:\n        _argv = list(argv)\n    i = 0\n    while i < len(_argv):\n        a = _argv[i]\n        if a == '--sample':\n            sample_mode = 20\n            # peek next token\n            if i + 1 < len(_argv) and _argv[i + 1].lstrip('-').isdigit():\n                try:\n                    sample_mode = int(_argv[i + 1])\n                except ValueError:\n                    pass\n                else:\n                    del _argv[i + 1]\n            del _argv[i]\n            continue\n        if a.startswith('--sample='):\n            try:\n                sample_mode = int(a.split('=', 1)[1])\n            except ValueError:\n                sample_mode = 20\n            del _argv[i]\n            continue\n        i += 1\n    args = parser.parse_args(_argv)\n    args.sample = sample_mode\n\n    if args.briefs_dir is None:\n        args.briefs_dir = _BRIEFS_DIR_BY_BRAND[args.brand]\n\n    files: list[str] = []\n    if not args.paths:\n        files = sorted(glob.glob(os.path.join(args.briefs_dir, '*.html')))\n    else:\n        for p in args.paths:\n            if os.path.isfile(p):\n                files.append(p)\n            else:\n                files.extend(glob.glob(p))\n        files = sorted(set(files))\n\n    if not files:\n        print('no files matched', file=sys.stderr)\n        return 2\n\n    overall_blockers = 0\n    overall_warnings = 0\n    summaries: dict[str, dict] = {}\n\n    for fp in files:\n        html = open(fp, encoding='utf-8').read()\n        results = run_checks(html, briefs_dir=args.briefs_dir,\n                             check_urls=args.check_urls,\n                             filename=fp,\n                             sample_mode=args.sample)\n        summary = summarise(results)\n        summaries[fp] = summary\n\n        if not args.no_log and 
(summary['blockers'] or summary['warnings']):\n            log_failure(fp, summary)\n\n        overall_blockers += len(summary['blockers'])\n        overall_warnings += len(summary['warnings'])\n\n    if args.json:\n        print(json.dumps(summaries, indent=2, ensure_ascii=False))\n    else:\n        for fp, s in summaries.items():\n            verdict = 'PASS' if s['pass'] and not s['warnings'] else \\\n                      'WARN' if s['pass'] else 'FAIL'\n            print(f'{verdict}  {os.path.basename(fp)}')\n            for b in s['blockers']:\n                print(f'   BLOCK  {b[\"name\"]}: {b[\"detail\"]}')\n            for w in s['warnings']:\n                print(f'   warn   {w[\"name\"]}: {w[\"detail\"]}')\n        print()\n        print(f'Files: {len(files)}  Blockers: {overall_blockers}  '\n              f'Warnings: {overall_warnings}')\n\n    if args.blockers_only:\n        return 1 if overall_blockers > 0 else 0\n    return 1 if (overall_blockers + overall_warnings) > 0 else 0\n\n\nif __name__ == '__main__':\n    sys.exit(main())\n"}