#!/usr/bin/env python3
"""Weekly review data collector: git + Gitea API → markdown report.

Pulls last N days (default 7) of activity from:
  - git log on origin/<default-branch>
  - Gitea API: PRs, issues, actions runs, branches
  - .learnings/ directory

Outputs a fact-only markdown report to testing/reports/retros/<YYYY-Www>.md.
Companion script `weekly-retro-issue.py` posts the report as a Gitea issue
(label: weekly-retro). Pattern analysis + candidate improvements + closure are
all carried by that issue (open = unresolved, close = closed loop).

This script does NOT do AI analysis and does NOT manage closure state.

Idempotent: re-running for the same ISO week overwrites the same file.

Usage:
    python3 scripts/ops/weekly-review.py                   # last 7 days, write file
    python3 scripts/ops/weekly-review.py --since-days 14   # custom window
    python3 scripts/ops/weekly-review.py --print           # stdout instead of file
    python3 scripts/ops/weekly-review.py --out /tmp/r.md   # custom path

Auth:
    Reads GITEA_API_TOKEN (or GITEA_TOKEN) from env. Required.
"""

from __future__ import annotations

import argparse
import json
import re
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent))
from _gitea_api import Api, detect_default_branch, detect_repo, get_token, normalize_email_local, paginate  # noqa: E402

# Repository root: two directory levels above the directory holding this script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory scanned by collect_learnings() for dated learning notes.
LEARNINGS_DIR = REPO_ROOT / ".learnings"
# Default output directory for the generated weekly report (see main()).
REPORTS_DIR = REPO_ROOT / "testing" / "reports" / "retros"

# Run outcomes that get their own per-workflow counter in collect_actions().
ACTION_OUTCOMES = ("success", "failure", "cancelled", "skipped")
# Maximum title length in one-line PR/issue summaries before truncation with "...".
TITLE_MAX = 80

# PRs whose base ref is one of these are counted as "release" merges; all others as "feature".
RELEASE_BASE_REFS = ("staging", "production", "main", "master")
# File names that start with a YYYY-MM-DD- date prefix.
LEARNINGS_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}-")
# Captures the git ref that follows "@" inside a workflow run's `path` value.
PATH_REF_RE = re.compile(r"@(refs/(?:heads|tags|pull)/[^@\s]+)")
# Matches a pull-request head ref and captures the PR number.
PULL_REF_RE = re.compile(r"^refs/pull/(\d+)/head$")
# Matches a branch ref and captures the branch name.
HEADS_REF_RE = re.compile(r"^refs/heads/(.+)$")


def _now_local() -> datetime:
    return datetime.now().astimezone()


def _normalize_author_key(email: str, name: str) -> str:
    """Derive one identity key per human, whatever git config they committed with.

    The same contributor can appear under several name/email pairs (for
    example a real address plus one or more `noreply` addresses), and counters
    keyed on the display name would count that person once per variant. The
    value returned by `normalize_email_local(email)` is therefore preferred;
    only when it is empty does the key fall back to the stripped, lower-cased
    name, or to "unknown" when the name is empty too.
    """
    email_key = normalize_email_local(email)
    if not email_key:
        fallback = name if name else "unknown"
        return fallback.strip().lower()
    return email_key


def _pick_display_name(name_counts: Counter) -> str:
    """Prefer a proper-cased name when the same key has multiple display forms."""
    if not name_counts:
        return "unknown"
    proper = {n: c for n, c in name_counts.items() if any(ch.isupper() for ch in n)}
    if proper:
        return max(proper.items(), key=lambda x: x[1])[0]
    return name_counts.most_common(1)[0][0]


def _extract_ref(run: dict) -> str:
    """Work out which ref a workflow run was triggered from.

    The ref embedded in the run's `path` (`workflow.yml@refs/...`) is tried
    first, because `head_branch` is reported as null for `pull_request`
    events while `path` carries the ref for every event type. Pull-request
    refs are rendered as `pr-<number>`, branch refs as the bare branch name,
    and any other ref verbatim. Without a usable `path`, `head_branch` is
    used, and "unknown" when that is empty as well.
    """
    found = PATH_REF_RE.search(run.get("path") or "")
    if found is None:
        return run.get("head_branch") or "unknown"

    ref = found.group(1)
    pull = PULL_REF_RE.match(ref)
    if pull is not None:
        return f"pr-{pull.group(1)}"
    branch = HEADS_REF_RE.match(ref)
    return branch.group(1) if branch is not None else ref


def _classify_module(path: str) -> str:
    """Map a file path to a coarse module/topic label for management view."""
    if path.startswith(".gitea/workflows/"):
        return "ci/workflows"
    if path.startswith(".githooks/"):
        return "ci/hooks"
    if path.startswith(".learnings/"):
        return "learnings"
    if path.startswith("docs/standards/"):
        return "docs/standards"
    if path.startswith("docs/ops/"):
        return "docs/ops"
    if path.startswith("docs/modules/"):
        m = re.match(r"docs/modules/([^/]+)/", path)
        return f"module:{m.group(1)}" if m else "docs/modules"
    if path.startswith("docs/"):
        return "docs/other"
    m = re.match(r"\.(?:agents|claude)/skills/([^/]+)/", path)
    if m:
        return f"skill:{m.group(1)}"
    if path.startswith(".agents/") or path.startswith(".claude/"):
        return "agents/config"
    if path.startswith("backend/src/modules/"):
        m = re.match(r"backend/src/modules/([^/]+)/", path)
        return f"module:{m.group(1)}" if m else "backend/modules"
    if path.startswith("backend/src/core/"):
        m = re.match(r"backend/src/core/([^/]+)/", path)
        return f"backend-core:{m.group(1)}" if m else "backend/core"
    if path.startswith("backend/"):
        return "backend/other"
    m = re.match(r"frontend/src/app/\(([^)]+)\)/", path)
    if m:
        return f"frontend:{m.group(1)}"
    if path.startswith("frontend/src/locales/"):
        m = re.match(r"frontend/src/locales/([^/]+)/", path)
        return f"i18n:{m.group(1)}" if m else "frontend/i18n"
    if path.startswith("frontend/"):
        return "frontend/other"
    if path.startswith("scripts/dev/agent-pool/"):
        return "scripts/agent-pool"
    if path.startswith("scripts/dev/"):
        return "scripts/dev"
    if path.startswith("scripts/ops/"):
        return "scripts/ops"
    if path.startswith("scripts/deploy/"):
        return "scripts/deploy"
    if path.startswith("scripts/"):
        return "scripts/other"
    if path.startswith("prisma/"):
        return "prisma"
    if path.startswith("testing/"):
        return "testing"
    return "other"


def _prev_iso_week(year: int, week: int) -> tuple[int, int]:
    """ISO week math that wraps year boundaries (week 1 → prev year's last week)."""
    cur_monday = datetime.fromisocalendar(year, week, 1)
    prev_monday = cur_monday - timedelta(days=7)
    iso = prev_monday.isocalendar()
    return iso.year, iso.week


def _parse_candidates(body: str) -> list[str]:
    """Extract candidate titles from a '## 候选改进' section in an issue body."""
    if not body:
        return []
    m = re.search(r"^## 候选改进\s*$(.+?)(?:^## |\Z)", body, re.MULTILINE | re.DOTALL)
    if not m:
        return []
    section = m.group(1)
    return [c.strip() for c in re.findall(r"^- \*\*(.+?)\*\*", section, re.MULTILINE)]


def _parse_iso8601(s: str) -> datetime:
    return datetime.fromisoformat(s)


def _safe_parse(s):
    """Parse a timestamp via `_parse_iso8601`, returning None instead of raising.

    Falsy input (None, empty string) and values that fail with ValueError or
    TypeError all yield None.
    """
    if not s:
        return None
    try:
        return _parse_iso8601(s)
    except (ValueError, TypeError):
        return None


# ---------- Data collectors ----------

def collect_prs(api: Api, cutoff: datetime) -> dict:
    """Bucket pull requests by what happened to them since `cutoff`.

    Fetches PRs in every state (up to 10 pages) and returns a dict with:
      - opened_in_window / merged_in_window / closed_no_merge_in_window:
        PRs whose created / merged / closed-without-merge time is >= cutoff
      - open_now: every PR currently open
      - open_long: open PRs created at least 3 whole days ago
      - avg_merge_hours plus feature_* / release_* count and average:
        creation-to-merge hours of the merged PRs, split by whether the base
        ref is listed in RELEASE_BASE_REFS
    """
    pulls = paginate(api, "/pulls", {"state": "all", "sort": "newest"}, max_pages=10)
    now = _now_local()

    def _mean(values):
        return sum(values) / len(values) if values else 0.0

    opened, merged, abandoned = [], [], []
    still_open, lingering = [], []
    feature_hours, release_hours = [], []

    for pull in pulls:
        created = _safe_parse(pull.get("created_at"))
        merged_when = _safe_parse(pull.get("merged_at"))
        is_open = pull.get("state") == "open"

        if is_open:
            still_open.append(pull)
        if created and created >= cutoff:
            opened.append(pull)

        if merged_when and merged_when >= cutoff:
            merged.append(pull)
            if created:
                lifetime = (merged_when - created).total_seconds() / 3600
                target = (pull.get("base") or {}).get("ref", "")
                bucket = release_hours if target in RELEASE_BASE_REFS else feature_hours
                bucket.append(lifetime)

        # Closed without ever being merged, i.e. abandoned or redone elsewhere.
        if pull.get("closed_at") and not pull.get("merged_at"):
            closed_when = _safe_parse(pull["closed_at"])
            if closed_when and closed_when >= cutoff:
                abandoned.append(pull)

        if is_open and created and (now - created).days >= 3:
            lingering.append(pull)

    return {
        "opened_in_window": opened,
        "merged_in_window": merged,
        "closed_no_merge_in_window": abandoned,
        "open_now": still_open,
        "open_long": lingering,
        "avg_merge_hours": _mean(feature_hours + release_hours),
        "feature_count": len(feature_hours),
        "feature_avg_hours": _mean(feature_hours),
        "release_count": len(release_hours),
        "release_avg_hours": _mean(release_hours),
    }


def collect_issues(api: Api, cutoff: datetime) -> dict:
    """Bucket issues (pull requests excluded) by activity since `cutoff`.

    Fetches issues in every state (up to 10 pages) and returns a dict with:
      - opened_in_window: issues created at or after the cutoff
      - closed_in_window: issues closed at or after the cutoff
      - open_now: every issue currently open
      - avg_close_hours: mean creation-to-close time, in hours, of the issues
        closed in the window (0 when there are none)

    An issue whose `created_at` is missing or unparseable is simply not
    counted as opened in the window.
    """
    raw = paginate(
        api, "/issues",
        {"state": "all", "type": "issues", "sort": "newest"},
        max_pages=10,
    )

    opened_in_window = []
    closed_in_window = []
    open_now = []
    close_hours = []
    for issue in raw:
        created = _safe_parse(issue.get("created_at"))
        closed = _safe_parse(issue.get("closed_at"))
        # Explicit None check rather than substituting datetime.min.astimezone(),
        # which can raise (e.g. OverflowError) depending on platform/UTC offset.
        if created and created >= cutoff:
            opened_in_window.append(issue)
        if closed and closed >= cutoff:
            closed_in_window.append(issue)
            if created:
                close_hours.append((closed - created).total_seconds() / 3600)
        if issue.get("state") == "open":
            open_now.append(issue)

    avg_close_hours = sum(close_hours) / len(close_hours) if close_hours else 0
    return {
        "opened_in_window": opened_in_window,
        "closed_in_window": closed_in_window,
        "open_now": open_now,
        "avg_close_hours": avg_close_hours,
    }


def collect_actions(api: Api, cutoff: datetime) -> dict:
    """Summarize workflow runs that started (or completed) since `cutoff`.

    Per the original author's notes: Gitea's /actions/runs ignores `limit`
    and returns all runs in one response, the time field is `started_at`
    (not `created_at`), and the outcome lives in `conclusion` (`status` is
    always 'completed' for finished runs).

    Returns a dict with:
      - total: number of runs inside the window
      - by_workflow: workflow file → {"total", one counter per
        ACTION_OUTCOMES entry, "durations_sec": [seconds, ...]}
      - by_branch: source ref (see `_extract_ref`) → run count
      - failed: the raw run dicts whose outcome is "failure"

    A non-200 response, an unparseable body or a missing/null
    `workflow_runs` list is reported on stderr where applicable and treated
    as "no runs" rather than aborting the whole report.
    """
    code, txt = api.get("/actions/runs", {"limit": 50})
    raw = []
    if code != 200:
        print(f"WARN: GET /actions/runs → HTTP {code}", file=sys.stderr)
    else:
        try:
            data = json.loads(txt)
        except ValueError as exc:  # json.JSONDecodeError is a ValueError
            print(f"WARN: GET /actions/runs → unparseable body ({exc})", file=sys.stderr)
            data = None
        if isinstance(data, dict):
            # `or []` also covers an explicit JSON null.
            raw = data.get("workflow_runs") or []

    in_win = []
    for r in raw:
        ts = _safe_parse(r.get("started_at")) or _safe_parse(r.get("completed_at"))
        if ts and ts >= cutoff:
            in_win.append(r)

    by_workflow: dict[str, dict] = defaultdict(
        lambda: {"total": 0, **{k: 0 for k in ACTION_OUTCOMES}, "durations_sec": []}
    )
    by_branch: Counter = Counter()
    failed = []

    for r in in_win:
        # str() guards against a non-string identifier reaching .split().
        wf = str(r.get("path") or r.get("workflow_id") or "unknown").split("@")[0]
        stats = by_workflow[wf]
        stats["total"] += 1
        outcome = str(r.get("conclusion") or r.get("status") or "unknown").lower()
        if outcome in ACTION_OUTCOMES:
            stats[outcome] += 1
        if outcome == "failure":
            failed.append(r)
        st = _safe_parse(r.get("started_at"))
        en = _safe_parse(r.get("completed_at"))
        # Skip runs with missing stamps or an end time before the start time.
        if st and en and en >= st:
            stats["durations_sec"].append((en - st).total_seconds())
        by_branch[_extract_ref(r)] += 1

    return {
        "total": len(in_win),
        "by_workflow": dict(by_workflow),
        "by_branch": dict(by_branch),
        "failed": failed,
    }


def collect_stale_branches(api: Api, default_branch: str, stale_days: int = 7) -> list:
    """List branches whose latest commit is older than `stale_days` days.

    The default branch and the long-lived `production` / `staging` / `main` /
    `master` branches are never reported. Branches without a parseable commit
    timestamp are skipped.

    Returns a list of dicts (name, last_commit_at, age_days, author), oldest
    first.
    """
    raw = paginate(api, "/branches", {}, max_pages=10)
    # One clock read: the cutoff and every age_days derive from the same instant.
    now = _now_local()
    cutoff = now - timedelta(days=stale_days)
    protected = {default_branch, "production", "staging", "main", "master"}

    stale = []
    for b in raw:
        name = b.get("name")
        if name in protected:
            continue
        commit = b.get("commit") or {}
        author = commit.get("author") or {}
        ts = commit.get("timestamp") or author.get("date")
        dt = _safe_parse(ts)
        if not dt or dt >= cutoff:
            continue
        stale.append({
            "name": name,
            "last_commit_at": ts,
            "age_days": (now - dt).days,
            # `or ""` so a null author name does not surface as "None".
            "author": author.get("name") or "",
        })
    stale.sort(key=lambda b: b["age_days"], reverse=True)
    return stale


def collect_git_stats(default_branch: str, since: datetime) -> dict:
    """Summarize commits on ``origin/<default_branch>`` made since `since`.

    Runs a single ``git log`` whose output interleaves one
    ``COMMIT <sha>|<name>|<email>|<date>|<subject>`` header per commit with
    that commit's changed file paths (one per line, until the next header).
    Author identity is keyed by `_normalize_author_key`, so one human using
    several git configs collapses into a single row.

    Returns a dict with:
      - commits: one dict per commit (sha, author, email, author_key, date,
        subject); ``date`` is git's ``%ai`` string, unmodified
      - by_author: display name → commit count, most active first
      - hot_files: (path, count) pairs touched at least 3 times, drawn from
        the 30 most-touched paths
      - active_days: number of distinct calendar days with a commit
      - module_stats: per-module commit / file-change counts, sorted by
        commits then file changes, both descending

    A failing or timed-out ``git log`` is reported on stderr and treated as
    "no commits".
    """
    since_iso = since.strftime("%Y-%m-%d %H:%M:%S")
    ref = f"origin/{default_branch}"
    cmd = [
        # core.quotepath=off keeps non-ASCII paths as raw UTF-8 instead of
        # C-quoted octal escapes, which would defeat prefix classification.
        "git", "-c", "core.quotepath=off", "log", ref, f"--since={since_iso}",
        "--format=COMMIT %H|%aN|%aE|%ai|%s", "--name-only",
    ]
    try:
        raw = subprocess.check_output(cmd, cwd=REPO_ROOT, timeout=30)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
        print(f"WARN: git log {ref} failed: {exc}", file=sys.stderr)
        raw = b""
    # errors="replace": a stray non-UTF-8 byte must not abort the report.
    out = raw.decode("utf-8", errors="replace").strip()

    def _author_day(stamp: str):
        """Calendar day of a git ``%ai`` stamp (``YYYY-MM-DD HH:MM:SS +ZZZZ``)."""
        # %ai is only ISO-like (space before the offset) and is rejected by
        # datetime.fromisoformat, so it is parsed with an explicit format.
        try:
            return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S %z").date()
        except ValueError:
            parsed = _safe_parse(stamp)
            return parsed.date() if parsed else None

    commits = []
    file_counter: Counter = Counter()
    author_names: dict[str, Counter] = defaultdict(Counter)
    author_counts: Counter = Counter()
    commits_by_module: dict[str, set] = defaultdict(set)
    files_by_module: Counter = Counter()
    active_dates = set()
    current_sha = None

    for line in out.split("\n"):
        if line.startswith("COMMIT "):
            # maxsplit=4 keeps any "|" inside the subject intact.
            parts = line[7:].split("|", 4)
            if len(parts) != 5:
                continue
            sha, name, email, date, subject = parts
            key = _normalize_author_key(email, name)
            commits.append({
                "sha": sha, "author": name, "email": email,
                "author_key": key, "date": date, "subject": subject,
            })
            author_names[key][name] += 1
            author_counts[key] += 1
            day = _author_day(date)
            if day is not None:
                active_dates.add(day)
            current_sha = sha
        elif line.strip():
            path = line.strip()
            file_counter[path] += 1
            module = _classify_module(path)
            files_by_module[module] += 1
            if current_sha:
                commits_by_module[module].add(current_sha)

    hot_files = [(f, c) for f, c in file_counter.most_common(30) if c >= 3]

    # Two distinct keys can resolve to the same display name; add their
    # counts together instead of letting the later one overwrite the earlier.
    by_author: dict[str, int] = {}
    for key, count in author_counts.most_common():
        display = _pick_display_name(author_names[key])
        by_author[display] = by_author.get(display, 0) + count

    module_stats = sorted(
        (
            {
                "module": m,
                "commits": len(commits_by_module[m]),
                "file_changes": files_by_module[m],
            }
            for m in files_by_module
        ),
        key=lambda x: (-x["commits"], -x["file_changes"]),
    )

    return {
        "commits": commits,
        "by_author": by_author,
        "hot_files": hot_files,
        "active_days": len(active_dates),
        "module_stats": module_stats,
    }


def collect_learnings(since: datetime) -> list:
    """Return the names of dated learning files on or after the cutoff date.

    Only `*.md` files directly inside LEARNINGS_DIR whose name starts with a
    `YYYY-MM-DD-` prefix are considered, which keeps undated index files such
    as `ERRORS.md` / `LEARNINGS.md` / `FEATURE_REQUESTS.md` out of the weekly
    count even when they were edited. The date is taken from the file name
    and compared as a string against the cutoff's `YYYY-MM-DD`. Names come
    back sorted; a missing directory yields an empty list.
    """
    if not LEARNINGS_DIR.exists():
        return []
    earliest = since.strftime("%Y-%m-%d")
    return [
        entry.name
        for entry in sorted(LEARNINGS_DIR.glob("*.md"))
        if LEARNINGS_DATE_RE.match(entry.name) and entry.name[:10] >= earliest
    ]


def collect_previous_week_retro(api: Api, iso_year: int, iso_week: int) -> dict | None:
    """Look up the weekly-retro issue of the ISO week before the given one.

    Issues labelled `weekly-retro` are fetched (one request, up to 50) and
    filtered by exact title. When several match, a closed issue is preferred
    over an open one and, within the same state, the most recently created.

    Returns None when the request fails or no issue matches. Closure is
    tracked at issue level only, so the result carries just the candidate
    titles parsed from the body; deciding what carried over is left to the
    reader.
    """
    prev_year, prev_week = _prev_iso_week(iso_year, iso_week)
    wanted_title = f"周复盘 {prev_year}-W{prev_week:02d}"
    query = {"state": "all", "type": "issues", "labels": "weekly-retro",
             "limit": 50, "sort": "newest"}
    status, payload = api.get("/issues", query)
    if status != 200:
        print(f"WARN: prev-week issue lookup → HTTP {status}", file=sys.stderr)
        return None

    def _rank(candidate):
        # Lower tuples win: closed (False) before open, then newest first.
        created = _safe_parse(candidate.get("created_at"))
        newest_first = -(created.timestamp() if created else 0)
        return (candidate.get("state") != "closed", newest_first)

    same_week = [entry for entry in json.loads(payload) if entry.get("title") == wanted_title]
    if not same_week:
        return None
    chosen = min(same_week, key=_rank)
    return {
        "title": wanted_title,
        "number": chosen.get("number"),
        "state": chosen.get("state"),
        "url": chosen.get("html_url"),
        "closed_at": chosen.get("closed_at"),
        "candidates": _parse_candidates(chosen.get("body") or ""),
        "iso_year": prev_year,
        "iso_week": prev_week,
    }


# ---------- Markdown rendering ----------

def fmt_pct(n, d):
    """Format n/d as a whole-number percentage, or "N/A" when d is zero/empty."""
    if not d:
        return "N/A"
    share = 100 * n / d
    return f"{share:.0f}%"


def fmt_dur_sec(sec: float) -> str:
    """Render a duration in seconds as minutes ("1.5m") or, from 60 minutes up, hours ("1.5h")."""
    minutes = sec / 60
    return f"{minutes:.1f}m" if minutes < 60 else f"{minutes / 60:.1f}h"


def _short_item(item: dict, *, with_base: bool = False) -> str:
    """Build the one-line `#<number> <title> — <user>` summary of a PR or issue.

    Newlines in the title become spaces and titles longer than TITLE_MAX are
    cut and end in "...". The user is taken from `username`, then `login`,
    else "?". With `with_base=True` the base ref is appended as ` → <ref>`.
    """
    number = item.get("number")
    headline = (item.get("title") or "").replace("\n", " ").strip()
    if len(headline) > TITLE_MAX:
        headline = f"{headline[:TITLE_MAX - 3]}..."
    author = item.get("user") or {}
    handle = author.get("username") or author.get("login") or "?"
    pieces = [f"#{number} {headline} — {handle}"]
    if with_base:
        target = (item.get("base") or {}).get("ref", "?")
        pieces.append(f" → {target}")
    return "".join(pieces)


def short_pr(pr: dict) -> str:
    """Return the one-line summary of a pull request, including its base ref."""
    return _short_item(item=pr, with_base=True)


def short_issue(it: dict) -> str:
    """Return the one-line summary of an issue (no base ref)."""
    return _short_item(item=it, with_base=False)


def _render_header(ctx: dict) -> list[str]:
    """Title line, generation metadata and the facts-only disclaimer."""
    win_start = ctx["window_start"]
    win_end = ctx["window_end"]
    iso = win_end.isocalendar()
    iso_year, iso_week = iso.year, iso.week
    return [
        f"# Weekly Review · {iso_year}-W{iso_week:02d} ({win_start.strftime('%b %d')} – {win_end.strftime('%b %d')})",
        "",
        f"**生成时间**: {ctx['generated_at'].strftime('%Y-%m-%d %H:%M %z')}",
        f"**数据源**: git origin/{ctx['default_branch']} + Gitea API ({ctx['repo']})",
        f"**窗口**: {win_start.strftime('%Y-%m-%d %H:%M')} → {win_end.strftime('%Y-%m-%d %H:%M')} ({ctx['window_days']} days)",
        "",
        "> 本报告**只列事实**。pattern 识别 / 候选改进 / 闭环承载在对应 Gitea issue（label: `weekly-retro`，open=未闭环 / close=完成）。",
        "",
        "---",
        "",
    ]


def _render_prev_retro(prev_retro: dict) -> list[str]:
    """Section 0: state and candidate list of the previous week's retro issue."""
    py, pw = prev_retro["iso_year"], prev_retro["iso_week"]
    state = prev_retro["state"]
    n = prev_retro["number"]
    url = prev_retro["url"] or ""
    out = [f"## 0. 上周复盘状态（{py}-W{pw:02d}）", ""]
    state_emoji = "✅" if state == "closed" else "🟡"
    closed_note = ""
    if state == "closed" and prev_retro.get("closed_at"):
        cl = _safe_parse(prev_retro["closed_at"])
        if cl:
            closed_note = f"，关闭于 {cl.strftime('%Y-%m-%d %H:%M')}"
    out.append(f"- Issue: [#{n}]({url}) — **{state}** {state_emoji}{closed_note}")
    cands = prev_retro.get("candidates") or []
    if cands:
        out.append(f"- 上周候选改进 ({len(cands)})：")
        out.extend(f"  - {c}" for c in cands)
    else:
        out.append("- 上周 issue 无 `## 候选改进` 段（首次跑或 skill 未介入）。")
    if state != "closed":
        out.append("")
        out.append("> ⚠️ 上周 issue 仍 open。建议本周 candidates 前先 review 上周遗留，必要时 carry-over。")
    out.append("")
    return out


def _render_overview(ctx: dict) -> list[str]:
    """Section 1: headline numbers table plus the contributor line."""
    prs = ctx["prs"]
    issues = ctx["issues"]
    actions = ctx["actions"]
    git = ctx["git"]
    branches = ctx["stale_branches"]
    learnings = ctx["learnings"]

    per_workflow = list(actions["by_workflow"].values())
    actions_total = actions["total"]
    actions_success = sum(w["success"] for w in per_workflow)
    actions_fail = sum(w["failure"] for w in per_workflow)
    actions_cancelled = sum(w["cancelled"] for w in per_workflow)
    pr_merged_note = (
        f"feature {prs['feature_count']} (avg {prs['feature_avg_hours']:.1f}h)"
        f", release {prs['release_count']} (avg {prs['release_avg_hours']:.1f}h)"
    )

    out = [
        "## 1. 数字概览",
        "",
        "| 指标 | 本周 | 备注 |",
        "|---|---|---|",
        f"| Commit (default branch) | {len(git['commits'])} | {git['active_days']} 个活跃天 |",
        f"| PR opened | {len(prs['opened_in_window'])} | |",
        f"| PR merged | {len(prs['merged_in_window'])} | {pr_merged_note} |",
        f"| PR closed (no merge) | {len(prs['closed_no_merge_in_window'])} | 放弃 / 重做 |",
        f"| PR open (current) | {len(prs['open_now'])} | 含 long-lived |",
        f"| PR long-lived (>3d) | {len(prs['open_long'])} | |",
        f"| Issue opened | {len(issues['opened_in_window'])} | |",
        f"| Issue closed | {len(issues['closed_in_window'])} | 平均关闭 {issues['avg_close_hours']:.1f}h |",
        f"| Issue open (current) | {len(issues['open_now'])} | |",
        f"| Actions runs | {actions_total} | success {actions_success} ({fmt_pct(actions_success, actions_total)}), fail {actions_fail}, cancelled {actions_cancelled} |",
        f"| Stale branches | {len(branches)} | ≥7 天无活动 |",
        f"| .learnings 新增 | {len(learnings)} | |",
        "",
    ]
    if git["by_author"]:
        ranked = sorted(git["by_author"].items(), key=lambda x: -x[1])
        out.append("**贡献者**: " + ", ".join(f"{a}({c})" for a, c in ranked))
        out.append("")
    return out


def _render_pr_section(prs: dict) -> list[str]:
    """Section 2: merged, closed-unmerged, long-lived and newly opened PRs."""
    merged = prs["merged_in_window"]
    out = ["## 2. PR 活动", "", f"### 已合并 ({len(merged)})", ""]
    if merged:
        for pr in sorted(merged, key=lambda p: p.get("merged_at") or "", reverse=True):
            mg = _safe_parse(pr.get("merged_at"))
            cr = _safe_parse(pr.get("created_at"))
            line = f"- {short_pr(pr)}"
            # Only add the lifetime when it is known, so the line never ends
            # in a dangling " — " separator.
            if mg and cr:
                h = (mg - cr).total_seconds() / 3600
                line += f" — 存活 {h:.1f}h"
            out.append(line)
    else:
        out.append("_（本周没有 PR 合并）_")
    out.append("")

    abandoned = prs["closed_no_merge_in_window"]
    if abandoned:
        out.append(f"### 关闭未合并 ({len(abandoned)})")
        out.append("")
        out.extend(f"- {short_pr(pr)}" for pr in abandoned)
        out.append("")

    long_lived = prs["open_long"]
    if long_lived:
        out.append(f"### 长生命 PR (open >3 天, {len(long_lived)})")
        out.append("")
        now = _now_local()
        for pr in sorted(long_lived, key=lambda p: p.get("created_at") or ""):
            cr = _safe_parse(pr.get("created_at"))
            age = (now - cr).days if cr else "?"
            out.append(f"- {short_pr(pr)} — opened {age} 天前")
        out.append("")

    opened_not_merged = [p for p in prs["opened_in_window"] if not p.get("merged_at")]
    if opened_not_merged:
        out.append(f"### 本周新开 (未合并, {len(opened_not_merged)})")
        out.append("")
        out.extend(f"- {short_pr(pr)} — state: {pr.get('state')}" for pr in opened_not_merged)
        out.append("")
    return out


def _render_issue_section(issues: dict) -> list[str]:
    """Section 3: issues opened and closed inside the window."""
    opened = issues["opened_in_window"]
    closed = issues["closed_in_window"]
    out = ["## 3. Issue 活动", ""]
    if opened:
        out.append(f"### 本周新开 ({len(opened)})")
        out.append("")
        out.extend(f"- {short_issue(it)} — state: {it.get('state')}" for it in opened)
        out.append("")
    if closed:
        out.append(f"### 本周关闭 ({len(closed)})")
        out.append("")
        for it in closed:
            cl = _safe_parse(it.get("closed_at"))
            cr = _safe_parse(it.get("created_at"))
            dur = ""
            if cl and cr:
                h = (cl - cr).total_seconds() / 3600
                dur = f" — 关闭耗时 {h:.1f}h"
            out.append(f"- {short_issue(it)}{dur}")
        out.append("")
    if not opened and not closed:
        out.append("_（本周无 issue 变更）_")
        out.append("")
    return out


def _render_actions_section(actions: dict) -> list[str]:
    """Section 4: per-workflow table, per-branch counts and failed runs."""
    out = ["## 4. CI / Actions", ""]
    if actions["by_workflow"]:
        out.append("### Workflow 分布")
        out.append("")
        out.append("| Workflow | 总数 | success | failure | cancelled | 平均时长 |")
        out.append("|---|---|---|---|---|---|")
        for wf, s in sorted(actions["by_workflow"].items(), key=lambda x: -x[1]["total"]):
            durations = s["durations_sec"]
            # No measured run: show N/A rather than a misleading "0.0m".
            avg_cell = fmt_dur_sec(sum(durations) / len(durations)) if durations else "N/A"
            out.append(f"| {wf} | {s['total']} | {s['success']} | {s['failure']} | {s['cancelled']} | {avg_cell} |")
        out.append("")
    if actions["by_branch"]:
        top_branches = sorted(actions["by_branch"].items(), key=lambda x: -x[1])[:10]
        out.append("**按分支**: " + ", ".join(f"{b}={c}" for b, c in top_branches))
        out.append("")
    if actions["failed"]:
        out.append(f"### 失败 run ({len(actions['failed'])})")
        out.append("")
        for r in actions["failed"][:30]:
            wf = (r.get("path") or "").split("@")[0]
            br = _extract_ref(r)
            sha = (r.get("head_sha") or "")[:8]
            url = r.get("html_url") or r.get("url") or ""
            title = (r.get("display_title") or "").replace("\n", " ")[:60]
            out.append(f"- [{wf}] branch={br} sha={sha} — {title} — {url}")
        out.append("")
    return out


def _render_repo_sections(branches: list, git: dict) -> list[str]:
    """Sections 5, 6 and 6.1: stale branches, hot files and module coverage."""
    out = ["## 5. Stale Branches (≥7 天无活动)", ""]
    if branches:
        out.extend(f"- `{b['name']}` — {b['age_days']} 天前 — {b['author']}" for b in branches[:30])
    else:
        out.append("_（无）_")
    out.append("")

    out.append("## 6. 热点文件 (本周变更 ≥3 次)")
    out.append("")
    if git["hot_files"]:
        out.extend(f"- `{f}` — {c} 次" for f, c in git["hot_files"])
    else:
        out.append("_（无文件本周变更 ≥3 次）_")
    out.append("")

    out.append("## 6.1 模块维度 (按 commit 覆盖数)")
    out.append("")
    module_stats = git.get("module_stats") or []
    top_modules = [m for m in module_stats if m["commits"] >= 2][:15]
    if top_modules:
        out.append("| 模块 / 主题 | commits | file changes |")
        out.append("|---|---|---|")
        out.extend(f"| `{m['module']}` | {m['commits']} | {m['file_changes']} |" for m in top_modules)
    else:
        out.append("_（无模块本周被 ≥2 个 commit 覆盖）_")
    out.append("")
    return out


def _render_learnings_section(learnings: list) -> list[str]:
    """Section 7: new learning files plus the distill reminder."""
    out = [f"## 7. .learnings 新增 ({len(learnings)})", ""]
    if learnings:
        out.extend(f"- `{f}`" for f in learnings)
        out.append("")
        out.append(f"> ⚠️ **本周新增 {len(learnings)} 条 learning，weekly-retro skill 在生成候选改进时**必须**评估其中是否有需要 distill 到 `docs/standards/` 或 `.agents/skills/` 的 pattern**。")
        out.append("> ")
        out.append("> 触发阈值（任一即出 distill 候选）：")
        out.append("> - 单周新增 ≥5 条 learning")
        out.append("> - 出现 ≥3 条同主题 learning（例：同一 trap / 同一 workflow / 同一 module）")
        out.append('> - 任何"踩坑→解决"型 learning，且解决方案可复用')
        out.append("> ")
        out.append("> 不做这步的后果：参见 2026-05 一次性积压 165 条 learning 才 distill 的事故（PR #403），单次工作量过大且事故信号被噪音淹没。")
    else:
        out.append("_（本周无新 learnings）_")
    out.append("")
    return out


def render_report(ctx: dict) -> str:
    """Render the collected weekly data as a facts-only markdown report.

    `ctx` must provide: generated_at, window_start, window_end, window_days,
    repo, default_branch, git, prs, issues, actions, stale_branches and
    learnings; `prev_retro` is optional and adds section 0 when present.

    Returns the full document as one newline-joined string (with a trailing
    newline). Each section is produced by its own `_render_*` helper, in
    report order.
    """
    lines = _render_header(ctx)
    prev_retro = ctx.get("prev_retro")
    if prev_retro:
        lines += _render_prev_retro(prev_retro)
    lines += _render_overview(ctx)
    lines += _render_pr_section(ctx["prs"])
    lines += _render_issue_section(ctx["issues"])
    lines += _render_actions_section(ctx["actions"])
    lines += _render_repo_sections(ctx["stale_branches"], ctx["git"])
    lines += _render_learnings_section(ctx["learnings"])
    lines += [
        "---",
        "",
        "_Generated by `scripts/ops/weekly-review.py`. Closure tracked in Gitea issue (label: `weekly-retro`)._",
        "",
    ]
    return "\n".join(lines)


def main() -> int:
    """CLI entry point: collect every data source, render, then write or print.

    Progress lines go to stderr so that `--print` leaves stdout holding only
    the report. Without `--out`, the file is named after the ISO week of the
    current date inside REPORTS_DIR, so re-running within the same week
    overwrites the same file.

    Returns the process exit code (0 on success). Invalid arguments terminate
    through argparse's usual error exit.
    """
    p = argparse.ArgumentParser(description="Weekly review data collector")
    p.add_argument("--since-days", type=int, default=7)
    p.add_argument("--repo", default=None, help="owner/repo, default: detect from origin")
    p.add_argument("--out", default=None, help="output md path. Default: testing/reports/retros/<YYYY-Www>.md")
    p.add_argument("--print", dest="print_only", action="store_true", help="Print to stdout instead of writing file")
    args = p.parse_args()
    # A zero or negative window puts the cutoff at or after "now", which would
    # silently produce an empty report.
    if args.since_days < 1:
        p.error("--since-days must be a positive integer")

    token = get_token()
    repo = args.repo or detect_repo()
    default_branch = detect_default_branch()

    now = _now_local()
    cutoff = now - timedelta(days=args.since_days)

    api = Api(token, repo)

    print(f"Pulling data: repo={repo} branch={default_branch} since={cutoff:%Y-%m-%d %H:%M %z}", file=sys.stderr)

    git_stats = collect_git_stats(default_branch, cutoff)
    print(f"  git: {len(git_stats['commits'])} commits across {git_stats['active_days']} active days", file=sys.stderr)

    prs = collect_prs(api, cutoff)
    print(f"  PRs: {len(prs['merged_in_window'])} merged, {len(prs['opened_in_window'])} opened, {len(prs['open_long'])} long-lived", file=sys.stderr)

    issues = collect_issues(api, cutoff)
    print(f"  Issues: {len(issues['opened_in_window'])} opened, {len(issues['closed_in_window'])} closed", file=sys.stderr)

    actions = collect_actions(api, cutoff)
    print(f"  Actions: {actions['total']} runs, {len(actions['failed'])} failed", file=sys.stderr)

    branches = collect_stale_branches(api, default_branch)
    print(f"  Stale branches: {len(branches)}", file=sys.stderr)

    learnings = collect_learnings(cutoff)
    print(f"  Learnings: {len(learnings)} new", file=sys.stderr)

    iso = now.isocalendar()
    prev_retro = collect_previous_week_retro(api, iso.year, iso.week)
    if prev_retro:
        print(
            f"  Prev-week retro: #{prev_retro['number']} ({prev_retro['state']}) "
            f"with {len(prev_retro['candidates'])} candidate(s)",
            file=sys.stderr,
        )
    else:
        print("  Prev-week retro: not found", file=sys.stderr)

    out_path = Path(args.out) if args.out else REPORTS_DIR / f"{iso.year}-W{iso.week:02d}.md"

    ctx = {
        "generated_at": now,
        "window_start": cutoff,
        "window_end": now,
        "window_days": args.since_days,
        "repo": repo,
        "default_branch": default_branch,
        "git": git_stats,
        "prs": prs,
        "issues": issues,
        "actions": actions,
        "stale_branches": branches,
        "learnings": learnings,
        "prev_retro": prev_retro,
    }

    md = render_report(ctx)

    if args.print_only:
        print(md)
        return 0

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(md, encoding="utf-8")
    print(f"\nReport written: {out_path}", file=sys.stderr)
    return 0


# Run only when executed as a script; main()'s return value becomes the exit code.
if __name__ == "__main__":
    sys.exit(main())