"""Shared Gitea API helper for ops scripts in this directory.

Why a separate module rather than 4 copies: `weekly-review.py`,
`weekly-retro-issue.py`, and `sweep-remote-stale.py` were each carrying the
same `Api` class + `get_token` + `detect_repo` shape. Adding a fourth script
would have made the drift even worse. This module is the single source of
truth for token resolution, repo discovery, and HTTP plumbing.

Imported by sibling scripts via `from _gitea_api import ...`. The leading
underscore signals "internal to scripts/ops/, not a public API surface."

Auth precedence in `get_token`:
  1. names passed via `extra_env_vars` (e.g. `("WEEKLY_RETRO_TOKEN",)`)
  2. `GITEA_API_TOKEN`
  3. `GITEA_TOKEN`
First non-empty wins; exits 1 if none set.
"""

from __future__ import annotations

import json
import os
import re
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request

# Host of the Gitea instance; `$GITEA_HOST` overrides the built-in default.
GITEA_HOST = os.getenv("GITEA_HOST", "43.130.59.228")
# Root URL that every API path in this module is appended to. The scheme is
# fixed to plain `http://` here.
BASE = "http://" + GITEA_HOST


def get_token(extra_env_vars: tuple[str, ...] = (),
              on_missing=None) -> str:
    """Resolve the Gitea API token from the environment.

    Candidates are checked in order: every name in `extra_env_vars`, then
    `GITEA_API_TOKEN`, then `GITEA_TOKEN`. The first variable holding a
    non-blank value wins. Surrounding whitespace is stripped, so a
    whitespace-only variable counts as unset and a stray trailing newline
    cannot end up inside the `Authorization` header.

    Default behavior (no `on_missing`): plain-text error to stderr + exit 1,
    suitable for batch scripts (sweep-remote-stale, weekly-review etc).

    Pass `on_missing=fn(checked_names)` to override the error emission — used
    by `scripts/ops/gitea` CLI to emit a `--json`-aware structured error. The
    callback owns the user message; this function still exits 1 after the
    callback returns (centralizing the exit code keeps callers honest).

    Args:
        extra_env_vars: Script-specific variable names to try before the
            shared ones.
        on_missing: Optional callable receiving the tuple of every name that
            was checked; replaces the default stderr message.

    Returns:
        The token with surrounding whitespace removed.

    Raises:
        SystemExit: With code 1 when no candidate variable holds a token.
    """
    checked = (*extra_env_vars, "GITEA_API_TOKEN", "GITEA_TOKEN")
    for name in checked:
        # Strip before testing: a blank value must not shadow a valid
        # lower-priority variable.
        v = os.environ.get(name, "").strip()
        if v:
            return v
    if on_missing is not None:
        on_missing(checked)
    else:
        expected = " / ".join((*extra_env_vars, "GITEA_API_TOKEN"))
        print(f"ERROR: {expected} not set", file=sys.stderr)
    sys.exit(1)


def normalize_email_local(email: str) -> str | None:
    """Return the canonical local-part of `email`, or None if there is none.

    The text before the first `@` is taken, a leading `<digits>+` prefix (the
    shape of Gitea's `<userid>+<username>@noreply...` addresses) is removed,
    and the remainder is whitespace-trimmed and lowercased. That way the
    noreply form and the plain `<username>@<domain>` form of the same user
    reduce to one key.

    None is returned for a falsy `email`, one without `@`, or one whose
    local-part is empty after normalization — the caller picks the fallback.
    """
    if not email:
        return None
    head, at_sign, _domain = email.partition("@")
    if not at_sign:
        return None
    without_id = re.sub(r"^\d+\+", "", head)
    cleaned = without_id.strip().lower()
    return cleaned if cleaned else None


def detect_repo() -> str:
    """Parse `owner/repo` from `git config remote.origin.url`.

    Honors `$GITEA_REPO` env override (used by CI / non-git contexts); when
    set and non-empty it is returned verbatim and git is not consulted.

    Otherwise the origin URL is matched against `GITEA_HOST`. The regex
    tolerates SSH (`ssh://git@host:port/...`, `git@host:owner/repo`) and
    HTTP(S) (`http(s)://host/...`) URLs. The match is anchored at the end of
    the URL and only an optional `.git` suffix / trailing slash is dropped,
    so repository names that contain dots (`owner/my.repo.git`) are returned
    whole rather than being cut at the first dot. Git resolves the repo from
    any subdirectory, so we don't need to pin cwd.

    Returns:
        The `owner/repo` slug.

    Raises:
        SystemExit: With code 1 when the origin URL does not point at
            `GITEA_HOST` or has no `owner/repo` tail.
        subprocess.CalledProcessError: When `git config` exits non-zero
            (not handled here; it propagates to the caller).
    """
    env = os.environ.get("GITEA_REPO")
    if env:
        return env
    url = subprocess.check_output(
        ["git", "config", "--get", "remote.origin.url"],
    ).decode().strip()
    # host, optional `:port`, a `:` or `/` separator, then exactly
    # `owner/repo` up to the end of the URL. The lazy repo group lets the
    # optional `.git` suffix be peeled off without banning dots in the name.
    m = re.search(
        rf"{re.escape(GITEA_HOST)}(?::\d+)?[:/]([^/]+/[^/]+?)(?:\.git)?/?$",
        url,
    )
    if not m:
        print(f"ERROR: cannot parse owner/repo from {url}", file=sys.stderr)
        sys.exit(1)
    return m.group(1)


def detect_default_branch() -> str:
    """Best-effort name of the default branch of remote `origin`.

    Runs `git remote show origin` (10 s timeout, stderr discarded) and reads
    the `HEAD branch:` line of its report.

    Returns:
        The reported branch name, or `"develop"` when it cannot be
        determined: git is not runnable, the command fails or times out, the
        report has no `HEAD branch:` line, or git printed its `(unknown)`
        placeholder instead of a branch.
    """
    try:
        out = subprocess.check_output(
            ["git", "remote", "show", "origin"],
            stderr=subprocess.DEVNULL, timeout=10,
            # git translates this report; force the C locale so the
            # "HEAD branch:" label is the English one the regex expects.
            env={**os.environ, "LC_ALL": "C"},
        ).decode(errors="replace")
    except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired):
        # OSError covers a missing/unrunnable git binary; like the other
        # failures it just means "no answer", so use the fallback.
        return "develop"
    m = re.search(r"HEAD branch:\s*(\S+)", out)
    # "(unknown)" is what git prints when the remote HEAD cannot be resolved;
    # it is a placeholder, not a branch.
    if m and m.group(1) != "(unknown)":
        return m.group(1)
    return "develop"


class Api:
    """Repo-scoped Gitea HTTP client.

    `.get` / `.post` / `.patch` / `.delete` all take a `path` rooted at
    `/api/v1/repos/<owner>/<repo>` — so callers pass e.g. `/issues`, `/pulls`,
    `/branches/main`. For non-repo-scoped endpoints (`/users/<name>`,
    `/version`), use `call_root`.

    Every call returns `(status_code, body_text)`. HTTP error statuses are
    returned the same way instead of raising. Connection-level failures
    (`urllib.error.URLError`, timeouts) are not caught here and propagate to
    the caller.
    """

    def __init__(self, token: str, repo: str):
        self.token = token  # sent as `Authorization: token <token>`
        self.repo = repo    # `owner/repo`, spliced into the URL path as-is

    @staticmethod
    def _send(req: urllib.request.Request, timeout: int) -> tuple[int, str]:
        """Execute `req` and return `(status, decoded_body)`.

        An `HTTPError` is converted into the same tuple shape. In both cases
        the underlying response is closed before returning, so connections
        are released deterministically rather than at garbage collection.
        """
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return resp.status, resp.read().decode()
        except urllib.error.HTTPError as e:
            # The error object doubles as the response; read its body, then
            # close it explicitly.
            try:
                return e.code, e.read().decode()
            finally:
                e.close()

    def _request(self, method: str, path: str, params: dict | None = None,
                 body: dict | None = None) -> tuple[int, str]:
        """Send `method` to `/api/v1/repos/<repo><path>` (30 s timeout).

        `params` becomes the query string; `body`, when not None, is sent as
        a JSON document.
        """
        qs = ("?" + urllib.parse.urlencode(params)) if params else ""
        url = f"{BASE}/api/v1/repos/{self.repo}{path}{qs}"
        data = json.dumps(body).encode() if body is not None else None
        req = urllib.request.Request(
            url, data=data, method=method,
            headers={
                "Authorization": f"token {self.token}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
        )
        return self._send(req, timeout=30)

    def get(self, path: str, params: dict | None = None) -> tuple[int, str]:
        """GET a repo-scoped `path`, with optional query `params`."""
        return self._request("GET", path, params=params)

    def post(self, path: str, body: dict) -> tuple[int, str]:
        """POST `body` as JSON to a repo-scoped `path`."""
        return self._request("POST", path, body=body)

    def patch(self, path: str, body: dict) -> tuple[int, str]:
        """PATCH a repo-scoped `path` with `body` as JSON."""
        return self._request("PATCH", path, body=body)

    def delete(self, path: str) -> tuple[int, str]:
        """DELETE a repo-scoped `path`."""
        return self._request("DELETE", path)

    def call_root(self, method: str, path: str) -> tuple[int, str]:
        """Call `/api/v1<path>` directly (not under `/repos/<repo>`). For
        `/users/<name>`, `/version`, and other non-repo-scoped endpoints.

        No query parameters or request body are sent; the timeout is 15 s.
        """
        url = f"{BASE}/api/v1{path}"
        req = urllib.request.Request(
            url, method=method,
            headers={"Authorization": f"token {self.token}", "Accept": "application/json"},
        )
        return self._send(req, timeout=15)


def paginate(api: Api, path: str, params: dict | None = None,
             max_pages: int = 20) -> list:
    """Page-walk a list endpoint until exhaustion (or `max_pages` reached).

    Some Gitea endpoints (notably `/actions/runs`) wrap the list in a
    `{"workflow_runs": [...]}` envelope; this helper unwraps that shape so
    callers always get a flat list.

    The walk stops at the first of:
      * a non-200 response (a WARN is printed; pages fetched so far are kept),
      * an empty page,
      * a page identical to the previous one (the endpoint ignored `page`),
      * a page shorter than page 1,
      * `max_pages`.

    Page 1's length — not the requested `limit` — is the yardstick for a
    "short" page. A server that caps the page size below `limit` would
    otherwise make a capped page 1 look like the end of the list and
    silently truncate the result. The price is one extra request when page 1
    itself is short.

    If we exit because `max_pages` is reached AND the last page was full,
    emits a WARN — the caller's view is silently truncated otherwise (this
    matters for `/branches` on a repo with >1000 stale branches accumulated
    before the first sweep).

    Args:
        api: Client whose `.get(path, params)` returns `(status, body_text)`.
        path: Repo-scoped list endpoint, e.g. `/branches`.
        params: Extra query parameters; copied, never mutated. `limit`
            defaults to 50.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        All items collected, in server order.
    """
    results: list = []
    p = dict(params or {})
    p.setdefault("limit", 50)
    requested = int(p["limit"])
    page_size = 0    # effective page size, learned from page 1
    previous = None  # last page seen, to spot endpoints that ignore `page`
    for page in range(1, max_pages + 1):
        p["page"] = page
        code, txt = api.get(path, p)
        if code != 200:
            print(f"WARN: GET {path} page={page} → HTTP {code}: {txt[:200]}",
                  file=sys.stderr)
            break
        chunk = json.loads(txt)
        if isinstance(chunk, dict) and "workflow_runs" in chunk:
            chunk = chunk["workflow_runs"]
        if not chunk or chunk == previous:
            break
        results.extend(chunk)
        previous = chunk
        if page == 1:
            page_size = len(chunk)
            # Only a page 1 that met the requested limit is known to be full.
            full = page_size >= requested
        elif len(chunk) < page_size:
            break
        else:
            full = True
        if page == max_pages and full:
            print(f"WARN: GET {path} hit max_pages={max_pages} with full last "
                  f"page — list may be truncated. Increase max_pages or "
                  f"narrow filter.", file=sys.stderr)
    return results


def ensure_label(api: Api, name: str, color: str, description: str = "") -> int | None:
    """Return the label id, creating it if missing. None on API failure.

    Both `weekly-retro-issue.py` and `sweep-remote-stale.py` need this with
    their own label triple — parameterized here so the next ops script gets
    it for free.

    The existing labels are listed through `paginate`, so a label that sits
    beyond the first page of `/labels` is still found instead of being
    created a second time. If the listing fails, creation is attempted anyway.

    Args:
        api: Repo-scoped client.
        name: Exact label name to look up or create.
        color: Color sent to the API when the label has to be created.
        description: Description sent to the API on creation.

    Returns:
        The id of the existing or newly created label, or None (after a WARN
        on stderr) when the create call does not return 200/201.
    """
    for label in paginate(api, "/labels"):
        if label.get("name") == name:
            return label["id"]
    code, txt = api.post("/labels", {"name": name, "color": color, "description": description})
    if code in (200, 201):
        return json.loads(txt)["id"]
    print(f"WARN: cannot create label '{name}': HTTP {code} {txt[:200]}", file=sys.stderr)
    return None


def find_issue_by_title(api: Api, label: str, title: str) -> dict | None:
    """Look up an issue by exact `title` among those carrying `label`.

    A single `/issues` request is made (any state, issues only, 50 results,
    `sort=newest`); the first entry whose title equals `title` is returned.
    Restricting the query to one label keeps the search small and avoids
    matching an unrelated issue that happens to share the title. Idempotent
    ops issue creators use this to decide between PATCH and create for a
    given ISO week / month.

    Returns None when nothing matches or the request does not return 200.
    """
    query = {
        "state": "all", "type": "issues", "labels": label,
        "limit": 50, "sort": "newest",
    }
    status, payload = api.get("/issues", query)
    if status == 200:
        candidates = json.loads(payload)
        return next(
            (issue for issue in candidates if issue.get("title") == title),
            None,
        )
    return None
