#!/usr/bin/env bash
# agent-pool-lib.sh — 开发 agent 池共享库
# 提供 lockfile 读写、心跳、stale 判定、池路径探测等基础能力。
# 所有函数以 ap_ 前缀（agent pool）。

# Heartbeat interval and stale threshold (both overridable through environment
# variables, which is what tests use).
AP_HEARTBEAT_INTERVAL="${AP_HEARTBEAT_INTERVAL:-30}"
AP_STALE_TIMEOUT="${AP_STALE_TIMEOUT:-180}"
# Minimum claim age (seconds) before orphan detection applies. A claim younger
# than this is treated as an active agent and skipped, so a slot that was just
# claimed and has not produced a commit yet is not reclaimed by mistake.
AP_ORPHAN_MIN_AGE="${AP_ORPHAN_MIN_AGE:-300}"
# Remote base branches an orphan may have been merged into (a PR only counts as
# merged when it landed in one of these three).
# Kept in line with the "protected branches" semantics described in CLAUDE.md.
AP_PROTECTED_REMOTE_BASES=("origin/develop" "origin/staging" "origin/production")
# L2 patch-id equivalence detection: how many historical commits of each base
# are scanned to build the comparison set.
# After a squash merge the base carries a brand-new SHA with no ancestry relation
# to the local HEAD, so the merge has to be recognised through patch-id equality.
# Same value as PATCH_ID_LOOKBACK_DEFAULT in scripts/ops/sweep-remote-stale.py.
AP_ORPHAN_PATCH_ID_LOOKBACK="${AP_ORPHAN_PATCH_ID_LOOKBACK:-500}"

# Per-process patch-id cache: within one sweep, N slots share the patch-id set of
# the same base, so the rev-list + diff-tree + patch-id pipeline is not meant to
# be re-run N times while the base is unchanged.
# Reset to empty every time the library is sourced (sweep / claim are separate
# processes, so the cache lives for a single process only).
# Note: the cache is keyed by base name only (the original author's rationale:
# all slots are worktrees of one repository and share refs/remotes/origin/*).
# `2>/dev/null || true` lets sourcing continue when `declare -gA` is rejected.
declare -gA AP_PATCH_ID_CACHE=() 2>/dev/null || true

# Explicit kill switch for the pool (CI / resource-constrained machines).
# Returns: 1 only when FFOA_AGENT_POOL_ENABLED is exactly "false";
#          0 otherwise (including when the variable is unset or empty).
ap_pool_enabled() {
  case "${FFOA_AGENT_POOL_ENABLED:-true}" in
    false) return 1 ;;
    *)     return 0 ;;
  esac
}

# Print the main repository root: the parent directory of git's common dir,
# resolved by cd-ing into it and printing pwd.
# Outputs: absolute directory path on stdout.
ap_main_repo_dir() {
  local common_dir parent_dir
  common_dir="$(git rev-parse --path-format=absolute --git-common-dir)"
  parent_dir="$(dirname "${common_dir}")"
  (
    cd "${parent_dir}" && pwd
  )
}

# Fixed rule: the pool's parent directory is <repo-parent>/<repo-name>-wt.
# It deliberately does not probe "the first non-main entry of git worktree list",
# which would depend on a fragile ordering.
# Outputs: the pool parent path on stdout.
ap_pool_parent() {
  local repo_root repo_parent repo_name
  repo_root="$(ap_main_repo_dir)"
  repo_parent="$(dirname "${repo_root}")"
  repo_name="$(basename "${repo_root}")"
  echo "${repo_parent}/${repo_name}-wt"
}

# Resolve the pool root directory. Resolution order:
#   1) explicit override in $FFOA_AGENT_POOL_ROOT
#   2) new-rule path <repo-parent>/<repo-name>-wt/.agent-pool, if it exists
#   3) legacy path <repo-parent>/ffworkspace-wt/.agent-pool, if it exists
#      (backward compatibility)
#   4) neither exists -> the new-rule path (where pool-init is expected to
#      create it)
# Outputs: the chosen path on stdout.
ap_pool_root() {
  local override="${FFOA_AGENT_POOL_ROOT:-}"
  if [[ -n "${override}" ]]; then
    echo "${override}"
    return
  fi

  local preferred fallback repo_root
  preferred="$(ap_pool_parent)/.agent-pool"

  # Only look at the legacy location when the preferred one is absent.
  if [[ ! -d "${preferred}" ]]; then
    repo_root="$(ap_main_repo_dir)"
    fallback="$(dirname "${repo_root}")/ffworkspace-wt/.agent-pool"
    if [[ -d "${fallback}" ]]; then
      echo "${fallback}"
      return
    fi
  fi

  echo "${preferred}"
}

# ap_slot_dir <slot-num> -> stdout: <pool-root>/slot-<slot-num>
ap_slot_dir() {
  local leaf="slot-$1"
  echo "$(ap_pool_root)/${leaf}"
}
# ap_lock_file <slot-num> -> stdout: <pool-root>/slot-<slot-num>.lock
ap_lock_file() {
  local leaf="slot-$1.lock"
  echo "$(ap_pool_root)/${leaf}"
}
# ap_flock_file <slot-num> -> stdout: <pool-root>/slot-<slot-num>.lock.flock
ap_flock_file() {
  local leaf="slot-$1.lock.flock"
  echo "$(ap_pool_root)/${leaf}"
}

# Print the current UTC time formatted as YYYY-MM-DDTHH:MM:SSZ.
ap_now_iso() {
  date -u '+%Y-%m-%dT%H:%M:%SZ'
}
# Print the current time as seconds since the Unix epoch.
ap_now_epoch() {
  date '+%s'
}

# ap_iso_to_epoch <iso-utc-timestamp> -> stdout: seconds since the epoch
# Picks the `date` invocation by platform: the -j/-f form when `uname` reports
# Darwin, the -d form everywhere else. Errors from `date` are silenced; the
# function's exit status is that of `date`.
ap_iso_to_epoch() {
  local stamp="$1"
  case "$(uname)" in
    Darwin)
      date -j -u -f "%Y-%m-%dT%H:%M:%SZ" "${stamp}" +%s 2>/dev/null
      ;;
    *)
      date -u -d "${stamp}" +%s 2>/dev/null
      ;;
  esac
}

# ap_read_lock <slot-num> <key>
# Print the value of the first `key=value` line in the slot's lock file whose
# key equals <key>. Everything after the first '=' is the value, so values may
# themselves contain '='.
# Returns: 1 when the lock file does not exist; otherwise awk's status (a missing
#          key prints nothing).
ap_read_lock() {
  local lock_path
  lock_path="$(ap_lock_file "$1")"
  if [[ ! -f "${lock_path}" ]]; then
    return 1
  fi
  awk -F= -v k="$2" '
    $1 == k {
      sub(/^[^=]*=/, "")
      print
      exit
    }
  ' "${lock_path}"
}

# ap_write_lock_atomic <slot-num> <key=val> ...
# Replace the slot's lock file with exactly the given lines (one per argument).
# The content is written to a sibling temp file first and then renamed over the
# lock file, so readers never observe a half-written lock.
# Arguments: $1 - slot number; $2.. - literal lines to store (may be none, which
#            produces an empty lock file).
# Returns:   0 on success; non-zero when writing or renaming fails (the temp
#            file is removed in that case).
ap_write_lock_atomic() {
  local slot="$1"; shift
  local f tmp rc
  f="$(ap_lock_file "${slot}")"
  tmp="${f}.tmp.$$"

  # printf instead of an echo loop: no loop variable leaks into the caller's
  # scope, and entries that look like echo options (e.g. "-n") are kept.
  if (( $# > 0 )); then
    printf '%s\n' "$@" > "${tmp}"
  else
    : > "${tmp}"
  fi || { rc=$?; rm -f -- "${tmp}"; return "${rc}"; }

  mv "${tmp}" "${f}" || { rc=$?; rm -f -- "${tmp}"; return "${rc}"; }
}

# ap_update_lock_field <slot-num> <key> <value>
# Set a single field in the slot's lock file: every existing line whose key
# equals <key> is rewritten to `key=value`; when no such line exists the pair is
# appended. The file is rewritten through a temp file and renamed into place.
# Returns: 1 when the lock file does not exist; otherwise the status of the
#          rewrite (the temp file is removed on failure).
ap_update_lock_field() {
  local slot="$1" key="$2" value="$3"
  local f tmp rc
  f="$(ap_lock_file "${slot}")"
  [[ -f "${f}" ]] || return 1
  tmp="${f}.tmp.$$"

  # Key and value travel through the environment rather than `awk -v`: -v
  # assignments undergo backslash-escape processing, which would turn a value
  # such as `C:\new` into "C:" + newline + "ew" and corrupt the lock file.
  {
    AP_LOCK_FIELD_KEY="${key}" AP_LOCK_FIELD_VALUE="${value}" awk -F= '
      BEGIN {
        k = ENVIRON["AP_LOCK_FIELD_KEY"]
        v = ENVIRON["AP_LOCK_FIELD_VALUE"]
        found = 0
      }
      $1==k { print k"="v; found=1; next }
      { print }
      END { if (!found) print k"="v }
    ' "${f}" > "${tmp}" && mv "${tmp}" "${f}"
  } || {
    rc=$?
    rm -f -- "${tmp}"
    return "${rc}"
  }
}

# ap_mark_abandoned <slot> <reason>
# Rewrite the slot's lock file in a single awk pass so that it carries
#   state=abandoned, abandoned_reason=<reason>, abandoned_at=<now, UTC ISO>,
#   heartbeat_pid=<empty>
# Existing lines for those four keys are replaced in place; missing ones are
# appended. One pass replaces four separate ap_update_lock_field calls
# (4x file IO + 4 awk processes).
# Returns: 1 when the lock file does not exist; otherwise the status of the
#          rewrite (the temp file is removed on failure).
ap_mark_abandoned() {
  local slot="$1" reason="$2"
  local f tmp now rc
  f="$(ap_lock_file "${slot}")"
  [[ -f "${f}" ]] || return 1
  tmp="${f}.tmp.$$"
  now="$(ap_now_iso)"

  # The reason is free text; hand it to awk through the environment because
  # `awk -v` would interpret backslash escapes inside it (e.g. `\t`, `\n`).
  {
    AP_ABANDON_REASON="${reason}" AP_ABANDON_AT="${now}" awk -F= '
      BEGIN {
        reason = ENVIRON["AP_ABANDON_REASON"]
        now = ENVIRON["AP_ABANDON_AT"]
        sf=0; rf=0; af=0; hf=0
      }
      $1=="state"             { print "state=abandoned";        sf=1; next }
      $1=="abandoned_reason"  { print "abandoned_reason="reason; rf=1; next }
      $1=="abandoned_at"      { print "abandoned_at="now;        af=1; next }
      $1=="heartbeat_pid"     { print "heartbeat_pid=";          hf=1; next }
      { print }
      END {
        if (!sf) print "state=abandoned"
        if (!rf) print "abandoned_reason="reason
        if (!af) print "abandoned_at="now
        if (!hf) print "heartbeat_pid="
      }
    ' "${f}" > "${tmp}" && mv "${tmp}" "${f}"
  } || {
    rc=$?
    rm -f -- "${tmp}"
    return "${rc}"
  }
}

# ap_lock_is_stale <slot> -> 0 if stale (claimable), 1 if alive
# Liveness is judged from heartbeat_pid (the long-running heartbeat daemon), not
# from pid (the claim process, which exits right after claiming).
# A lock is stale when any of these holds:
#   - the lock file does not exist
#   - heartbeat_at parses and is older than AP_STALE_TIMEOUT seconds
#   - heartbeat_pid is set, the lock's host equals this machine's hostname,
#     and that pid no longer answers `kill -0`
ap_lock_is_stale() {
  local slot="$1"
  local lock_path beat beat_pid lock_host current beat_epoch

  lock_path="$(ap_lock_file "${slot}")"
  if [[ ! -f "${lock_path}" ]]; then
    return 0
  fi

  beat="$(ap_read_lock "${slot}" heartbeat_at || true)"
  beat_pid="$(ap_read_lock "${slot}" heartbeat_pid || true)"
  lock_host="$(ap_read_lock "${slot}" host || true)"
  current="$(ap_now_epoch)"

  # Heartbeat age check; an unparseable timestamp is simply skipped.
  if [[ -n "${beat}" ]]; then
    beat_epoch="$(ap_iso_to_epoch "${beat}")"
    if [[ -n "${beat_epoch}" ]]; then
      if (( current - beat_epoch > AP_STALE_TIMEOUT )); then
        return 0
      fi
    fi
  fi

  # The pid probe is only meaningful for a lock taken on this host.
  if [[ -z "${beat_pid}" ]]; then
    return 1
  fi
  if [[ "${lock_host}" != "$(hostname)" ]]; then
    return 1
  fi
  if kill -0 "${beat_pid}" 2>/dev/null; then
    return 1
  fi
  return 0
}

# ap_base_patch_ids <slot_dir> <base> -> stdout: the `sort -u`-ed set of
# patch-ids of the most recent AP_ORPHAN_PATCH_ID_LOOKBACK non-merge commits
# on <base> (no trailing newline).
#
# Results are memoised per base name in the associative array
# AP_PATCH_ID_CACHE. Two caveats:
#   - The cache is used only when AP_PATCH_ID_CACHE really is an associative
#     array. If its declaration failed (the library tolerates that), indexing a
#     plain variable with "origin/develop" would be evaluated arithmetically and
#     abort with "division by 0", so the function computes uncached instead.
#   - A cache write only persists when the function runs in the current shell;
#     inside `$(...)` the assignment happens in a subshell and is discarded.
ap_base_patch_ids() {
  local slot_dir="$1" base="$2"
  local cache_ok=false ids

  if [[ "$(declare -p AP_PATCH_ID_CACHE 2>/dev/null)" =~ ^declare\ -[[:alpha:]]*A ]]; then
    cache_ok=true
  fi

  if [[ "${cache_ok}" == true ]]; then
    if [[ -n "${AP_PATCH_ID_CACHE[${base}]+x}" ]]; then
      printf '%s' "${AP_PATCH_ID_CACHE[${base}]}"
      return
    fi
  fi

  # rev-list picks the commits, diff-tree renders each as a patch, patch-id
  # hashes them; only the first column (the patch-id itself) is kept.
  ids="$(
    git -C "${slot_dir}" rev-list --max-count="${AP_ORPHAN_PATCH_ID_LOOKBACK}" --no-merges "${base}" 2>/dev/null \
      | git -C "${slot_dir}" diff-tree -p --stdin 2>/dev/null \
      | git -C "${slot_dir}" patch-id --stable 2>/dev/null \
      | awk '{print $1}' \
      | sort -u
  )"

  if [[ "${cache_ok}" == true ]]; then
    AP_PATCH_ID_CACHE[${base}]="${ids}"
  fi
  printf '%s' "${ids}"
}

# ap_branch_absorbed <slot_dir> -> 0 if the content of the slot's HEAD has been
# absorbed into ANY protected base (origin/develop|staging|production),
# 1 otherwise. Thin wrapper: delegates to ap_ref_absorbed with ref = HEAD.
#
# Three detection layers, each wider than the previous, each run against every
# protected base:
#   L1: git merge-base --is-ancestor HEAD <base> -> a fast-forward merge landed
#   L2: the patch-id of the branch's cumulative diff (merge-base..HEAD) is in the
#       patch-id set of the base's most recent AP_ORPHAN_PATCH_ID_LOOKBACK
#       commits (squash-merge zombie)
#   L3: the per-commit patch-id of every commit the branch is ahead by is in the
#       base set (rebase-merge zombie)
#
# Design notes:
# - CLAUDE.md makes squash merge the default for feature/* -> develop; after a
#   squash the base holds one brand-new commit, so the N original commits'
#   patch-ids cannot match it individually and the cumulative diff is compared.
# - All three bases are scanned so that a hotfix squashed straight into
#   production, with develop not yet synced, is still recognised.
# - The base patch-id set is meant to be cached per process (ap_base_patch_ids).
ap_branch_absorbed() {
  local slot_dir="$1"
  ap_ref_absorbed "${slot_dir}" HEAD
}

# ap_ref_absorbed <slot_dir> <ref> -> 0 if the content of <ref> has been absorbed
# into ANY protected base (L1 ancestor / L2 cumulative patch-id / L3 per-commit
# patch-id), 1 otherwise (also 1 when <ref> does not resolve).
# Same semantics as ap_branch_absorbed but with an explicit ref, so that release
# can check the task branch ref rather than HEAD (after release switches to the
# park branch, HEAD is no longer the task branch).
ap_ref_absorbed() {
  local slot_dir="$1" ref="$2"
  local candidate

  # The ref itself has to resolve, otherwise nothing can be compared.
  if ! git -C "${slot_dir}" rev-parse --verify --quiet "${ref}" >/dev/null 2>&1; then
    return 1
  fi

  # Pass 1 (L1): plain ancestry against every base that exists locally.
  for candidate in "${AP_PROTECTED_REMOTE_BASES[@]}"; do
    if ! git -C "${slot_dir}" rev-parse --verify --quiet "${candidate}" >/dev/null 2>&1; then
      continue
    fi
    if git -C "${slot_dir}" merge-base --is-ancestor "${ref}" "${candidate}" 2>/dev/null; then
      return 0
    fi
  done

  # Pass 2 (L2/L3): patch-id equivalence, only after every ancestry check missed.
  for candidate in "${AP_PROTECTED_REMOTE_BASES[@]}"; do
    if ap_ref_absorbed_by_base "${slot_dir}" "${ref}" "${candidate}"; then
      return 0
    fi
  done

  return 1
}

# ap_ref_absorbed_by_base <slot_dir> <ref> <base> -> 0 if the content of <ref> is
# patch-id-equivalent to something already on <base>, 1 otherwise.
#   L2: the patch-id of the cumulative diff merge-base(ref, base)..ref is in the
#       base's patch-id set (squash merge).
#   L3: every non-merge commit in base..ref has its own patch-id in the base's
#       patch-id set (rebase merge).
# Returns 1 as well when <base> does not resolve, when there is no merge base,
# or when the base's patch-id set is empty.
ap_ref_absorbed_by_base() {
  local slot_dir="$1" ref="$2" base="$3"

  git -C "${slot_dir}" rev-parse --verify --quiet "${base}" >/dev/null 2>&1 || return 1

  local mb
  mb="$(git -C "${slot_dir}" merge-base "${ref}" "${base}" 2>/dev/null)"
  [[ -n "${mb}" ]] || return 1

  # ap_base_patch_ids has to be captured with $(...), which runs it in a
  # subshell, so any cache write it performs is lost. Populate the cache here, in
  # the current shell, so later slots in the same process reuse the result.
  # Without a usable associative cache, fall back to computing every time.
  local base_ids
  if [[ "$(declare -p AP_PATCH_ID_CACHE 2>/dev/null)" =~ ^declare\ -[[:alpha:]]*A ]]; then
    if [[ -z "${AP_PATCH_ID_CACHE[${base}]+x}" ]]; then
      AP_PATCH_ID_CACHE[${base}]="$(ap_base_patch_ids "${slot_dir}" "${base}")"
    fi
    base_ids="${AP_PATCH_ID_CACHE[${base}]}"
  else
    base_ids="$(ap_base_patch_ids "${slot_dir}" "${base}")"
  fi
  [[ -n "${base_ids}" ]] || return 1

  # L2: one patch-id for the whole branch diff.
  local cumulative_id
  cumulative_id="$(
    git -C "${slot_dir}" diff "${mb}" "${ref}" 2>/dev/null \
      | git -C "${slot_dir}" patch-id --stable 2>/dev/null \
      | awk '{print $1}'
  )"
  if [[ -n "${cumulative_id}" ]] && grep -qxF "${cumulative_id}" <<< "${base_ids}"; then
    return 0
  fi

  # L3: one patch-id per commit the ref is ahead by; all of them must match.
  local branch_ids
  branch_ids="$(
    git -C "${slot_dir}" rev-list --no-merges "${base}..${ref}" 2>/dev/null \
      | git -C "${slot_dir}" diff-tree -p --stdin 2>/dev/null \
      | git -C "${slot_dir}" patch-id --stable 2>/dev/null \
      | awk '{print $1}'
  )"
  [[ -n "${branch_ids}" ]] || return 1

  local bid
  while IFS= read -r bid; do
    [[ -z "${bid}" ]] && continue
    grep -qxF "${bid}" <<< "${base_ids}" || return 1
  done <<< "${branch_ids}"

  return 0
}

# ap_lock_is_orphan <slot> -> 0 if orphan (claimable after release), 1 if not
#
# Orphan means: the agent process exited long ago, but the heartbeat daemon that
# was started detached at claim time keeps refreshing the heartbeat, so
# ap_lock_is_stale never reports the slot as stale. Such a slot is falsely
# occupied and has to be detected separately.
#
# All of the following must hold:
#   1) lock exists, task_branch is non-empty, the slot working directory exists
#   2) claim age >= AP_ORPHAN_MIN_AGE (do not reclaim an agent that just claimed
#      and has not started working); a missing/unparseable claimed_at skips
#      this check
#   3) the slot worktree is clean (git status --porcelain prints nothing)
#   4) the local branch content has been absorbed by a protected base
#      (ap_branch_absorbed)
#   5) origin is confirmed to have no branch named task_branch
#      (PR merged + auto-delete-on-merge)
#
# The network call (git ls-remote) comes last; any earlier failing condition
# short-circuits. `ls-remote --exit-code` exits 2 when no ref matches; any other
# non-zero status is a failure to ask the remote (network, auth, ...) and is
# treated conservatively as "not orphan" rather than as "branch deleted".
ap_lock_is_orphan() {
  local slot="$1"
  local f task_branch slot_dir claimed_at claimed_epoch now ls_rc
  f="$(ap_lock_file "${slot}")"
  [[ -f "${f}" ]] || return 1

  task_branch="$(ap_read_lock "${slot}" task_branch || true)"
  [[ -n "${task_branch}" ]] || return 1

  slot_dir="$(ap_slot_dir "${slot}")"
  [[ -d "${slot_dir}" ]] || return 1

  claimed_at="$(ap_read_lock "${slot}" claimed_at || true)"
  if [[ -n "${claimed_at}" ]]; then
    claimed_epoch="$(ap_iso_to_epoch "${claimed_at}")"
    now="$(ap_now_epoch)"
    if [[ -n "${claimed_epoch}" ]] && (( now - claimed_epoch < AP_ORPHAN_MIN_AGE )); then
      return 1
    fi
  fi

  if [[ -n "$(git -C "${slot_dir}" status --porcelain 2>/dev/null)" ]]; then
    return 1
  fi

  if ! ap_branch_absorbed "${slot_dir}"; then
    return 1
  fi

  ls_rc=0
  git -C "${slot_dir}" ls-remote --exit-code origin "refs/heads/${task_branch}" >/dev/null 2>&1 || ls_rc=$?
  # 0 -> the branch still exists remotely; 2 -> confirmed absent;
  # anything else -> could not determine, so do not declare the slot orphaned.
  if (( ls_rc != 2 )); then
    return 1
  fi

  return 0
}

# ap_with_flock <slot> <command> [args...]
# Run <command> while holding a non-blocking flock on the slot's .flock file.
# The command runs inside a subshell with the lock file open on fd 200.
# Returns: 99 when the lock cannot be taken immediately; otherwise the exit
#          status of <command>.
ap_with_flock() {
  local slot="$1"
  shift
  local guard_path guard_dir
  guard_path="$(ap_flock_file "${slot}")"
  guard_dir="$(dirname "${guard_path}")"
  mkdir -p "${guard_dir}"
  : > "${guard_path}"
  (
    if ! flock -n 200; then
      exit 99
    fi
    "$@"
  ) 200>"${guard_path}"
}

# ap_list_slots -> stdout: the slot numbers present in the pool root, one per
# line, sorted numerically.
# A slot is a direct child directory named slot-<digits>. Directories matching
# slot-* without a purely numeric suffix are skipped; previously their full path
# leaked into the output as if it were a slot number.
# Prints nothing (status 0) when the pool root does not exist.
ap_list_slots() {
  local root
  root="$(ap_pool_root)"
  [[ -d "${root}" ]] || return 0
  # sed -n ... p: emit only names that match slot-<digits>.
  find "${root}" -maxdepth 1 -mindepth 1 -type d -name 'slot-*' \
    | sed -nE 's|.*/slot-([0-9]+)$|\1|p' \
    | sort -n
}

# ap_slot_state <slot> -> stdout: free | abandoned | stale | claimed
#   free      : no lock file
#   abandoned : the lock's state field is "abandoned"
#   stale     : ap_lock_is_stale reports the lock as stale
#   claimed   : anything else
ap_slot_state() {
  local slot="$1"
  local lock_path recorded_state verdict

  lock_path="$(ap_lock_file "${slot}")"
  if [[ ! -f "${lock_path}" ]]; then
    echo free
    return
  fi

  recorded_state="$(ap_read_lock "${slot}" state || true)"
  case "${recorded_state}" in
    abandoned)
      echo abandoned
      return
      ;;
  esac

  verdict=claimed
  if ap_lock_is_stale "${slot}"; then
    verdict=stale
  fi
  echo "${verdict}"
}

# ap_workspace_status <slot> -> stdout: clean | dirty | unpushed
# - clean    : git status is empty and nothing on the current branch is unpushed
#              (also reported when the slot directory is missing, HEAD is
#              detached/unresolvable, or the branch is a pool/slot-* park branch)
# - dirty    : git status is non-empty (uncommitted changes)
# - unpushed : git status is empty, but HEAD is ahead of its upstream or there is
#              no upstream, and the content was not absorbed by a protected base
# The result is always the string on stdout; callers match it with `case`.
ap_workspace_status() {
  local slot="$1"
  local worktree
  worktree="$(ap_slot_dir "${slot}")"
  if [[ ! -d "${worktree}" ]]; then
    echo clean
    return
  fi

  local pending
  pending="$(git -C "${worktree}" status --porcelain 2>/dev/null || true)"
  if [[ -n "${pending}" ]]; then
    echo dirty
    return
  fi

  # Detached / unresolvable HEAD and park branches (pool/slot-N, which are never
  # pushed by design) have nothing to compare against a remote.
  local current tracking lead
  current="$(git -C "${worktree}" rev-parse --abbrev-ref HEAD 2>/dev/null || true)"
  case "${current}" in
    ""|HEAD|pool/slot-*)
      echo clean
      return
      ;;
  esac

  # Two situations look like "unpushed": no upstream at all, or HEAD ahead of the
  # upstream. Anything else is clean right away.
  tracking="$(git -C "${worktree}" rev-parse --abbrev-ref --symbolic-full-name '@{u}' 2>/dev/null || true)"
  if [[ -n "${tracking}" ]]; then
    lead="$(git -C "${worktree}" rev-list --count "${tracking}..HEAD" 2>/dev/null || echo 0)"
    if [[ "${lead}" -gt 0 ]]; then
      : # ahead of upstream: fall through to the absorbed check
    else
      echo clean
      return
    fi
  fi

  # rev-list cannot see a squash merge (different SHA, same patch-id), so the
  # final word goes to ap_branch_absorbed.
  if ap_branch_absorbed "${worktree}"; then
    echo clean
  else
    echo unpushed
  fi
}

# ap_agent_alive <slot> -> 0 if the process recorded as agent_ppid is alive,
# 1 if it is dead, the field is missing, or the lock was taken on another host.
# A missing field means "old lock, cannot tell" and is reported as not alive, so
# that sweep / heartbeat pick their own fallback path.
ap_agent_alive() {
  local slot="$1"
  local owner_pid lock_host
  owner_pid="$(ap_read_lock "${slot}" agent_ppid || true)"
  lock_host="$(ap_read_lock "${slot}" host || true)"

  if [[ -z "${owner_pid}" ]]; then
    return 1
  fi
  if [[ "${lock_host}" != "$(hostname)" ]]; then
    return 1
  fi
  kill -0 "${owner_pid}" 2>/dev/null
}

# ap_pool_exists -> 0 if ap_list_slots reports at least one slot, 1 otherwise
ap_pool_exists() {
  local slot_count
  slot_count="$(ap_list_slots | wc -l)"
  if (( slot_count > 0 )); then
    return 0
  fi
  return 1
}

# -------- VSCode multi-root .code-workspace 维护 --------
# 文件位置：<repo-parent>/<repo-name>.code-workspace（可被 $FFOA_CODEWORKSPACE_FILE 覆盖）
# 协作 Remote-SSH 场景：本地 VSCode 通过 Remote-SSH 打开开发机上这个文件，
# 一个窗口同时挂 main + 全部 slot，多 agent 并行时单 IDE 能看到所有改动。

# Print the path of the VSCode multi-root workspace file:
# $FFOA_CODEWORKSPACE_FILE when set and non-empty, otherwise
# <repo-parent>/<repo-name>.code-workspace next to the main repository.
ap_codeworkspace_file() {
  local override="${FFOA_CODEWORKSPACE_FILE:-}"
  if [[ -n "${override}" ]]; then
    echo "${override}"
    return
  fi

  local repo_root repo_parent repo_name
  repo_root="$(ap_main_repo_dir)"
  repo_parent="$(dirname "${repo_root}")"
  repo_name="$(basename "${repo_root}")"
  echo "${repo_parent}/${repo_name}.code-workspace"
}

# Rewrite the .code-workspace file: list main plus every slot, with each slot's
# display name annotated with its current task_branch, "(claimed)" when a lock
# exists without a task_branch, or "(free)" when there is no lock.
# Silently skipped when python3 is missing (this is a nice-to-have and must not
# block the main flow). Always returns 0: a failing Python run is swallowed too.
ap_write_code_workspace() {
  # No python3 on PATH -> nothing to do, and not an error.
  if ! command -v python3 >/dev/null 2>&1; then
    return 0
  fi

  # The three paths are handed to Python through the environment of this one
  # command. The heredoc delimiter is quoted ('PYEOF'), so the shell performs no
  # expansion inside the Python source. The embedded script writes the JSON to a
  # temp file and os.replace()s it over the target, then logs to stderr.
  AP_CW_FILE="$(ap_codeworkspace_file)" \
  AP_CW_POOL_ROOT="$(ap_pool_root)" \
  AP_CW_MAIN_DIR="$(ap_main_repo_dir)" \
  python3 - <<'PYEOF' || return 0
import json
import os
import sys

file = os.environ['AP_CW_FILE']
pool_root = os.environ['AP_CW_POOL_ROOT']
main_dir = os.environ['AP_CW_MAIN_DIR']

folders = [{"path": main_dir, "name": "main"}]

if os.path.isdir(pool_root):
    def slot_num(name):
        try:
            return int(name.split('-', 1)[1])
        except (IndexError, ValueError):
            return 1 << 30
    slots = sorted(
        [d for d in os.listdir(pool_root)
         if d.startswith('slot-') and os.path.isdir(os.path.join(pool_root, d))],
        key=slot_num,
    )
    for slot in slots:
        slot_dir = os.path.join(pool_root, slot)
        lock = os.path.join(pool_root, f'{slot}.lock')
        name = f'{slot} (free)'
        if os.path.exists(lock):
            try:
                with open(lock, encoding='utf-8') as f:
                    fields = {}
                    for line in f:
                        line = line.rstrip('\n')
                        if '=' in line:
                            k, v = line.split('=', 1)
                            fields[k] = v
                branch = fields.get('task_branch', '').strip()
                name = f'{slot} [{branch}]' if branch else f'{slot} (claimed)'
            except OSError:
                pass
        folders.append({"path": slot_dir, "name": name})

content = {
    "folders": folders,
    "settings": {
        "files.exclude": {
            "**/node_modules": True,
            "**/.next": True,
            "**/dist": True,
            "**/.turbo": True,
        },
        "search.exclude": {
            "**/node_modules": True,
            "**/.next": True,
            "**/dist": True,
            "**/.turbo": True,
            "**/.git": True,
            "**/coverage": True,
        },
        "files.watcherExclude": {
            "**/node_modules/**": True,
            "**/.next/**": True,
            "**/dist/**": True,
            "**/.git/objects/**": True,
        },
    },
}

tmp = f'{file}.tmp.{os.getpid()}'
with open(tmp, 'w', encoding='utf-8') as f:
    json.dump(content, f, indent=2, ensure_ascii=False)
    f.write('\n')
os.replace(tmp, file)
print(f'codeworkspace: wrote {file} ({len(folders)} folders)', file=sys.stderr)
PYEOF
}

ap_remove_code_workspace() {
  local file
  file="$(ap_codeworkspace_file)"
  if [[ -f "${file}" ]]; then
    rm -f "${file}"
    echo "codeworkspace: removed ${file}" >&2
  fi
}
