#!/usr/bin/env bash
# agent-sweep.sh — 扫描所有 slot，按分级规则回收
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory of this script, so sibling scripts and the shared library
# can be located no matter where the caller's working directory is.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=../lib/agent-pool-lib.sh
. "${SCRIPT_DIR}/../lib/agent-pool-lib.sh"

# Pool root as reported by ap_pool_root; make sure it exists before anything
# appends to the sweep log that lives inside it.
POOL_ROOT="$(ap_pool_root)"
mkdir -p "${POOL_ROOT}"
SWEEP_LOG="${POOL_ROOT}/sweep.log"

# Print the command-line help text to stdout.
_agent_sweep_usage() {
  cat <<EOF
用法: bash $0 [--dry-run] [--reset]
  --dry-run   只汇报会回收哪些 slot，不动 lock
  --reset     stale 回收时同步调 agent-release.sh 重置工作区
              （注：agent-dead / orphan 始终走 release 分级，不受此开关影响）

回收对象（abandoned 始终跳过——需人工 review）：
  stale       — heartbeat 已超时或心跳进程已死
  agent-dead  — agent_ppid 已死（agent 退出但 heartbeat 还在跑）→ 走 release 分级
  orphan      — agent 退出 + task_branch 远端已删 + 工作区干净 + commit 已合并
                （PR merged + auto-delete-on-merge 后的假占用，向后兼容）

环境变量：
  AP_ORPHAN_MIN_AGE   agent-dead / orphan 判定的 claim 年龄下限（秒，默认 300）
                      低于此值视为活跃 agent，跳过检查
EOF
}

# Option flags; both are off unless the matching switch is given.
DRY_RUN=false
RESET_WORKSPACE=false

# Consume the arguments left to right. Help exits 0 immediately; anything
# unrecognised is reported on stderr and exits 1.
while (( $# > 0 )); do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      ;;
    --reset)
      RESET_WORKSPACE=true
      ;;
    -h | --help)
      _agent_sweep_usage
      exit 0
      ;;
    *)
      echo "未知参数: $1" >&2
      exit 1
      ;;
  esac
  shift
done

# Take a single timestamp for the whole run; the banner and every sweep.log
# entry written by this script reuse it.
NOW="$(ap_now_iso)"
printf '%s\n' "agent-sweep: 开始扫描，时间 ${NOW}"

# Running total of slots reclaimed across all passes.
reclaimed=0

# ----- 1) stale: heartbeat timed out or the heartbeat process died -----
#######################################
# Reclaim every slot whose state is reported as "stale".
#
# Without --reset the lock file is simply removed. With --reset the slot is
# handed to agent-release.sh; a failing release is reported on stderr, logged
# as "stale-release-failed", and NOT counted as reclaimed (same handling as
# the agent-dead and orphan passes).
#
# Globals:
#   DRY_RUN, RESET_WORKSPACE, NOW, SWEEP_LOG, SCRIPT_DIR (read)
#   reclaimed (incremented once per successfully reclaimed slot)
# Arguments: none
# Outputs:   progress lines on stdout, failures on stderr, entries appended
#            to ${SWEEP_LOG}
#######################################
sweep_stale() {
  local slot state hb pid branch msg

  for slot in $(ap_list_slots); do
    state="$(ap_slot_state "${slot}")"
    [[ "${state}" == stale ]] || continue

    # Lock fields are only used for the report; fall back to '?' if unreadable.
    hb="$(ap_read_lock "${slot}" heartbeat_at || echo '?')"
    pid="$(ap_read_lock "${slot}" pid || echo '?')"
    branch="$(ap_read_lock "${slot}" task_branch || echo '?')"
    msg="slot-${slot} stale (heartbeat=${hb} pid=${pid} branch=${branch})"

    if [[ "${DRY_RUN}" == true ]]; then
      echo "  [dry-run] 将回收: ${msg}"
      continue
    fi

    echo "  回收: ${msg}"
    echo "[${NOW}] reclaimed ${msg}" >> "${SWEEP_LOG}"

    if [[ "${RESET_WORKSPACE}" == true ]]; then
      # Do not abort the sweep on a failed release, but do not pretend the
      # slot was reclaimed either.
      if ! bash "${SCRIPT_DIR}/agent-release.sh" "${slot}"; then
        echo "  ⚠ slot-${slot} release 失败" >&2
        echo "[${NOW}] stale-release-failed slot-${slot}" >> "${SWEEP_LOG}"
        continue
      fi
    else
      rm -f -- "$(ap_lock_file "${slot}")"
    fi
    reclaimed=$((reclaimed + 1))
  done
}
sweep_stale

# ----- 2) agent-dead: agent_ppid is gone, hand the slot to release -----
# Per the original notes this is the main reclamation path: whatever the
# heartbeat daemon's self-check misses (hook not installed, heartbeat script
# crashed, ...) is caught here. agent-release.sh decides by workspace state:
# clean -> really released, dirty/unpushed -> moved to abandoned.
#######################################
# Reclaim "claimed" slots whose owning agent process is no longer alive.
#
# One malformed lock must not abort the whole sweep under `set -euo pipefail`,
# so the age threshold has a local default and helper failures that only
# affect the age check or the report are tolerated.
#
# Globals:
#   AP_ORPHAN_MIN_AGE (read; minimum claim age in seconds, default 300)
#   DRY_RUN, NOW, SWEEP_LOG, SCRIPT_DIR (read)
#   reclaimed (incremented once per successfully released slot)
# Arguments: none
# Outputs:   progress lines on stdout, failures on stderr, entries appended
#            to ${SWEEP_LOG}
#######################################
sweep_agent_dead() {
  local slot state agent_ppid claimed_at claimed_epoch now_epoch
  local branch ws_status msg
  # The help text documents 300 as the default; apply it here so an unset
  # variable cannot trip `set -u` inside the arithmetic below.
  local min_age="${AP_ORPHAN_MIN_AGE:-300}"

  for slot in $(ap_list_slots); do
    state="$(ap_slot_state "${slot}")"
    [[ "${state}" == claimed ]] || continue

    # An agent_ppid field is required to judge liveness; locks without one
    # are skipped here and left to the orphan pass.
    agent_ppid="$(ap_read_lock "${slot}" agent_ppid || true)"
    [[ -n "${agent_ppid}" ]] || continue

    # Guard against killing a fresh claim: skip claims younger than min_age.
    claimed_at="$(ap_read_lock "${slot}" claimed_at || true)"
    if [[ -n "${claimed_at}" ]]; then
      # An unparseable timestamp skips the age check instead of aborting.
      claimed_epoch="$(ap_iso_to_epoch "${claimed_at}" || true)"
      now_epoch="$(ap_now_epoch)"
      if [[ "${claimed_epoch}" =~ ^[0-9]+$ ]] \
        && (( now_epoch - claimed_epoch < min_age )); then
        continue
      fi
    fi

    if ap_agent_alive "${slot}"; then
      continue
    fi

    branch="$(ap_read_lock "${slot}" task_branch || echo '?')"
    # Workspace status only feeds the report line; never let it stop the sweep.
    ws_status="$(ap_workspace_status "${slot}" || true)"
    ws_status="${ws_status:-?}"
    msg="slot-${slot} agent-dead (agent_ppid=${agent_ppid} branch=${branch} workspace=${ws_status})"

    if [[ "${DRY_RUN}" == true ]]; then
      echo "  [dry-run] 将回收 (agent-dead): ${msg}"
      continue
    fi

    echo "  回收 (agent-dead): ${msg}"
    echo "[${NOW}] reclaimed-agent-dead ${msg}" >> "${SWEEP_LOG}"
    if ! bash "${SCRIPT_DIR}/agent-release.sh" "${slot}"; then
      echo "  ⚠ slot-${slot} release 失败" >&2
      echo "[${NOW}] agent-dead-release-failed slot-${slot}" >> "${SWEEP_LOG}"
      continue
    fi
    reclaimed=$((reclaimed + 1))
  done
}
sweep_agent_dead

# ----- 3) orphan: legacy locks (no agent_ppid) — infer a merged PR from git state -----
#######################################
# Release "claimed" slots that ap_lock_is_orphan flags as orphaned.
# Globals:
#   DRY_RUN, NOW, SWEEP_LOG, SCRIPT_DIR (read)
#   reclaimed (incremented once per successfully released slot)
# Arguments: none
# Outputs:   progress lines on stdout, failures on stderr, entries appended
#            to ${SWEEP_LOG}
#######################################
sweep_orphans() {
  local slot state branch hb_pid msg

  for slot in $(ap_list_slots); do
    state="$(ap_slot_state "${slot}")"
    if [[ "${state}" != claimed ]]; then
      continue
    fi
    ap_lock_is_orphan "${slot}" || continue

    # Lock fields are only used for the report; fall back to '?' if unreadable.
    branch="$(ap_read_lock "${slot}" task_branch || echo '?')"
    hb_pid="$(ap_read_lock "${slot}" heartbeat_pid || echo '?')"
    msg="slot-${slot} orphan (branch=${branch} heartbeat_pid=${hb_pid})"

    if [[ "${DRY_RUN}" == true ]]; then
      printf '%s\n' "  [dry-run] 将回收 (orphan): ${msg}"
      continue
    fi

    printf '%s\n' "  回收 (orphan): ${msg}"
    printf '%s\n' "[${NOW}] reclaimed-orphan ${msg}" >> "${SWEEP_LOG}"

    # Count the slot only when the release script succeeds; otherwise record
    # the failure and move on to the next slot.
    if bash "${SCRIPT_DIR}/agent-release.sh" "${slot}"; then
      reclaimed=$((reclaimed + 1))
    else
      printf '%s\n' "  ⚠ slot-${slot} release 失败" >&2
      printf '%s\n' "[${NOW}] orphan-release-failed slot-${slot}" >> "${SWEEP_LOG}"
    fi
  done
}
sweep_orphans

# Final summary: total number of slots reclaimed during this run.
printf '%s\n' "agent-sweep: 完成，回收 ${reclaimed} 个 slot"
