#!/usr/bin/env bash
# gather.sh — 收集指定日期的开发活动原始数据，输出给 AI 生成日报
# 用法：bash .agents/skills/daily-report/scripts/gather.sh [DATE]
# 默认 DATE=今天；可传 YYYY-MM-DD 生成历史日报

# Strict-ish mode: unset variables are errors and pipelines report the failure
# of any stage. `-e` is intentionally not enabled; commands whose failure
# matters are checked explicitly below.
set -uo pipefail

# Report day: first argument, or today's local date when omitted.
DATE="${1:-$(date +%Y-%m-%d)}"

# DATE is used verbatim as a YYYY-MM-DD string throughout the script (time
# ranges, timestamp prefixes, file-name prefixes), so reject any other shape
# up front. This checks the format only, not calendar validity.
date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
if [[ ! "$DATE" =~ $date_re ]]; then
  echo "ERROR: DATE 必须是 YYYY-MM-DD 格式，收到：$DATE" >&2
  exit 2
fi

# Inclusive time window covering the whole report day.
SINCE="$DATE 00:00:00"
UNTIL="$DATE 23:59:59"

# Identity comes from git config; a missing value becomes an empty string.
EMAIL=$(git config user.email 2>/dev/null) || EMAIL=""
NAME=$(git config user.name 2>/dev/null) || NAME=""

# Without an e-mail address the author's commits cannot be selected.
if [[ -z "$EMAIL" ]]; then
  echo "ERROR: git config user.email 未设置，无法生成日报" >&2
  exit 1
fi

# USERNAME: the part of the address before the first '@', with dots turned
# into dashes.
USERNAME=${EMAIL%%@*}
USERNAME=${USERNAME//./-}

# Run from the repository root; fall back to the current directory when not
# inside a git work tree.
REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) || REPO_ROOT=$(pwd)
if ! cd "$REPO_ROOT"; then
  echo "ERROR: 无法进入仓库目录：$REPO_ROOT" >&2
  exit 1
fi

# METADATA section: echo the report day, the git identity and the repository
# context. The branch name falls back to "unknown" when git cannot report it.
current_branch=$(git branch --show-current 2>/dev/null || echo unknown)
printf '%s\n' \
  "===== METADATA =====" \
  "DATE: $DATE" \
  "EMAIL: $EMAIL" \
  "NAME: $NAME" \
  "USERNAME: $USERNAME" \
  "REPO: $REPO_ROOT" \
  "CURRENT_BRANCH: $current_branch" \
  ""

echo "===== TODAY GIT COMMITS (all branches, this author) ====="
# One line per commit on any ref inside the SINCE..UNTIL window:
# short hash | author date | subject | ref decorations. Capped at 200 lines.
# --author takes a regular expression that is searched anywhere in
# "Name <email>"; wrapping the address in <...> pins both ends so that an
# address which merely contains $EMAIL (e.g. a longer local part) is not
# selected as well.
git log --all --since="$SINCE" --until="$UNTIL" --author="<$EMAIL>" \
  --pretty=format:"%h | %ai | %s | %d" --abbrev-commit 2>/dev/null | head -n 200
# format: output has no trailing newline, hence two echoes for one blank line.
echo ""
echo ""

echo "===== TODAY GIT COMMITS SHORTSTAT ====="
# Same commit selection as the commit list, but each commit is printed as
# "== <hash> <subject> ==" followed by git's --shortstat summary line.
# Output is capped at 300 lines.
# The address is wrapped in <...> because --author is an unanchored regular
# expression over "Name <email>"; without the brackets any address that
# contains $EMAIL as a substring would match too.
git log --all --since="$SINCE" --until="$UNTIL" --author="<$EMAIL>" \
  --shortstat --pretty=format:"== %h %s ==" 2>/dev/null | head -n 300
echo ""
echo ""

echo "===== TODAY CHANGED FILES (top 20 by churn) ====="

#######################################
# Rank paths by how often they occur.
# Inputs:  stdin - one path per line; empty lines are ignored
# Outputs: up to 20 "<count> <path>" lines, most frequent first
#######################################
rank_changed_files() {
  # Empty lines are dropped first: they are separators, not paths, and
  # would otherwise be counted by uniq and show up as the top "file".
  sed '/^$/d' | sort | uniq -c | sort -rn | head -n 20
}

# --name-only with an empty format prints only the touched paths of each
# commit (plus blank separator lines). The address is wrapped in <...>
# because --author is an unanchored regular expression over "Name <email>".
git log --all --since="$SINCE" --until="$UNTIL" --author="<$EMAIL>" \
  --name-only --pretty=format:"" 2>/dev/null | rank_changed_files
echo ""

echo "===== TODAY GITEA PR ACTIVITY ====="

#######################################
# Print the pull requests that were created, merged or closed on one day.
# Arguments: $1 - target day, YYYY-MM-DD, interpreted as a local calendar day
# Inputs:    stdin - JSON array of pull-request objects (the response of the
#            pulls listing request issued below)
# Outputs:   one "#<number> [<status>] <title> | base=… ← head=…" line per
#            match, "(no PR touched today)" when nothing matches, or a
#            "(failed to parse …)" line when the payload cannot be processed
#######################################
gitea_prs_on_day() {
  # The day is handed over as an argument instead of being pasted into the
  # Python source, so its content can never change the program text.
  python3 -c '
import json
import sys
from datetime import datetime

target = sys.argv[1]


def local_day(stamp):
    # Map an ISO-8601 timestamp to a YYYY-MM-DD day. Stamps that carry "Z" or
    # a UTC offset are converted to the local zone first, since the target is
    # a local calendar day; anything else falls back to the first 10 chars.
    if not isinstance(stamp, str) or not stamp:
        return ""
    iso = stamp[:-1] + "+00:00" if stamp.endswith("Z") else stamp
    try:
        moment = datetime.fromisoformat(iso)
        if moment.tzinfo is not None:
            return moment.astimezone().date().isoformat()
    except Exception:
        pass
    return stamp[:10]


try:
    hits = []
    for pr in json.load(sys.stdin):
        touched = [local_day(pr.get(key)) for key in ("created_at", "merged_at", "closed_at")]
        if target not in touched:
            continue
        if pr.get("merged"):
            status = "merged"
        elif pr.get("state") == "closed":
            status = "closed"
        else:
            status = "open"
        hits.append("#{} [{}] {} | base={} ← head={}".format(
            pr["number"], status, pr["title"], pr["base"]["ref"], pr["head"]["ref"]))
    if hits:
        print("\n".join(hits))
    else:
        print("(no PR touched today)")
except Exception as e:
    print(f"(failed to parse Gitea response: {e})")
' "$1" 2>/dev/null
}

GITEA_TOKEN="${GITEA_API_TOKEN:-}"
GITEA_HOST="http://43.130.59.228"
GITEA_REPO="FFAIWorkspace/workspace"
# Fallback: ask git's credential machinery for a stored password.
# GIT_TERMINAL_PROMPT=0 keeps git from prompting on the terminal when nothing
# is stored. Everything after "password=" is kept, so a token that itself
# contains '=' is not cut short.
if [[ -z "$GITEA_TOKEN" ]]; then
  GITEA_TOKEN=$(GIT_TERMINAL_PROMPT=0 git credential fill \
    <<< $'protocol=http\nhost=43.130.59.228\n' 2>/dev/null \
    | sed -n 's/^password=//p')
fi

if [[ -n "$GITEA_TOKEN" ]]; then
  # Resolve the login that owns the token. Both requests are time-boxed so an
  # unreachable host cannot stall the whole report.
  GITEA_USER=$(curl -s --connect-timeout 10 --max-time 30 \
    -H "Authorization: token $GITEA_TOKEN" "$GITEA_HOST/api/v1/user" 2>/dev/null \
    | python3 -c "import sys,json; print(json.load(sys.stdin).get('login',''))" 2>/dev/null || echo "")
  if [[ -n "$GITEA_USER" ]]; then
    echo "Gitea user: $GITEA_USER"
    echo ""
    echo "-- PRs touched today (created/merged/closed) --"
    # Only the first 50 results of the listing are requested and inspected.
    curl -s --connect-timeout 10 --max-time 30 \
      -H "Authorization: token $GITEA_TOKEN" \
      "$GITEA_HOST/api/v1/repos/$GITEA_REPO/pulls?state=all&type=created_by&poster=$GITEA_USER&limit=50" 2>/dev/null \
      | gitea_prs_on_day "$DATE"
  else
    echo "(could not resolve Gitea user from token)"
  fi
else
  echo "(GITEA_API_TOKEN not set and not in git credential store)"
fi
echo ""

echo "===== TODAY CLAUDE CODE SESSIONS ====="

#######################################
# Summarise the human-written prompts found in a directory of *.jsonl
# session logs for one day.
# Arguments: $1 - directory holding the *.jsonl files
#            $2 - target day, YYYY-MM-DD, interpreted as a local calendar day
# Outputs:   a count line, then per session (first 8 chars of the file name)
#            up to five prompts, each truncated to 300 characters
#######################################
summarize_claude_sessions() {
  # Quoted heredoc: the Python text is passed through untouched and the two
  # values travel as arguments, so quotes or backslashes in the directory
  # name cannot corrupt the program.
  python3 - "$1" "$2" <<'PY'
import glob
import json
import os
import sys
from datetime import datetime

session_dir, target = sys.argv[1], sys.argv[2]


def local_day(stamp):
    """Map an ISO-8601 timestamp to a YYYY-MM-DD day.

    Stamps that carry "Z" or a UTC offset are converted to the local zone
    first, since the target is a local calendar day; anything else falls
    back to the first 10 characters.
    """
    if not isinstance(stamp, str) or not stamp:
        return ""
    iso = stamp[:-1] + "+00:00" if stamp.endswith("Z") else stamp
    try:
        moment = datetime.fromisoformat(iso)
        if moment.tzinfo is not None:
            return moment.astimezone().date().isoformat()
    except Exception:
        pass
    return stamp[:10]


def first_text(content):
    """Return the content itself when it is a string, else the text of the
    first {"type": "text"} item of a list, else an empty string."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") == "text":
                text = item.get("text", "")
                return text if isinstance(text, str) else ""
    return ""


def is_human_turn(text):
    """Filter out tool results, system reminders and other tool payloads."""
    head = text[:100]
    return (
        bool(text)
        and not text.startswith(("<", "["))
        and "tool_use_id" not in head
        and "tool_result" not in head
    )


sessions_today = []
# glob.escape keeps brackets and wildcards in the directory name literal.
pattern = os.path.join(glob.escape(session_dir), "*.jsonl")
for path in sorted(glob.glob(pattern)):
    user_msgs = []
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the locale.
        with open(path, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                try:
                    entry = json.loads(line)
                except ValueError:
                    continue  # not a JSON record
                if not isinstance(entry, dict):
                    continue
                if local_day(entry.get("timestamp")) != target:
                    continue
                if entry.get("type") != "user":
                    continue
                message = entry.get("message")
                if not isinstance(message, dict):
                    continue
                text = first_text(message.get("content")).strip()
                if is_human_turn(text):
                    user_msgs.append(text[:300])
    except OSError as err:
        # Best effort: an unreadable file is reported and skipped.
        print(f"(skipped unreadable session file {path}: {err})", file=sys.stderr)
        continue
    if user_msgs:
        sessions_today.append((os.path.basename(path).replace(".jsonl", ""), user_msgs))

print(f"Sessions with activity on {target}: {len(sessions_today)}")
print("")
for sid, msgs in sessions_today:
    print(f"-- session {sid[:8]} ({len(msgs)} user turns) --")
    # The first five human prompts serve as the topic summary.
    for i, m in enumerate(msgs[:5], 1):
        print(f"  Q{i}: {m}")
    if len(msgs) > 5:
        print(f"  ...({len(msgs)-5} more turns)")
    print("")
PY
}

# The session directory name is the repository path with every '/' replaced
# by '-'.
# NOTE(review): only '/' is mapped; confirm how the directory is named for
# repository paths that contain other punctuation (for example '.').
PROJ_DIR_NAME=${REPO_ROOT//\//-}
SESSION_DIR="$HOME/.claude/projects/${PROJ_DIR_NAME}"
if [[ -d "$SESSION_DIR" ]]; then
  summarize_claude_sessions "$SESSION_DIR" "$DATE"
else
  echo "(Claude session dir not found: $SESSION_DIR)"
fi
echo ""

echo "===== TODAY NEW/MODIFIED .learnings/ FILES ====="

#######################################
# List the .learnings notes that belong to one day, as paths relative to
# the given root.
# Arguments: $1 - root directory that contains .learnings/
#            $2 - day as YYYY-MM-DD
# Outputs:   files under .learnings/ named "<day>*.md" (sorted), followed by
#            files under .learnings/ERRORS/ named "ERR-<YYYYMMDD>*.md" (sorted)
#######################################
list_learnings() {
  local root=$1 day=$2
  local compact_day=${day//-/}
  local path

  # Matching on the file-name prefix is more accurate than relying on mtime.
  # The root prefix is removed with a literal parameter expansion, so
  # characters in the path that are special to sed or to regular expressions
  # cannot break or distort the output.
  find "$root/.learnings" -type f -name "${day}*.md" 2>/dev/null \
    | while IFS= read -r path; do printf '%s\n' "${path#"$root"/}"; done \
    | sort

  # Error notes use a compact date: ERRORS/ERR-YYYYMMDD…
  find "$root/.learnings/ERRORS" -type f -name "ERR-${compact_day}*.md" 2>/dev/null \
    | while IFS= read -r path; do printf '%s\n' "${path#"$root"/}"; done \
    | sort
}

if [[ -d "$REPO_ROOT/.learnings" ]]; then
  list_learnings "$REPO_ROOT" "$DATE"
fi
echo ""

# Working-tree snapshot: short-format status, limited to the first 50
# entries, followed by the marker that closes the whole report.
printf '%s\n' "===== UNCOMMITTED CHANGES ====="
git status --short 2>/dev/null | head -n 50
printf '\n'

printf '%s\n' "===== END OF GATHER ====="
