import { Injectable, Inject, forwardRef } from '@nestjs/common';
import { createLogger } from '@core/observability/logging/config/winston.config';
import type { ProviderMessage, ProviderResponse } from '../providers/provider.types';
import type { SDKMessageLike } from './sdk-message-stream';
import { ProviderRegistry } from '../providers/provider-registry.service';

const logger = createLogger('QueryEngine');

export interface CompactionOptions {
  /** layer 1：硬上限 token budget；超 budget 触发 trim */
  tokenBudget: number;
  estimateTokens: (text: string) => number;
  /** layer 2：单条 tool_result 文本 token 上限；超出从中间 truncate */
  toolResultMaxTokens?: number;
  /** layer 5：滑窗保留最近 N 对（默认 20）；强制丢弃更早消息 */
  slidingWindowPairs?: number;
}

export interface CompactionResult {
  messages: ProviderMessage[];
  droppedCount: number;
  truncatedToolResults: number;
}

/**
 * QueryEngine —— PR4a TAOR 骨架 + PR4b Compaction（layer 1/2/5）。
 *
 * **PR4a 完整范围与现状**：
 *   - ✅ AsyncGenerator<SDKMessage> 出口形态（runStreaming() 单事件 yield）
 *   - ⚠️ TAOR 多轮循环：mock provider 始终 stop_reason=end_turn，单轮结束；
 *      真 tool_use 路径要等真 LLM provider 接进来（合规放行后）
 *   - ⚠️ StreamingToolExecutor 并发执行：当前是占位（单 turn 无 tool 调用）
 *   - ⚠️ siblingAbortController：当前不支持取消传播；进 PR4a 后期补
 *
 * **PR4b 完整范围与现状**：
 *   - ✅ layer 1 budget trim
 *   - ✅ layer 2 tool_result truncate
 *   - ✅ layer 5 sliding window
 *   - ⚠️ layer 3 semantic summary：需 LLM 调用，等合规通过 + provider 上线
 *   - ⚠️ layer 4 pin working set：working-set 概念在 PR4a 多轮后才有意义
 *
 * 详见 docs/modules/agent/02-architecture.md §1.2 主循环 / §1.3 Compaction
 */
@Injectable()
export class QueryEngine {
  constructor(
    @Inject(forwardRef(() => ProviderRegistry))
    private readonly providerRegistry: ProviderRegistry,
  ) {}

  /**
   * PR4b layer 3 semantic summary —— 在 budget trim 之前先用便宜 LLM 把老历史压缩。
   *
   * 触发条件：总 token > summarizeThreshold（默认 8000）AND 历史消息 > 6 条。
   * 行为：取前一半 user/assistant 消息送 qwen-turbo 摘要 → 替换成一条 system 消息。
   * 失败：silently fallback 到只走 layer 1/2/5（不影响主流程）。
   */
  async summarizeOldHistory(
    history: ProviderMessage[],
    options: {
      estimateTokens: (text: string) => number;
      summarizeThreshold?: number;
      keepLastPairs?: number;
    },
  ): Promise<{ messages: ProviderMessage[]; summarized: boolean; summary?: string }> {
    const threshold = options.summarizeThreshold ?? 8000;
    const keepLast = options.keepLastPairs ?? 3;
    const total = history.reduce((s, m) => s + options.estimateTokens(m.content), 0);
    if (total < threshold) return { messages: history, summarized: false };

    const systemMsgs = history.filter((m) => m.role === 'system');
    const nonSystem = history.filter((m) => m.role !== 'system');
    if (nonSystem.length < keepLast * 2 + 2) return { messages: history, summarized: false };

    const toSummarize = nonSystem.slice(0, nonSystem.length - keepLast * 2);
    const keep = nonSystem.slice(nonSystem.length - keepLast * 2);

    try {
      const response = await this.providerRegistry.invoke(
        {
          model: 'qwen-turbo',
          messages: [
            {
              role: 'system',
              content:
                '你是对话摘要器。把以下用户与 AI 助手的多轮对话压缩成不超过 300 字的中文摘要，保留关键事实、决定、未完成事项；不要省略具体数字、名称、日期。',
            },
            {
              role: 'user',
              content: toSummarize
                .map((m) => `[${m.role}] ${m.content}`)
                .join('\n---\n'),
            },
          ],
          maxTokens: 500,
          temperature: 0.3,
        },
        { model: 'qwen-turbo' },
      );
      const summary = response.text.trim();
      if (!summary) return { messages: history, summarized: false };

      const summarized: ProviderMessage[] = [
        ...systemMsgs,
        {
          role: 'system',
          content: `[历史摘要 PR4b-layer-3 by ${response.resolvedModel ?? 'qwen-turbo'}]\n${summary}`,
        },
        ...keep,
      ];
      logger.log(
        `Compaction layer-3 summary: ${toSummarize.length} msgs → 1 summary (${summary.length} chars)`,
      );
      return { messages: summarized, summarized: true, summary };
    } catch (err) {
      logger.warn(`Compaction layer-3 LLM summary failed: ${(err as Error).message}`);
      return { messages: history, summarized: false };
    }
  }

  compactHistory(history: ProviderMessage[], options: CompactionOptions): CompactionResult {
    const { tokenBudget, estimateTokens } = options;
    const toolResultMaxTokens = options.toolResultMaxTokens ?? 2000;
    const slidingWindowPairs = options.slidingWindowPairs ?? 20;

    let truncatedToolResults = 0;

    // layer 2：tool_result 单条 token 上限 → 中间 truncate
    let processed = history.map((m) => {
      if (m.role === 'tool' && estimateTokens(m.content) > toolResultMaxTokens) {
        truncatedToolResults += 1;
        return {
          ...m,
          content: truncateMiddle(m.content, toolResultMaxTokens, estimateTokens),
        };
      }
      return m;
    });

    // layer 5：sliding window 丢弃过早 user/assistant 对（system / tool 保留位置）
    const nonSystem = processed.filter((m) => m.role !== 'system');
    const systemMsgs = processed.filter((m) => m.role === 'system');
    const keepCount = slidingWindowPairs * 2;
    let droppedBySliding = 0;
    if (nonSystem.length > keepCount) {
      droppedBySliding = nonSystem.length - keepCount;
      processed = [...systemMsgs, ...nonSystem.slice(-keepCount)];
      logger.log(`Compaction layer-5 sliding-window dropped ${droppedBySliding} oldest msgs`);
    }

    // layer 1：硬 budget trim（如果 layer 5 之后仍超）。预计算 per-message tokens + 维护
    // running total，避免每次 splice 后重算整列（O(n) 一次而不是 O(n²)）
    let droppedByBudget = 0;
    const tokens = processed.map((m) => estimateTokens(m.content));
    let running = tokens.reduce((s, t) => s + t, 0);
    while (running > tokenBudget && processed.length > 0) {
      const idx = processed.findIndex((m) => m.role !== 'system');
      if (idx < 0) break;
      running -= tokens[idx];
      processed.splice(idx, 1);
      tokens.splice(idx, 1);
      droppedByBudget += 1;
    }
    if (droppedByBudget > 0) {
      logger.log(`Compaction layer-1 budget-trim dropped ${droppedByBudget} more msgs`);
    }

    return {
      messages: processed,
      droppedCount: droppedBySliding + droppedByBudget,
      truncatedToolResults,
    };
  }

  /**
   * PR4a TAOR 主循环（AsyncGenerator 形态）。
   *
   * 现状：mock provider 同步返回 end_turn，循环单 turn 即终止；
   * 真 streaming 接进来后这里改为消费 provider 的 chunk stream + tool_use 派发。
   *
   * 调用方：messages.service 暂仍直接用 ProviderRegistry.invoke()。
   * 这个方法是 PR4a 形态预埋——SSE 接进来后切到这里。
   */
  async *runStreaming(args: {
    invoke: () => Promise<ProviderResponse>;
  }): AsyncGenerator<SDKMessageLike, void, void> {
    const response = await args.invoke();
    yield {
      type: 'assistant_text',
      text: response.text,
      model: response.resolvedModel ?? response.model,
    };
    yield {
      type: 'turn_done',
      stopReason: response.stopReason,
      usage: response.usage,
    };
    // 真 streaming + tool_use 接进来后：
    //   while (!stopReason 决策为 end_turn) {
    //     消费 provider chunk，遇 tool_use → StreamingToolExecutor.dispatch
    //     等 tool_result → 拼回 messages 再次调 provider
    //     siblingAbortController 监听用户 cancel
    //   }
  }
}

function truncateMiddle(
  text: string,
  maxTokens: number,
  estimateTokens: (s: string) => number,
): string {
  const total = estimateTokens(text);
  if (total <= maxTokens) return text;
  const ratio = maxTokens / total;
  const keepChars = Math.floor(text.length * ratio);
  const halfKeep = Math.floor(keepChars / 2);
  return (
    text.slice(0, halfKeep) +
    `\n... [truncated ${total - maxTokens} tokens by PR4b layer-2] ...\n` +
    text.slice(text.length - halfKeep)
  );
}
