import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { PrismaService } from '@core/database/prisma/prisma.service';
import { KnowledgeArticle, RagflowSourceType, SyncItemStatus, SyncStatus } from '@prisma/client';
import * as crypto from 'crypto';
import * as fs from 'fs';
import * as path from 'path';
import { SharePointSyncService, SyncedSharePointDocument } from './sharepoint-sync.service';
import { RagflowService } from './ragflow.service';
import { ProseMirrorTextService } from './prosemirror-text.service';

/**
 * Counter/status snapshot of one sync task, returned by the sync entry
 * points and by the task status query of RagflowSyncService.
 */
export interface SyncResult {
  /** Id of the sync task this snapshot belongs to. */
  taskId: string;
  /** Number of items the run set out to handle (reported as 0 when unset). */
  totalItems: number;
  /** Items whose sync finished with a non-PROCESSING item status. */
  processedItems: number;
  /** Items whose sync result was still SyncItemStatus.PROCESSING. */
  processingItems: number;
  /** Items whose sync threw an error. */
  failedItems: number;
  /** Items that were skipped instead of synced. */
  skippedItems: number;
  /** Sum of the token counts of the processed items. */
  processedTokens: number;
  /** Sum of the chunk counts of the processed items. */
  processedChunks: number;
  /** Task status at the time the snapshot was taken. */
  status: SyncStatus;
  /** Error message attached to the task, if any. */
  error?: string;
  /** What triggered the task (e.g. 'SCHEDULED_FULL', 'MANUAL_FULL'); only filled in by some callers. */
  triggerSource?: string | null;
}

/** One row of the sync task listing produced by RagflowSyncService.listTasks. */
export interface SyncTaskListItem {
  /** Sync task id. */
  id: string;
  /** Task type string stored on the task (this file creates 'FULL_SYNC' and 'INCREMENTAL'). */
  taskType: string;
  /** Trigger recorded when the task was created; null when absent. */
  triggerSource?: string | null;
  /** Task status. */
  status: SyncStatus;
  /** Total item count of the run; null when the task has none stored. */
  totalItems: number | null;
  /** Count of processed items. */
  processedItems: number;
  /** Count of items still processing (0 when not stored). */
  processingItems: number;
  /** Count of failed items. */
  failedItems: number;
  /** Count of skipped items (0 when not stored). */
  skippedItems: number;
  /** Accumulated token count (0 when not stored). */
  processedTokens: number;
  /** Accumulated chunk count (0 when not stored). */
  processedChunks: number;
  /** Error message stored on the task; null when absent. */
  error?: string | null;
  /** Timestamp stored in the task's startedAt column. */
  startedAt?: Date | null;
  /** Timestamp stored in the task's completedAt column. */
  completedAt?: Date | null;
  /** Row creation time; the listing is ordered by this value, newest first. */
  createdAt: Date;
}

/** One page of sync tasks together with the overall task count. */
export interface SyncTaskListResult {
  /** Tasks on the requested page. */
  items: SyncTaskListItem[];
  /** Count of all sync tasks, independent of paging. */
  total: number;
}

/** A source item that a sync task skipped, as returned by listSkippedItems. */
export interface SyncTaskSkippedItem {
  /** Id of the skipped-item record. */
  id: string;
  /** Sync task the record belongs to. */
  taskId: string;
  /** Kind of source the item came from. */
  sourceType: RagflowSourceType;
  /** Id of the item within its source. */
  sourceId: string;
  /** File name recorded for the item. */
  filename: string;
  /** File extension; null when not recorded. */
  fileExtension: string | null;
  /** MIME type; null when not recorded. */
  mimeType: string | null;
  /** Why the item was skipped. */
  reason: string;
  /** Record creation time; listings are ordered by this value, newest first. */
  createdAt: Date;
}

/** One page of skipped items of a task together with the task's skipped-item count. */
export interface SyncTaskSkippedResult {
  /** Skipped items on the requested page. */
  items: SyncTaskSkippedItem[];
  /** Count of all skipped items of the task, independent of paging. */
  total: number;
}

/**
 * A source item handed to RAGFlow by a sync task. Used both for the
 * "processed" (COMPLETED) and the "processing" (PROCESSING) listings.
 */
export interface SyncTaskProcessedItem {
  /** Id of the processed-item record. */
  id: string;
  /** Sync task the record belongs to. */
  taskId: string;
  /** Kind of source the item came from. */
  sourceType: RagflowSourceType;
  /** Id of the item within its source. */
  sourceId: string;
  /** File name recorded for the item. */
  filename: string;
  /** File extension; null when not recorded. */
  fileExtension: string | null;
  /** MIME type; null when not recorded. */
  mimeType: string | null;
  /** Id of the corresponding RAGFlow document; null when not recorded. */
  ragflowDocumentId: string | null;
  /** Token count recorded for the document; null when not recorded. */
  tokenCount: number | null;
  /** Chunk count recorded for the document; null when not recorded. */
  chunkCount: number | null;
  /** Size of the source in bytes (converted with Number() when listed); null when not recorded. */
  sourceSizeBytes: number | null;
  /** Item status (the listings filter on COMPLETED or PROCESSING). */
  status: SyncItemStatus;
  /** Record creation time; listings are ordered by this value, newest first. */
  createdAt: Date;
}

/** One page of processed/processing items together with the matching item count. */
export interface SyncTaskProcessedResult {
  /** Items on the requested page. */
  items: SyncTaskProcessedItem[];
  /** Count of all items matching the same task and status filter, independent of paging. */
  total: number;
}

/** A source item whose sync failed, as returned by listFailedItems. */
export interface SyncTaskFailedItem {
  /** Id of the failed-item record. */
  id: string;
  /** Sync task the record belongs to. */
  taskId: string;
  /** Kind of source the item came from. */
  sourceType: RagflowSourceType;
  /** Id of the item within its source. */
  sourceId: string;
  /** File name recorded for the item. */
  filename: string;
  /** File extension; null when not recorded. */
  fileExtension: string | null;
  /** MIME type; null when not recorded. */
  mimeType: string | null;
  /** Error message recorded for the failure. */
  error: string;
  /** Record creation time; listings are ordered by this value, newest first. */
  createdAt: Date;
}

/** One page of failed items of a task together with the task's failed-item count. */
export interface SyncTaskFailedResult {
  /** Failed items on the requested page. */
  items: SyncTaskFailedItem[];
  /** Count of all failed items of the task, independent of paging. */
  total: number;
}

/**
 * Per-document outcome of pushing one source item to RAGFlow.
 * NOTE(review): presumably the result shape of syncSharePointDocument /
 * syncKnowledgeArticle (the fields read from their results in this file
 * match) — confirm against those methods.
 */
interface RagflowDocumentStats {
  /** Id of the document on the RAGFlow side. */
  ragflowDocumentId: string;
  /** Token count reported for the document. */
  tokenCount: number;
  /** Chunk count reported for the document. */
  chunkCount: number;
  /** Size of the synced source in bytes, when known. */
  sourceSizeBytes?: number | null;
  /** Item status; PROCESSING results are counted separately from finished ones. */
  status: SyncItemStatus;
}

/**
 * Normalized processing state of a RAGFlow document.
 * NOTE(review): presumably what resolveRagflowDocumentState returns — confirm.
 */
interface RagflowDocumentState {
  /** Coarse state the remote document is in. */
  state: 'COMPLETED' | 'PROCESSING' | 'FAILED';
  /** Token count of the document. */
  tokenCount: number;
  /** Chunk count of the document. */
  chunkCount: number;
  /** Failure description; read when state is 'FAILED'. */
  error?: string;
}

@Injectable()
export class RagflowSyncService {
  /** Nest logger tagged with this service's class name. */
  private readonly logger = new Logger(RagflowSyncService.name);

  /**
   * All collaborators are injected and stored as private readonly fields.
   *
   * @param prisma Database client used for sync tasks, their item records and document mappings.
   * @param sharePointSyncService Supplies SharePoint metadata (full and delta) and stores delta cursors.
   * @param ragflowService Client for the RAGFlow dataset/document API.
   * @param prosemirrorTextService ProseMirror text helper (not used in the part of the class shown here).
   * @param configService Application configuration access.
   */
  constructor(
    private readonly prisma: PrismaService,
    private readonly sharePointSyncService: SharePointSyncService,
    private readonly ragflowService: RagflowService,
    private readonly prosemirrorTextService: ProseMirrorTextService,
    private readonly configService: ConfigService,
  ) {}

  /**
   * Runs a full sync and resolves once it has finished.
   *
   * If another sync task is already active, nothing is started: a warning is
   * logged and a zeroed SYNCING snapshot carrying the active task's id is
   * returned. Otherwise a 'FULL_SYNC' task row is created and executed inline.
   *
   * @param triggerSource Label stored on the new task row.
   * @returns The result of the executed sync, or the placeholder described above.
   */
  async syncAll(triggerSource = 'SCHEDULED_FULL'): Promise<SyncResult> {
    const runningTaskId = await this.getActiveSyncTaskId();

    if (!runningTaskId) {
      const created = await this.prisma.syncTask.create({
        data: {
          taskType: 'FULL_SYNC',
          triggerSource,
          status: SyncStatus.SYNCING,
          startedAt: new Date(),
          lastProgressAt: new Date(),
        },
      });
      return this.executeSync(created.id);
    }

    this.logger.warn(`Sync already running (${runningTaskId}), skipping scheduled sync.`);

    // No work was done by this call, so every counter is reported as zero.
    const zeroedCounters = {
      totalItems: 0,
      processedItems: 0,
      processingItems: 0,
      failedItems: 0,
      skippedItems: 0,
      processedTokens: 0,
      processedChunks: 0,
    };
    return { taskId: runningTaskId, ...zeroedCounters, status: SyncStatus.SYNCING };
  }

  /**
   * Starts a full sync in the background and returns immediately.
   *
   * When a sync task is already active its id is returned and no new task is
   * created. Errors escaping the background run are only logged.
   *
   * @param triggerSource Label stored on the new task row.
   * @returns Id of the active or newly created sync task.
   */
  async startFullSync(triggerSource = 'MANUAL_FULL'): Promise<string> {
    const runningTaskId = await this.getActiveSyncTaskId();
    if (runningTaskId) {
      return runningTaskId;
    }

    const { id: newTaskId } = await this.prisma.syncTask.create({
      data: {
        taskType: 'FULL_SYNC',
        triggerSource,
        status: SyncStatus.SYNCING,
        startedAt: new Date(),
        lastProgressAt: new Date(),
      },
    });

    const reportBackgroundFailure = (error: any): void => {
      this.logger.error(`Full sync background error: ${error.message}`);
    };
    // Fire and forget: the caller polls the task by id instead of awaiting the run.
    void this.executeSync(newTaskId).catch(reportBackgroundFailure);

    return newTaskId;
  }

  /**
   * Starts an incremental (delta) sync in the background.
   *
   * Resolution order:
   *  1. a sync task is already active -> its id is returned, nothing is started;
   *  2. the SharePoint service asks for a full-sync fallback -> a full sync is
   *     started with trigger 'DELTA_FALLBACK' and `fallbackToFull` is true;
   *  3. otherwise an 'INCREMENTAL' task is created and executed in the background.
   *
   * @returns The task id, whether a full sync was started instead, and the
   *          fallback reason when there is one.
   */
  async startDeltaSync(): Promise<{ taskId: string; fallbackToFull: boolean; reason?: string }> {
    const runningTaskId = await this.getActiveSyncTaskId();
    if (runningTaskId) {
      return { taskId: runningTaskId, fallbackToFull: false };
    }

    const fallbackDecision = await this.sharePointSyncService.shouldFallbackToFullSyncForDelta();
    if (fallbackDecision.shouldFallback) {
      const why = fallbackDecision.reason;
      this.logger.warn(
        `Delta sync fallback to full sync: ${why ?? 'delta cursor unavailable'}`,
      );
      const fullSyncTaskId = await this.startFullSync('DELTA_FALLBACK');
      return { taskId: fullSyncTaskId, fallbackToFull: true, reason: why };
    }

    const { id: deltaTaskId } = await this.prisma.syncTask.create({
      data: {
        taskType: 'INCREMENTAL',
        triggerSource: 'MANUAL_DELTA',
        status: SyncStatus.SYNCING,
        startedAt: new Date(),
        lastProgressAt: new Date(),
      },
    });

    const reportBackgroundFailure = (error: any): void => {
      this.logger.error(`Delta sync background error: ${error.message}`);
    };
    // Fire and forget: errors escaping the run are logged, never rethrown.
    void this.executeDeltaSync(deltaTaskId).catch(reportBackgroundFailure);

    return { taskId: deltaTaskId, fallbackToFull: false };
  }
  /**
   * Manually terminates a sync task.
   *
   * A task that is not in SYNCING state is left untouched and its current
   * counters are returned. A SYNCING task is marked FAILED with the manual
   * termination message and stamped with completion/progress timestamps.
   *
   * @param taskId Id of the task to terminate.
   * @returns Snapshot of the task after the call.
   * @throws NotFoundException when no task with that id exists.
   */
  async terminateSyncTask(taskId: string): Promise<SyncResult> {
    const task = await this.prisma.syncTask.findUnique({
      where: { id: taskId },
    });
    if (!task) {
      throw new NotFoundException(`Sync task ${taskId} not found`);
    }

    // Maps a task row to the public result shape, defaulting missing counters to 0.
    const toResult = (row: NonNullable<typeof task>): SyncResult => ({
      taskId: row.id,
      totalItems: row.totalItems ?? 0,
      processedItems: row.processedItems ?? 0,
      processingItems: row.processingItems ?? 0,
      failedItems: row.failedItems ?? 0,
      skippedItems: row.skippedItems ?? 0,
      processedTokens: row.processedTokens ?? 0,
      processedChunks: row.processedChunks ?? 0,
      status: row.status,
      error: row.error ?? undefined,
    });

    if (task.status === SyncStatus.SYNCING) {
      const terminated = await this.prisma.syncTask.update({
        where: { id: task.id },
        data: {
          status: SyncStatus.FAILED,
          error: '已手动终止',
          completedAt: new Date(),
          lastProgressAt: new Date(),
        },
      });
      return toResult(terminated);
    }

    return toResult(task);
  }

  /**
   * Executes a full sync for an already-created sync task row.
   *
   * Steps, in order: resolve the RAGFlow dataset and make sure its embedding
   * model is set (dropping local document mappings when that reset the
   * dataset), fetch SharePoint metadata, load all non-deleted knowledge
   * articles, sync every SharePoint document and then every article while
   * keeping counters and a progress heartbeat up to date, clean up stale
   * mappings, store the SharePoint delta cursors and finally persist the
   * counters and status on the task.
   *
   * Errors that reach the outer catch mark the task FAILED and are reported
   * through the returned result (with zeroed counters) instead of being
   * rethrown.
   *
   * @param taskId Id of the sync task row to run and update.
   * @returns Counters and final status of the run.
   */
  private async executeSync(taskId: string): Promise<SyncResult> {
    try {
      const startTime = Date.now();
      const timeoutMs = this.getSyncTimeoutMs();
      const progressTimeoutMs = this.getSyncProgressTimeoutMs();
      let lastProgressAtMs = Date.now();
      // Guard checks before any work is done; the same three checks are repeated per item below.
      await this.assertTaskActive(taskId);
      await this.assertNotTimedOut(taskId, startTime, timeoutMs);
      await this.assertNotStalled(taskId, lastProgressAtMs, progressTimeoutMs);

      const datasetId = await this.ragflowService.resolveDatasetId();
      const embeddingUpdate = await this.ragflowService.ensureDatasetEmbeddingModel(datasetId);
      if (embeddingUpdate.reset) {
        // The dataset was reset, so the locally stored document mappings for it are dropped.
        await this.prisma.ragflowDocument.deleteMany({
          where: { datasetId },
        });
      }
      let driveId = '';
      let documents: SyncedSharePointDocument[] = [];
      let cursors: Array<{ scopePath: string; deltaLink: string }> = [];
      let spSyncError: string | null = null;

      try {
        const result = await this.sharePointSyncService.syncAllMetadata();
        driveId = result.driveId;
        documents = result.documents;
        cursors = result.cursors ?? [];
      } catch (error: any) {
        // A SharePoint failure does not abort the run: articles are still synced and
        // the message is stored on the task at the end.
        spSyncError = error?.message || 'SharePoint 同步失败';
        this.logger.warn(`SharePoint sync skipped: ${spSyncError}`);
      }

      const articles = await this.prisma.knowledgeArticle.findMany({
        where: { deletedAt: null },
      });

      const totalItems = documents.length + articles.length;
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: { totalItems },
      });

      let processedItems = 0;
      let processingItems = 0;
      let failedItems = 0;
      let skippedItems = 0;
      let processedTokens = 0;
      let processedChunks = 0;

      // Phase 1: SharePoint documents.
      if (documents.length > 0) {
        for (const document of documents) {
          try {
            await this.assertTaskActive(taskId);
            await this.assertNotTimedOut(taskId, startTime, timeoutMs);
            await this.assertNotStalled(taskId, lastProgressAtMs, progressTimeoutMs);
            const skipReason =
              this.getUnsupportedSharePointReason(document) ??
              this.getMultimodalSkipReason(document);
            if (skipReason) {
              this.logger.warn(
                `Unsupported file type for RAGFlow, skipped: ${document.item.name} (${skipReason})`,
              );
              await this.recordSkippedItem(taskId, document, skipReason);
              skippedItems++;
              // forceCounts is set on every 10th handled item.
              lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
                processedItems,
                processingItems,
                failedItems,
                skippedItems,
                processedTokens,
                processedChunks,
                forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
              });
              continue;
            }
            const syncResult = await this.syncSharePointDocument(
              taskId,
              datasetId,
              driveId,
              document,
            );
            if (!syncResult) {
              // A falsy result is counted as skipped; no skipped-item record is written here.
              skippedItems++;
              lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
                processedItems,
                processingItems,
                failedItems,
                skippedItems,
                processedTokens,
                processedChunks,
                forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
              });
              continue;
            }
            await this.recordProcessedItem({
              taskId,
              sourceType: RagflowSourceType.SP_DOCUMENT,
              sourceId: document.record.id,
              filename: document.item.name ?? document.record.title ?? document.record.id,
              fileExtension: document.record.fileExtension ?? null,
              mimeType: document.item.file?.mimeType ?? null,
              ragflowDocumentId: syncResult.ragflowDocumentId,
              tokenCount: syncResult.tokenCount,
              chunkCount: syncResult.chunkCount,
              sourceSizeBytes: document.item.size ?? null,
              status: syncResult.status,
            });
            if (syncResult.status === SyncItemStatus.PROCESSING) {
              // Still being processed remotely: counted separately, tokens/chunks not added yet.
              processingItems++;
            } else {
              processedItems++;
              processedTokens += syncResult.tokenCount;
              processedChunks += syncResult.chunkCount;
            }
            lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
              processedItems,
              processingItems,
              failedItems,
              skippedItems,
              processedTokens,
              processedChunks,
              forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
            });
          } catch (error: any) {
            // NOTE(review): this also catches whatever the assert* guards above throw, so such
            // an error is recorded against the current document and the loop continues —
            // confirm that this is intended.
            failedItems++;
            this.logger.error(`Failed to sync SharePoint item ${document.item.id}: ${error.message}`);
            const embeddingFailure = this.isEmbeddingCredentialError(error);
            await this.recordFailedItem({
              taskId,
              sourceType: RagflowSourceType.SP_DOCUMENT,
              sourceId: document.record.id,
              filename: document.item.name ?? document.record.title ?? document.record.id,
              fileExtension: document.record.fileExtension ?? null,
              mimeType: document.item.file?.mimeType ?? null,
              error: error?.message || 'SharePoint sync failed',
              sourceSizeBytes: document.item.size ?? null,
              ragflowLimitBytes: this.getRagflowMaxContentLengthBytes(),
            });
            if (embeddingFailure) {
              // Embedding credential errors abort the whole run via the outer catch.
              throw error;
            }
            lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
              processedItems,
              processingItems,
              failedItems,
              skippedItems,
              processedTokens,
              processedChunks,
              forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
            });
          }
        }
      }

      // Phase 2: knowledge articles, recorded as markdown files.
      for (const article of articles) {
        try {
          await this.assertTaskActive(taskId);
          await this.assertNotTimedOut(taskId, startTime, timeoutMs);
          await this.assertNotStalled(taskId, lastProgressAtMs, progressTimeoutMs);
          const syncResult = await this.syncKnowledgeArticle(datasetId, article);
          await this.recordProcessedItem({
            taskId,
            sourceType: RagflowSourceType.ARTICLE,
            sourceId: article.id,
            filename: `${article.title || article.id}.md`,
            fileExtension: 'md',
            mimeType: 'text/markdown',
            ragflowDocumentId: syncResult.ragflowDocumentId,
            tokenCount: syncResult.tokenCount,
            chunkCount: syncResult.chunkCount,
            sourceSizeBytes: syncResult.sourceSizeBytes ?? null,
            status: syncResult.status,
          });
          if (syncResult.status === SyncItemStatus.PROCESSING) {
            processingItems++;
          } else {
            processedItems++;
            processedTokens += syncResult.tokenCount;
            processedChunks += syncResult.chunkCount;
          }
          lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
            processedItems,
            processingItems,
            failedItems,
            skippedItems,
            processedTokens,
            processedChunks,
            forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
          });
        } catch (error: any) {
          failedItems++;
          this.logger.error(`Failed to sync article ${article.id}: ${error.message}`);
          const embeddingFailure = this.isEmbeddingCredentialError(error);
          await this.recordFailedItem({
            taskId,
            sourceType: RagflowSourceType.ARTICLE,
            sourceId: article.id,
            filename: `${article.title || article.id}.md`,
            fileExtension: 'md',
            mimeType: 'text/markdown',
            error: error?.message || 'Article sync failed',
          });
          if (embeddingFailure) {
            // Embedding credential errors abort the whole run via the outer catch.
            throw error;
          }
          lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
            processedItems,
            processingItems,
            failedItems,
            skippedItems,
            processedTokens,
            processedChunks,
            forceCounts:
              (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
          });
        }
      }

      // The last argument tells the cleanup whether the SharePoint metadata fetch failed.
      await this.cleanupStaleMappings(datasetId, documents, articles, Boolean(spSyncError));

      if (cursors.length > 0 && driveId) {
        await this.sharePointSyncService.saveDeltaCursors(driveId, cursors);
      }

      // The task stays SYNCING while any item is still being processed remotely.
      const finalStatus = processingItems > 0 ? SyncStatus.SYNCING : SyncStatus.COMPLETED;
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: {
          status: finalStatus,
          processedItems,
          processingItems,
          failedItems,
          skippedItems,
          processedTokens,
          processedChunks,
          completedAt: finalStatus === SyncStatus.COMPLETED ? new Date() : null,
          error: spSyncError,
          lastProgressAt: new Date(),
        },
      });

      return {
        taskId,
        totalItems,
        processedItems,
        processingItems,
        failedItems,
        skippedItems,
        processedTokens,
        processedChunks,
        status: finalStatus,
      };
    } catch (error: any) {
      // Anything that escaped the per-item handling fails the whole task. The counters
      // gathered so far are not written here and the returned result reports zeros.
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: {
          status: SyncStatus.FAILED,
          error: error.message || 'Unknown error',
          completedAt: new Date(),
          lastProgressAt: new Date(),
        },
      });

      return {
        taskId,
        totalItems: 0,
        processedItems: 0,
        processingItems: 0,
        failedItems: 0,
        skippedItems: 0,
        processedTokens: 0,
        processedChunks: 0,
        status: SyncStatus.FAILED,
        error: error.message,
      };
    }
  }

  /**
   * Executes an incremental (delta) SharePoint sync for an already-created
   * sync task row.
   *
   * Steps, in order: resolve the RAGFlow dataset and make sure its embedding
   * model is set (dropping local document mappings when that reset the
   * dataset), fetch the SharePoint delta (changed documents and removed item
   * ids), sync every changed document while keeping counters and a progress
   * heartbeat up to date, process the removed items, store the new delta
   * cursors and persist the final counters and status on the task.
   *
   * Final status: SYNCING while any item is still being processed remotely,
   * otherwise FAILED when an item failed or the delta fetch failed, otherwise
   * COMPLETED. `completedAt` is stamped for both terminal outcomes.
   *
   * Errors that reach the outer catch mark the task FAILED and are reported
   * through the returned result instead of being rethrown.
   *
   * @param taskId Id of the sync task row to run and update.
   * @returns Counters and final status of the run.
   */
  private async executeDeltaSync(taskId: string): Promise<SyncResult> {
    try {
      const startTime = Date.now();
      const timeoutMs = this.getSyncTimeoutMs();
      const progressTimeoutMs = this.getSyncProgressTimeoutMs();
      let lastProgressAtMs = Date.now();
      // Guard checks before any work is done; the same three checks are repeated per document.
      await this.assertTaskActive(taskId);
      await this.assertNotTimedOut(taskId, startTime, timeoutMs);
      await this.assertNotStalled(taskId, lastProgressAtMs, progressTimeoutMs);

      const datasetId = await this.ragflowService.resolveDatasetId();
      const embeddingUpdate = await this.ragflowService.ensureDatasetEmbeddingModel(datasetId);
      if (embeddingUpdate.reset) {
        // The dataset was reset, so the locally stored document mappings for it are dropped.
        await this.prisma.ragflowDocument.deleteMany({
          where: { datasetId },
        });
      }

      let driveId = '';
      let documents: SyncedSharePointDocument[] = [];
      let removedItemIds: string[] = [];
      let cursors: Array<{ scopePath: string; deltaLink: string }> = [];
      let spSyncError: string | null = null;

      try {
        const result = await this.sharePointSyncService.syncDeltaMetadata();
        driveId = result.driveId;
        documents = result.documents;
        removedItemIds = result.removedItemIds;
        cursors = result.cursors;
      } catch (error: any) {
        // The delta fetch failure is remembered and turns the final status into FAILED.
        spSyncError = error?.message || 'SharePoint 增量同步失败';
        this.logger.warn(`SharePoint delta sync skipped: ${spSyncError}`);
      }

      const totalItems = documents.length + removedItemIds.length;
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: { totalItems },
      });

      let processedItems = 0;
      let processingItems = 0;
      let failedItems = 0;
      let skippedItems = 0;
      let processedTokens = 0;
      let processedChunks = 0;

      for (const document of documents) {
        try {
          await this.assertTaskActive(taskId);
          await this.assertNotTimedOut(taskId, startTime, timeoutMs);
          await this.assertNotStalled(taskId, lastProgressAtMs, progressTimeoutMs);
          const skipReason =
            this.getUnsupportedSharePointReason(document) ??
            this.getMultimodalSkipReason(document);
          if (skipReason) {
            this.logger.warn(
              `Unsupported file type for RAGFlow, skipped: ${document.item.name} (${skipReason})`,
            );
            await this.recordSkippedItem(taskId, document, skipReason);
            skippedItems++;
            lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
              processedItems,
              processingItems,
              failedItems,
              skippedItems,
              processedTokens,
              processedChunks,
              forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
            });
            continue;
          }
          const syncResult = await this.syncSharePointDocument(
            taskId,
            datasetId,
            driveId,
            document,
          );
          if (!syncResult) {
            // Same accounting as the full sync: a document that produced no result counts
            // as skipped, so the counters keep adding up to totalItems.
            skippedItems++;
          } else {
            await this.recordProcessedItem({
              taskId,
              sourceType: RagflowSourceType.SP_DOCUMENT,
              sourceId: document.record.id,
              filename: document.record.title,
              fileExtension: document.record.fileExtension ?? null,
              mimeType: document.item.file?.mimeType ?? null,
              ragflowDocumentId: syncResult.ragflowDocumentId,
              tokenCount: syncResult.tokenCount,
              chunkCount: syncResult.chunkCount,
              sourceSizeBytes: document.item.size ?? null,
              status: syncResult.status,
            });
            if (syncResult.status === SyncItemStatus.PROCESSING) {
              // Still being processed remotely: counted separately, tokens/chunks not added yet.
              processingItems++;
            } else {
              processedItems++;
              processedTokens += syncResult.tokenCount;
              processedChunks += syncResult.chunkCount;
            }
          }
        } catch (error: any) {
          const message = error?.message || 'SharePoint 同步失败';
          await this.recordFailedItem({
            taskId,
            sourceType: RagflowSourceType.SP_DOCUMENT,
            sourceId: document.record.id,
            filename: document.record.title,
            fileExtension: document.record.fileExtension ?? null,
            mimeType: document.item.file?.mimeType ?? null,
            error: message,
          });
          failedItems++;
          if (this.isEmbeddingCredentialError(error)) {
            // Embedding credential errors abort the whole run via the outer catch.
            throw error;
          }
        }
        // Reached after a synced, result-less or failed document (the skip branch above
        // sends its own heartbeat and continues).
        lastProgressAtMs = await this.updateProgressHeartbeat(taskId, {
          processedItems,
          processingItems,
          failedItems,
          skippedItems,
          processedTokens,
          processedChunks,
          forceCounts: (processedItems + processingItems + failedItems + skippedItems) % 10 === 0,
        });
      }

      if (removedItemIds.length > 0 && driveId) {
        const removedResult = await this.processRemovedSharePointItems({
          taskId,
          datasetId,
          driveId,
          removedItemIds,
        });
        processedItems += removedResult.processedItems;
        failedItems += removedResult.failedItems;
        skippedItems += removedResult.skippedItems;
      }

      if (cursors.length > 0 && driveId) {
        await this.sharePointSyncService.saveDeltaCursors(driveId, cursors);
      }

      const finalError = spSyncError || (failedItems > 0 ? '部分增量同步失败' : null);
      const finalStatus =
        processingItems > 0
          ? SyncStatus.SYNCING
          : (failedItems > 0 || spSyncError ? SyncStatus.FAILED : SyncStatus.COMPLETED);
      // COMPLETED and FAILED both end the run; only a task that is still SYNCING has
      // no completion time. (Every other FAILED write in this class stamps completedAt.)
      const runFinished = finalStatus !== SyncStatus.SYNCING;

      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: {
          processedItems,
          processingItems,
          failedItems,
          skippedItems,
          processedTokens,
          processedChunks,
          status: finalStatus,
          error: finalError,
          completedAt: runFinished ? new Date() : null,
          lastProgressAt: new Date(),
        },
      });

      return {
        taskId,
        totalItems,
        processedItems,
        processingItems,
        failedItems,
        skippedItems,
        processedTokens,
        processedChunks,
        status: finalStatus,
        error: finalError ?? undefined,
      };
    } catch (error: any) {
      // Anything that escaped the per-document handling fails the whole task. The counters
      // gathered so far are not written here; the result reports a single failure.
      const message = error?.message || 'Unknown error';
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: {
          status: SyncStatus.FAILED,
          error: message,
          completedAt: new Date(),
          lastProgressAt: new Date(),
        },
      });

      return {
        taskId,
        totalItems: 0,
        processedItems: 0,
        processingItems: 0,
        failedItems: 1,
        skippedItems: 0,
        processedTokens: 0,
        processedChunks: 0,
        status: SyncStatus.FAILED,
        error: message,
      };
    }
  }

  /**
   * Returns the current counters and status of a sync task.
   *
   * The task's PROCESSING items are refreshed first, then the task row is
   * re-read and passed through normalizeSyncTaskStatus before being mapped
   * to the public result shape.
   *
   * @param taskId Id of the task to look up.
   * @returns The task snapshot, or null when no task with that id exists.
   */
  async getTaskStatus(taskId: string): Promise<SyncResult | null> {
    // Existence must be checked before refreshing: with no PROCESSING items the refresh
    // issues `syncTask.update` for this id, which rejects for an unknown task and would
    // make the documented `null` result unreachable.
    const existing = await this.prisma.syncTask.findUnique({
      where: { id: taskId },
    });
    if (!existing) {
      return null;
    }

    await this.refreshProcessingItems(taskId);

    // Re-read so the snapshot reflects whatever the refresh just changed.
    const task = await this.prisma.syncTask.findUnique({
      where: { id: taskId },
    });
    if (!task) {
      return null;
    }

    const normalized = await this.normalizeSyncTaskStatus(task);
    return {
      taskId: normalized.id,
      totalItems: normalized.totalItems ?? 0,
      processedItems: normalized.processedItems,
      processingItems: normalized.processingItems ?? 0,
      failedItems: normalized.failedItems,
      skippedItems: normalized.skippedItems ?? 0,
      processedTokens: normalized.processedTokens ?? 0,
      processedChunks: normalized.processedChunks ?? 0,
      status: normalized.status,
      error: normalized.error ?? undefined,
      triggerSource: normalized.triggerSource ?? null,
    };
  }

  /**
   * Lists sync tasks, newest first.
   *
   * Tasks on the page that are SYNCING with items still processing get their
   * processing items refreshed one after another; if any were refreshed the
   * page is read again so the returned rows reflect the refresh. Every row is
   * passed through normalizeSyncTaskStatus before being mapped.
   *
   * @param limit Maximum number of tasks to return.
   * @param offset Number of tasks to skip.
   * @returns The page of tasks and the count of all tasks.
   */
  async listTasks(limit: number, offset: number): Promise<SyncTaskListResult> {
    const fetchPage = () =>
      this.prisma.syncTask.findMany({
        orderBy: { createdAt: 'desc' },
        take: limit,
        skip: offset,
      });

    const [firstPass, total] = await Promise.all([fetchPage(), this.prisma.syncTask.count()]);

    const needsRefresh = firstPass.filter(
      (row) => row.status === SyncStatus.SYNCING && (row.processingItems ?? 0) > 0,
    );
    for (const row of needsRefresh) {
      await this.refreshProcessingItems(row.id);
    }

    // Only pay for a second read when a refresh may have changed the rows.
    const page = needsRefresh.length > 0 ? await fetchPage() : firstPass;

    const normalizedRows = await Promise.all(
      page.map((row) => this.normalizeSyncTaskStatus(row)),
    );

    const items = normalizedRows.map((row) => ({
      id: row.id,
      taskType: row.taskType,
      triggerSource: row.triggerSource ?? null,
      status: row.status,
      totalItems: row.totalItems ?? null,
      processedItems: row.processedItems,
      processingItems: row.processingItems ?? 0,
      failedItems: row.failedItems,
      skippedItems: row.skippedItems ?? 0,
      processedTokens: row.processedTokens ?? 0,
      processedChunks: row.processedChunks ?? 0,
      error: row.error ?? null,
      startedAt: row.startedAt,
      completedAt: row.completedAt,
      createdAt: row.createdAt,
    }));

    return { items, total };
  }

  /**
   * Lists the items a sync task skipped, newest first.
   *
   * @param taskId Task whose skipped items are listed.
   * @param limit Maximum number of items to return.
   * @param offset Number of items to skip.
   * @returns The page of skipped items and the count of all skipped items of the task.
   */
  async listSkippedItems(
    taskId: string,
    limit: number,
    offset: number,
  ): Promise<SyncTaskSkippedResult> {
    const where = { taskId };

    const [rows, total] = await Promise.all([
      this.prisma.syncTaskSkippedItem.findMany({
        where,
        orderBy: { createdAt: 'desc' },
        take: limit,
        skip: offset,
      }),
      this.prisma.syncTaskSkippedItem.count({ where }),
    ]);

    const items = rows.map((row) => ({
      id: row.id,
      taskId: row.taskId,
      sourceType: row.sourceType,
      sourceId: row.sourceId,
      filename: row.filename,
      fileExtension: row.fileExtension ?? null,
      mimeType: row.mimeType ?? null,
      reason: row.reason,
      createdAt: row.createdAt,
    }));

    return { items, total };
  }

  /**
   * Lists the items of a sync task whose status is COMPLETED, newest first.
   *
   * @param taskId Task whose completed items are listed.
   * @param limit Maximum number of items to return.
   * @param offset Number of items to skip.
   * @returns The page of completed items and the count of all completed items of the task.
   */
  async listProcessedItems(
    taskId: string,
    limit: number,
    offset: number,
  ): Promise<SyncTaskProcessedResult> {
    const [items, total] = await Promise.all([
      this.prisma.syncTaskProcessedItem.findMany({
        where: { taskId, status: SyncItemStatus.COMPLETED },
        orderBy: { createdAt: 'desc' },
        take: limit,
        skip: offset,
      }),
      this.prisma.syncTaskProcessedItem.count({ where: { taskId, status: SyncItemStatus.COMPLETED } }),
    ]);

    return {
      items: items.map((item) => ({
        id: item.id,
        taskId: item.taskId,
        sourceType: item.sourceType,
        sourceId: item.sourceId,
        filename: item.filename,
        fileExtension: item.fileExtension ?? null,
        mimeType: item.mimeType ?? null,
        ragflowDocumentId: item.ragflowDocumentId ?? null,
        tokenCount: item.tokenCount ?? null,
        chunkCount: item.chunkCount ?? null,
        // Explicit null check: a stored size of 0 is falsy and must stay 0, not become null.
        sourceSizeBytes: item.sourceSizeBytes == null ? null : Number(item.sourceSizeBytes),
        status: item.status,
        createdAt: item.createdAt,
      })),
      total,
    };
  }

  /**
   * Lists the items of a sync task that failed, newest first.
   *
   * @param taskId Task whose failed items are listed.
   * @param limit Maximum number of items to return.
   * @param offset Number of items to skip.
   * @returns The page of failed items and the count of all failed items of the task.
   */
  async listFailedItems(
    taskId: string,
    limit: number,
    offset: number,
  ): Promise<SyncTaskFailedResult> {
    const where = { taskId };

    const [rows, total] = await Promise.all([
      this.prisma.syncTaskFailedItem.findMany({
        where,
        orderBy: { createdAt: 'desc' },
        take: limit,
        skip: offset,
      }),
      this.prisma.syncTaskFailedItem.count({ where }),
    ]);

    const items = rows.map((row) => ({
      id: row.id,
      taskId: row.taskId,
      sourceType: row.sourceType,
      sourceId: row.sourceId,
      filename: row.filename,
      fileExtension: row.fileExtension ?? null,
      mimeType: row.mimeType ?? null,
      error: row.error,
      createdAt: row.createdAt,
    }));

    return { items, total };
  }

  /**
   * Lists the items of a sync task whose status is still PROCESSING, newest first.
   *
   * @param taskId Task whose processing items are listed.
   * @param limit Maximum number of items to return.
   * @param offset Number of items to skip.
   * @returns The page of processing items and the count of all processing items of the task.
   */
  async listProcessingItems(
    taskId: string,
    limit: number,
    offset: number,
  ): Promise<SyncTaskProcessedResult> {
    const [items, total] = await Promise.all([
      this.prisma.syncTaskProcessedItem.findMany({
        where: { taskId, status: SyncItemStatus.PROCESSING },
        orderBy: { createdAt: 'desc' },
        take: limit,
        skip: offset,
      }),
      this.prisma.syncTaskProcessedItem.count({ where: { taskId, status: SyncItemStatus.PROCESSING } }),
    ]);

    return {
      items: items.map((item) => ({
        id: item.id,
        taskId: item.taskId,
        sourceType: item.sourceType,
        sourceId: item.sourceId,
        filename: item.filename,
        fileExtension: item.fileExtension ?? null,
        mimeType: item.mimeType ?? null,
        ragflowDocumentId: item.ragflowDocumentId ?? null,
        tokenCount: item.tokenCount ?? null,
        chunkCount: item.chunkCount ?? null,
        // Explicit null check: a stored size of 0 is falsy and must stay 0, not become null.
        sourceSizeBytes: item.sourceSizeBytes == null ? null : Number(item.sourceSizeBytes),
        status: item.status,
        createdAt: item.createdAt,
      })),
      total,
    };
  }

  /**
   * Re-checks every item of the task that is still marked PROCESSING against
   * RAGFlow and settles it, then recomputes the task's aggregate counters.
   *
   * Per item, in creation order:
   * - no RAGFlow document id: moved to the failed list;
   * - remote still processing: left untouched unless the configured progress
   *   timeout is positive and has elapsed since the item was created, in which
   *   case it is moved to the failed list;
   * - remote failed: moved to the failed list and the mapping row is marked FAILED;
   * - otherwise: the item is marked COMPLETED with the remote token/chunk counts
   *   and the mapping row is marked COMPLETED.
   *
   * When the task has no processing items, only `processingItems` is reset to 0.
   *
   * @param taskId Sync task to refresh.
   */
  private async refreshProcessingItems(taskId: string): Promise<void> {
    const pending = await this.prisma.syncTaskProcessedItem.findMany({
      where: { taskId, status: SyncItemStatus.PROCESSING },
      orderBy: { createdAt: 'asc' },
    });

    if (pending.length === 0) {
      await this.prisma.syncTask.update({
        where: { id: taskId },
        data: { processingItems: 0 },
      });
      return;
    }

    const datasetId = await this.ragflowService.resolveDatasetId();
    const progressTimeoutMs = this.getSyncProgressTimeoutMs();
    const now = Date.now();

    // Records the failure first, then removes the row from the processed list.
    const moveToFailed = async (
      entry: (typeof pending)[number],
      reason: string,
    ): Promise<void> => {
      await this.recordFailedItem({
        taskId,
        sourceType: entry.sourceType,
        sourceId: entry.sourceId,
        filename: entry.filename,
        fileExtension: entry.fileExtension ?? null,
        mimeType: entry.mimeType ?? null,
        error: reason,
      });
      await this.prisma.syncTaskProcessedItem.delete({ where: { id: entry.id } });
    };

    for (const entry of pending) {
      const remoteDocumentId = entry.ragflowDocumentId;
      if (!remoteDocumentId) {
        await moveToFailed(entry, 'RAGFlow 文档缺失，无法获取处理状态');
        continue;
      }

      const remote = await this.ragflowService.getDocumentStatus(datasetId, remoteDocumentId);
      const resolved = this.resolveRagflowDocumentState(remote);

      if (resolved.state === 'PROCESSING') {
        const timedOut =
          progressTimeoutMs > 0 && now - entry.createdAt.getTime() > progressTimeoutMs;
        if (timedOut) {
          const timeoutMinutes = Math.round(progressTimeoutMs / 60000);
          await moveToFailed(entry, `处理中超时（超过 ${timeoutMinutes} 分钟未完成）`);
        }
        continue;
      }

      if (resolved.state === 'FAILED') {
        const reason = resolved.error ?? 'RAGFlow 解析失败';
        await moveToFailed(entry, reason);
        await this.prisma.ragflowDocument.updateMany({
          where: { ragflowDocumentId: remoteDocumentId },
          data: {
            status: SyncStatus.FAILED,
            syncError: reason,
            lastSyncedAt: new Date(),
          },
        });
        continue;
      }

      // Any remaining state is treated as finished.
      await this.prisma.syncTaskProcessedItem.update({
        where: { id: entry.id },
        data: {
          status: SyncItemStatus.COMPLETED,
          tokenCount: resolved.tokenCount,
          chunkCount: resolved.chunkCount,
        },
      });
      await this.prisma.ragflowDocument.updateMany({
        where: { ragflowDocumentId: remoteDocumentId },
        data: {
          status: SyncStatus.COMPLETED,
          syncError: null,
          lastSyncedAt: new Date(),
        },
      });
    }

    // Recount from the item tables rather than tracking deltas in the loop above.
    const [completedAgg, stillProcessing, failedTotal, skippedTotal] = await Promise.all([
      this.prisma.syncTaskProcessedItem.aggregate({
        where: { taskId, status: SyncItemStatus.COMPLETED },
        _count: { _all: true },
        _sum: { tokenCount: true, chunkCount: true },
      }),
      this.prisma.syncTaskProcessedItem.count({
        where: { taskId, status: SyncItemStatus.PROCESSING },
      }),
      this.prisma.syncTaskFailedItem.count({ where: { taskId } }),
      this.prisma.syncTaskSkippedItem.count({ where: { taskId } }),
    ]);

    await this.prisma.syncTask.update({
      where: { id: taskId },
      data: {
        processedItems: completedAgg._count._all,
        processingItems: stillProcessing,
        failedItems: failedTotal,
        skippedItems: skippedTotal,
        processedTokens: completedAgg._sum.tokenCount ?? 0,
        processedChunks: completedAgg._sum.chunkCount ?? 0,
        lastProgressAt: new Date(),
      },
    });
  }

  /**
   * Syncs a single knowledge article to RAGFlow inside its own `SINGLE_ITEM`
   * sync task and returns the task's outcome.
   *
   * Outcomes:
   * - article missing (or soft-deleted): task is stored as FAILED with
   *   `failedItems = 1` and error `Article not found`;
   * - article sync throws: the failure is recorded as a failed item; the task
   *   itself still ends COMPLETED with `failedItems = 1`, because the final
   *   status only depends on whether anything is still processing;
   * - RAGFlow is still parsing: task stays SYNCING with `processingItems = 1`
   *   and no `completedAt`;
   * - any other error (dataset resolution, embedding-model check, bookkeeping
   *   writes): task is stored as FAILED with `failedItems = 1`.
   *
   * @param articleId Id of the knowledge article to sync.
   * @returns Counters and final status of the created sync task.
   * @throws If the task row cannot be created, or if persisting a task-level
   *   failure itself fails.
   */
  async syncSingleArticle(articleId: string): Promise<SyncResult> {
    const task = await this.prisma.syncTask.create({
      data: {
        taskType: 'SINGLE_ITEM',
        status: SyncStatus.SYNCING,
        startedAt: new Date(),
        totalItems: 1,
      },
    });

    // Persists a task-level failure and builds the matching result from the same
    // values, so the stored counters and the returned counters always agree.
    const failTask = async (message: string): Promise<SyncResult> => {
      await this.prisma.syncTask.update({
        where: { id: task.id },
        data: {
          status: SyncStatus.FAILED,
          failedItems: 1,
          error: message,
          completedAt: new Date(),
        },
      });
      return {
        taskId: task.id,
        totalItems: 1,
        processedItems: 0,
        processingItems: 0,
        failedItems: 1,
        skippedItems: 0,
        processedTokens: 0,
        processedChunks: 0,
        status: SyncStatus.FAILED,
        error: message,
      };
    };

    try {
      const article = await this.prisma.knowledgeArticle.findFirst({
        where: { id: articleId, deletedAt: null },
      });

      if (!article) {
        return await failTask('Article not found');
      }

      const datasetId = await this.ragflowService.resolveDatasetId();
      const embeddingUpdate = await this.ragflowService.ensureDatasetEmbeddingModel(datasetId);
      if (embeddingUpdate.reset) {
        // The dataset was reset, so every local mapping for it is dropped.
        await this.prisma.ragflowDocument.deleteMany({
          where: { datasetId },
        });
      }

      const filename = `${article.title || article.id}.md`;
      let processedItems = 0;
      let processingItems = 0;
      let failedItems = 0;
      let processedTokens = 0;
      let processedChunks = 0;

      try {
        const syncResult = await this.syncKnowledgeArticle(datasetId, article);
        await this.recordProcessedItem({
          taskId: task.id,
          sourceType: RagflowSourceType.ARTICLE,
          sourceId: article.id,
          filename,
          fileExtension: 'md',
          mimeType: 'text/markdown',
          ragflowDocumentId: syncResult.ragflowDocumentId,
          tokenCount: syncResult.tokenCount,
          chunkCount: syncResult.chunkCount,
          sourceSizeBytes: syncResult.sourceSizeBytes ?? null,
          status: syncResult.status,
        });
        if (syncResult.status === SyncItemStatus.PROCESSING) {
          processingItems++;
        } else {
          processedItems++;
          processedTokens += syncResult.tokenCount;
          processedChunks += syncResult.chunkCount;
        }
      } catch (error: any) {
        failedItems++;
        // Optional chaining: a thrown non-Error value must not crash the handler.
        this.logger.error(`Failed to sync article ${article.id}: ${error?.message}`);
        await this.recordFailedItem({
          taskId: task.id,
          sourceType: RagflowSourceType.ARTICLE,
          sourceId: article.id,
          filename,
          fileExtension: 'md',
          mimeType: 'text/markdown',
          error: error?.message || 'Article sync failed',
        });
      }

      // The task only stays open while RAGFlow is still parsing the document.
      const finalStatus = processingItems > 0 ? SyncStatus.SYNCING : SyncStatus.COMPLETED;
      await this.prisma.syncTask.update({
        where: { id: task.id },
        data: {
          status: finalStatus,
          processedItems,
          processingItems,
          failedItems,
          skippedItems: 0,
          processedTokens,
          processedChunks,
          completedAt: finalStatus === SyncStatus.COMPLETED ? new Date() : null,
        },
      });

      return {
        taskId: task.id,
        totalItems: 1,
        processedItems,
        processingItems,
        failedItems,
        skippedItems: 0,
        processedTokens,
        processedChunks,
        status: finalStatus,
      };
    } catch (error: any) {
      return failTask(error?.message || 'Unknown error');
    }
  }

  /**
   * Syncs one SharePoint document to RAGFlow, skipping the download and upload
   * whenever the document can be shown to be unchanged.
   *
   * @param taskId Sync task that skipped-item records are attached to.
   * @param datasetId RAGFlow dataset the document belongs to.
   * @param driveId SharePoint drive the item is downloaded from.
   * @param document The SharePoint item, its index record and its previous metadata.
   * @returns Stats of the RAGFlow document, or `null` when the document was
   *   recorded as skipped.
   * @throws If the remote document failed with a non-recoverable error, if the
   *   download still reports 304 after an unconditional retry, or if any
   *   underlying service call fails.
   */
  private async syncSharePointDocument(
    taskId: string,
    datasetId: string,
    driveId: string,
    document: SyncedSharePointDocument,
  ): Promise<RagflowDocumentStats | null> {
    // Multimodal files are skipped up front when the required model is not configured.
    const multimodalSkip = this.getMultimodalSkipReason(document);
    if (multimodalSkip) {
      await this.recordSkippedItem(taskId, document, multimodalSkip);
      return null;
    }
    const existingMapping = await this.prisma.ragflowDocument.findUnique({
      where: {
        sourceType_sourceId: {
          sourceType: RagflowSourceType.SP_DOCUMENT,
          sourceId: document.record.id,
        },
      },
    });

    const metadataChanged = this.hasSharePointMetadataChanged(
      document.previousMetadata,
      document.record,
    );
    // Set when the remote copy failed in a recoverable way and must be uploaded
    // again even though SharePoint reports no change.
    let forceResyncByRemoteInvalid = false;
    if (!metadataChanged && existingMapping?.ragflowDocumentId) {
      const healthCheckDue = this.shouldCheckRemoteHealth(existingMapping.lastSyncedAt);
      // Fast path: unchanged, already completed, and no health check due yet.
      if (existingMapping.status === SyncStatus.COMPLETED && !healthCheckDue) {
        await this.recordSkippedItem(taskId, document, '未变更，跳过下载与嵌入');
        return null;
      }
      const remote = await this.ragflowService.getDocumentStatus(
        datasetId,
        existingMapping.ragflowDocumentId,
      );
      const state = this.resolveRagflowDocumentState(remote);
      if (state.state === 'FAILED') {
        // Non-recoverable failures are surfaced to the caller; recoverable ones
        // fall through to a fresh download and upload.
        if (!this.ragflowService.isRecoverableDocumentFailure(state.error)) {
          throw new Error(state.error ?? 'RAGFlow parse failed');
        }
        forceResyncByRemoteInvalid = true;
        this.logger.warn(
          `RAGFlow document is invalid for ${document.record.id}, force re-upload. reason=${state.error ?? 'unknown'}`,
        );
      } else {
        // Remote copy is healthy (completed or still processing): refresh the
        // mapping with the observed state.
        await this.prisma.ragflowDocument.update({
          where: { id: existingMapping.id },
          data: {
            status: state.state === 'COMPLETED' ? SyncStatus.COMPLETED : SyncStatus.SYNCING,
            syncError: state.state === 'COMPLETED' ? null : existingMapping.syncError,
            lastSyncedAt: new Date(),
          },
        });
        // The decision uses the mapping status read BEFORE the update above.
        if (existingMapping.status === SyncStatus.COMPLETED) {
          await this.recordSkippedItem(taskId, document, '未变更（远端健康），跳过下载与嵌入');
          return null;
        }
        return {
          ragflowDocumentId: existingMapping.ragflowDocumentId,
          tokenCount: state.tokenCount,
          chunkCount: state.chunkCount,
          status:
            state.state === 'COMPLETED' ? SyncItemStatus.COMPLETED : SyncItemStatus.PROCESSING,
        };
      }
    }

    // Conditional download using the previous ETag / modified time, unless a
    // re-upload is being forced.
    let downloadResult = await this.sharePointSyncService.downloadItemToTemp(
      driveId,
      document.item.id,
      document.item.name,
      document.item.size ?? undefined,
      forceResyncByRemoteInvalid
        ? undefined
        : {
            ifNoneMatch: document.previousMetadata?.spEtag ?? null,
            ifModifiedSince: document.previousMetadata?.spModifiedAt ?? null,
          },
    );

    if (downloadResult.status === 'not_modified') {
      if (existingMapping?.status === SyncStatus.COMPLETED && !forceResyncByRemoteInvalid) {
        await this.recordSkippedItem(taskId, document, '未变更（304），跳过下载与嵌入');
        return null;
      }
      // Not modified, but there is no completed mapping to rely on: download
      // again without conditions to obtain the content.
      downloadResult = await this.sharePointSyncService.downloadItemToTemp(
        driveId,
        document.item.id,
        document.item.name,
        document.item.size ?? undefined,
      );
    }

    if (downloadResult.status === 'not_modified') {
      throw new Error('SharePoint download returned 304 without usable content');
    }

    const { filePath, hash, contentType } = downloadResult;
    const displayName = this.normalizeDisplayName(document.item.name, contentType);

    try {
      return await this.upsertRagflowDocument({
        datasetId,
        sourceType: RagflowSourceType.SP_DOCUMENT,
        sourceId: document.record.id,
        title: document.record.title,
        webUrl: document.record.webUrl ?? undefined,
        docType: document.record.docType,
        authorityLevel: document.record.docAuthorityLevel,
        contentHash: hash,
        filePath,
        displayName,
      });
    } finally {
      // The downloaded temp file is removed whether or not the upsert succeeded.
      await this.safeRemove(filePath);
    }
  }

  /**
   * Converts a knowledge article to plain text, writes it to a temporary `.md`
   * file and upserts that file into RAGFlow.
   *
   * @param datasetId RAGFlow dataset to upload into.
   * @param article Article to sync; the title (or the id when the title is
   *   empty) is used for the file name.
   * @returns The upsert stats plus the UTF-8 byte size of the extracted text.
   */
  private async syncKnowledgeArticle(
    datasetId: string,
    article: KnowledgeArticle,
  ): Promise<RagflowDocumentStats> {
    const markdownName = `${article.title || article.id}.md`;
    const body = this.prosemirrorTextService.toText(article.content);
    const sourceSizeBytes = Buffer.byteLength(body, 'utf8');
    const contentHash = this.hashContent(body);
    const tempPath = await this.writeTempFile(markdownName, body);

    try {
      const uploaded = await this.upsertRagflowDocument({
        datasetId,
        sourceType: RagflowSourceType.ARTICLE,
        sourceId: article.id,
        title: article.title,
        docType: 'ARTICLE',
        authorityLevel: article.status === 'PUBLISHED' ? 'PUBLISHED' : 'DRAFT',
        contentHash,
        filePath: tempPath,
        displayName: markdownName,
      });
      return { ...uploaded, sourceSizeBytes };
    } finally {
      // The temp file is always cleaned up, including when the upsert throws.
      await this.safeRemove(tempPath);
    }
  }

  /**
   * Handles SharePoint items that were removed at the source: deletes their
   * RAGFlow documents, then their local mapping and index rows, and records
   * each one as a processed item of the task.
   *
   * If the remote delete fails, every matched record is recorded as a failed
   * item and no local rows are removed.
   *
   * @param input.taskId Sync task the item records are attached to.
   * @param input.datasetId RAGFlow dataset the documents live in.
   * @param input.driveId SharePoint drive the removed items belonged to.
   * @param input.removedItemIds SharePoint item ids reported as removed.
   * @returns Counters for this step; `skippedItems` is always 0.
   */
  private async processRemovedSharePointItems(input: {
    taskId: string;
    datasetId: string;
    driveId: string;
    removedItemIds: string[];
  }): Promise<{ processedItems: number; failedItems: number; skippedItems: number }> {
    const nothingDone = { processedItems: 0, failedItems: 0, skippedItems: 0 };
    if (input.removedItemIds.length === 0) {
      return nothingDone;
    }

    const indexRows = await this.prisma.sPDocumentIndex.findMany({
      where: {
        spItemId: { in: input.removedItemIds },
        spDriveId: input.driveId,
      },
    });
    if (indexRows.length === 0) {
      return nothingDone;
    }

    const indexRowIds = indexRows.map((row) => row.id);
    const mappings = await this.prisma.ragflowDocument.findMany({
      where: {
        sourceType: RagflowSourceType.SP_DOCUMENT,
        sourceId: { in: indexRowIds },
      },
    });
    const mappingBySourceId = new Map(mappings.map((mapping) => [mapping.sourceId, mapping]));

    // Remote ids to delete, in index-row order; rows without a mapping contribute nothing.
    const remoteIds = indexRows.flatMap((row) => {
      const remoteId = mappingBySourceId.get(row.id)?.ragflowDocumentId;
      return remoteId ? [remoteId] : [];
    });

    if (remoteIds.length > 0) {
      try {
        await this.ragflowService.deleteDocuments(input.datasetId, remoteIds);
      } catch (error: any) {
        const reason = error?.message || 'RAGFlow 删除失败';
        for (const row of indexRows) {
          await this.recordFailedItem({
            taskId: input.taskId,
            sourceType: RagflowSourceType.SP_DOCUMENT,
            sourceId: row.id,
            filename: row.title,
            fileExtension: row.fileExtension ?? null,
            mimeType: row.fileType ?? null,
            error: reason,
          });
        }
        return { processedItems: 0, failedItems: indexRows.length, skippedItems: 0 };
      }
    }

    // Mapping rows and index rows are removed together in one transaction.
    await this.prisma.$transaction([
      this.prisma.ragflowDocument.deleteMany({
        where: {
          sourceType: RagflowSourceType.SP_DOCUMENT,
          sourceId: { in: indexRowIds },
        },
      }),
      this.prisma.sPDocumentIndex.deleteMany({
        where: { id: { in: indexRowIds } },
      }),
    ]);

    for (const row of indexRows) {
      await this.recordProcessedItem({
        taskId: input.taskId,
        sourceType: RagflowSourceType.SP_DOCUMENT,
        sourceId: row.id,
        filename: row.title,
        fileExtension: row.fileExtension ?? null,
        mimeType: row.fileType ?? null,
        ragflowDocumentId: mappingBySourceId.get(row.id)?.ragflowDocumentId ?? null,
        tokenCount: null,
        chunkCount: null,
        sourceSizeBytes: null,
        status: SyncItemStatus.COMPLETED,
      });
    }

    return { processedItems: indexRows.length, failedItems: 0, skippedItems: 0 };
  }

  /**
   * Ensures RAGFlow holds an up-to-date copy of one source document and keeps
   * the local `ragflowDocument` mapping row in step with it.
   *
   * Flow:
   * 1. If a mapping exists with the same content hash, the remote state decides:
   *    completed or still processing returns immediately without uploading;
   *    a non-recoverable failure marks the mapping FAILED and throws;
   *    a recoverable failure or a missing remote document falls through to a
   *    re-upload.
   * 2. Otherwise the old remote document (if any, and not already missing) is
   *    deleted, the file is uploaded, metadata is applied and parsing is started.
   *    A parse error containing `Documents not found` triggers one re-upload.
   * 3. The mapping is upserted and then updated with the state RAGFlow reports.
   *
   * @param options Source identity, metadata, content hash and the local file to upload.
   * @returns The RAGFlow document id, its token/chunk counts and whether it is
   *   completed or still processing.
   * @throws If RAGFlow reports a failed parse, or if any service/database call fails.
   */
  private async upsertRagflowDocument(options: {
    datasetId: string;
    sourceType: RagflowSourceType;
    sourceId: string;
    title: string;
    webUrl?: string;
    docType?: string;
    authorityLevel?: string;
    contentHash: string;
    filePath: string;
    displayName: string;
  }): Promise<RagflowDocumentStats> {
    // Composite unique key of the mapping row, shared by every lookup and write below.
    const mappingKey = {
      sourceType_sourceId: {
        sourceType: options.sourceType,
        sourceId: options.sourceId,
      },
    };
    const parseFailedMessage = 'RAGFlow parse failed';

    const existing = await this.prisma.ragflowDocument.findUnique({ where: mappingKey });

    let shouldDeleteRemote = true;
    if (existing && existing.contentHash === options.contentHash) {
      const remote = await this.ragflowService.getDocumentStatus(
        options.datasetId,
        existing.ragflowDocumentId,
      );
      if (!remote) {
        this.logger.warn(`RAGFlow document missing, re-uploading: ${existing.ragflowDocumentId}`);
        // Nothing to delete remotely; go straight to the upload.
        shouldDeleteRemote = false;
      } else {
        const state = this.resolveRagflowDocumentState(remote);
        if (state.state === 'FAILED') {
          if (this.ragflowService.isRecoverableDocumentFailure(state.error)) {
            this.logger.warn(
              `RAGFlow document failed, retrying upload: ${existing.ragflowDocumentId}`,
            );
          } else {
            await this.prisma.ragflowDocument.update({
              where: { id: existing.id },
              data: {
                status: SyncStatus.FAILED,
                syncError: state.error ?? parseFailedMessage,
                lastSyncedAt: new Date(),
              },
            });
            throw new Error(state.error ?? parseFailedMessage);
          }
        } else {
          // Unchanged content and a healthy remote copy: refresh the mapping and
          // report the current state without uploading again.
          const completed = state.state === 'COMPLETED';
          await this.prisma.ragflowDocument.update({
            where: { id: existing.id },
            data: {
              status: completed ? SyncStatus.COMPLETED : SyncStatus.SYNCING,
              syncError: null,
              lastSyncedAt: new Date(),
            },
          });
          return {
            ragflowDocumentId: existing.ragflowDocumentId,
            tokenCount: state.tokenCount,
            chunkCount: state.chunkCount,
            status: completed ? SyncItemStatus.COMPLETED : SyncItemStatus.PROCESSING,
          };
        }
      }
    }

    if (existing && shouldDeleteRemote) {
      await this.ragflowService.deleteDocuments(options.datasetId, [existing.ragflowDocumentId]);
    }

    const applyMetadata = async (documentId: string) => {
      await this.ragflowService.updateDocumentMetadata(options.datasetId, documentId, {
        source_type: options.sourceType,
        source_id: options.sourceId,
        source_title: options.title,
        source_url: options.webUrl ?? '',
        doc_type: options.docType ?? '',
        authority_level: options.authorityLevel ?? '',
      });
    };

    let uploaded = await this.ragflowService.uploadDocument(
      options.datasetId,
      options.filePath,
      options.displayName,
    );
    await applyMetadata(uploaded.id);

    try {
      await this.ragflowService.parseDocuments(options.datasetId, [uploaded.id]);
    } catch (error: any) {
      const message = error?.message || '';
      if (!message.includes('Documents not found')) {
        throw error;
      }
      // The freshly uploaded document was not found by the parser: upload once more.
      this.logger.warn(
        `RAGFlow parse failed for missing document, retrying upload: ${uploaded.id}`,
      );
      uploaded = await this.ragflowService.uploadDocument(
        options.datasetId,
        options.filePath,
        options.displayName,
      );
      await applyMetadata(uploaded.id);
      await this.ragflowService.parseDocuments(options.datasetId, [uploaded.id]);
    }

    // Persist the mapping right away so the uploaded document is never orphaned.
    // It is stored as SYNCING, not COMPLETED: the real state is only known after
    // the status fetch below, and if the follow-up update fails the row must not
    // claim a completion that was never observed.
    await this.prisma.ragflowDocument.upsert({
      where: mappingKey,
      create: {
        sourceType: options.sourceType,
        sourceId: options.sourceId,
        ragflowDocumentId: uploaded.id,
        datasetId: options.datasetId,
        contentHash: options.contentHash,
        status: SyncStatus.SYNCING,
        lastSyncedAt: new Date(),
      },
      update: {
        ragflowDocumentId: uploaded.id,
        datasetId: options.datasetId,
        contentHash: options.contentHash,
        status: SyncStatus.SYNCING,
        syncError: null,
        lastSyncedAt: new Date(),
      },
    });

    const state = await this.fetchDocumentState(options.datasetId, uploaded.id);
    if (state.state === 'FAILED') {
      await this.prisma.ragflowDocument.update({
        where: mappingKey,
        data: {
          status: SyncStatus.FAILED,
          syncError: state.error ?? parseFailedMessage,
          lastSyncedAt: new Date(),
        },
      });
      throw new Error(state.error ?? parseFailedMessage);
    }

    const completed = state.state === 'COMPLETED';
    await this.prisma.ragflowDocument.update({
      where: mappingKey,
      data: {
        status: completed ? SyncStatus.COMPLETED : SyncStatus.SYNCING,
        syncError: null,
        lastSyncedAt: new Date(),
      },
    });
    return {
      ragflowDocumentId: uploaded.id,
      tokenCount: state.tokenCount,
      chunkCount: state.chunkCount,
      status: completed ? SyncItemStatus.COMPLETED : SyncItemStatus.PROCESSING,
    };
  }

  /**
   * Fetches a document's status from RAGFlow and resolves it to a sync state.
   * Never throws: a fetch error is logged and reported as a FAILED state that
   * carries the error message.
   *
   * @param datasetId RAGFlow dataset the document belongs to.
   * @param documentId RAGFlow document id.
   * @returns The resolved state, or a FAILED state with zero counts on error.
   */
  private async fetchDocumentState(
    datasetId: string,
    documentId: string,
  ): Promise<RagflowDocumentState> {
    try {
      const remote = await this.ragflowService.getDocumentStatus(datasetId, documentId);
      return this.resolveRagflowDocumentState(remote);
    } catch (error: any) {
      const reason = error?.message || 'unknown error';
      this.logger.warn(`Failed to fetch RAGFlow document stats ${documentId}: ${reason}`);
      return { state: 'FAILED', tokenCount: 0, chunkCount: 0, error: reason };
    }
  }

  /**
   * Maps a raw RAGFlow document payload to FAILED / COMPLETED / PROCESSING.
   *
   * Checks are applied in this order and the first match wins:
   * 1. no payload: FAILED;
   * 2. `run` (or `status`) is `FAIL`: FAILED; `DONE`: COMPLETED;
   * 3. the progress/error message looks like an error: FAILED;
   * 4. the message contains a known in-progress phrase: PROCESSING;
   * 5. `run` is `RUNNING`, `PROCESSING` or empty: PROCESSING;
   * 6. any tokens or chunks were produced: COMPLETED;
   * 7. otherwise: PROCESSING.
   *
   * @param remote Raw payload from RAGFlow; snake_case and camelCase field names
   *   are both read.
   * @returns The resolved state with token/chunk counts (0 when missing or not finite).
   */
  private resolveRagflowDocumentState(remote: any | null): RagflowDocumentState {
    if (!remote) {
      return { state: 'FAILED', tokenCount: 0, chunkCount: 0, error: 'RAGFlow 文档不存在' };
    }

    // Coerces a raw counter to a finite number, falling back to 0.
    const toCount = (raw: unknown): number => {
      const parsed = Number(raw);
      return Number.isFinite(parsed) ? parsed : 0;
    };
    const tokenCount = toCount(remote.token_count ?? remote.tokenCount ?? 0);
    const chunkCount = toCount(remote.chunk_count ?? remote.chunkCount ?? 0);

    const run = String(remote.run ?? remote.status ?? '').toUpperCase();
    const progressText = String(remote.progress_msg ?? remote.progressMsg ?? '');
    const errorText = String(remote.error ?? '');
    // The progress message takes precedence over the error field.
    const message = progressText || errorText;
    const messageLower = message.toLowerCase();

    if (run === 'FAIL') {
      return { state: 'FAILED', tokenCount, chunkCount, error: message || 'RAGFlow 解析失败' };
    }
    if (run === 'DONE') {
      return { state: 'COMPLETED', tokenCount, chunkCount };
    }

    const errorPattern = /\[error\]|exception|internal server error/i;
    if (errorPattern.test(message)) {
      return { state: 'FAILED', tokenCount, chunkCount, error: message || 'RAGFlow 解析失败' };
    }

    const inProgressPhrases = [
      'tasks are ahead in the queue',
      'task has been received',
      'start to parse',
      'text extraction',
      'image extraction',
      'embedding chunks',
      'indexing done',
    ];
    const reportsProgress = inProgressPhrases.some((phrase) => messageLower.includes(phrase));
    const runIndicatesProgress = ['RUNNING', 'PROCESSING', ''].includes(run);
    if (reportsProgress || runIndicatesProgress) {
      return { state: 'PROCESSING', tokenCount, chunkCount };
    }

    // Unrecognised run value: having produced output is taken as completion.
    const hasOutput = tokenCount > 0 || chunkCount > 0;
    return { state: hasOutput ? 'COMPLETED' : 'PROCESSING', tokenCount, chunkCount };
  }

  /**
   * Removes RAGFlow documents and local mapping rows whose source no longer
   * appears in the current sync input.
   *
   * For each source type, a mapping is stale when its `sourceId` is not among
   * the given documents/articles; when the given list is empty, every mapping
   * of that type is stale. SharePoint mappings are left alone when
   * `skipSharePoint` is true. Remote documents are deleted per source type
   * first; the local mapping rows are deleted together at the end.
   *
   * @param datasetId RAGFlow dataset to delete remote documents from.
   * @param documents SharePoint documents that are part of the current sync.
   * @param articles Knowledge articles that are part of the current sync.
   * @param skipSharePoint When true, SharePoint mappings are not cleaned up.
   */
  private async cleanupStaleMappings(
    datasetId: string,
    documents: SyncedSharePointDocument[],
    articles: KnowledgeArticle[],
    skipSharePoint: boolean,
  ) {
    const staleMappingIds: string[] = [];

    // Finds stale mappings of one source type, queues their local ids for
    // deletion and deletes their remote documents.
    const purgeRemote = async (
      sourceType: RagflowSourceType,
      liveSourceIds: ReadonlySet<string>,
    ): Promise<void> => {
      const where =
        liveSourceIds.size === 0
          ? { sourceType }
          : { sourceType, sourceId: { notIn: Array.from(liveSourceIds) } };
      const stale = await this.prisma.ragflowDocument.findMany({
        where,
        select: { id: true, ragflowDocumentId: true },
      });
      staleMappingIds.push(...stale.map((mapping) => mapping.id));
      if (stale.length > 0) {
        await this.ragflowService.deleteDocuments(
          datasetId,
          stale.map((mapping) => mapping.ragflowDocumentId),
        );
      }
    };

    const liveSharePointIds = new Set(documents.map((doc) => doc.record.id));
    const liveArticleIds = new Set(articles.map((article) => article.id));

    if (!skipSharePoint) {
      await purgeRemote(RagflowSourceType.SP_DOCUMENT, liveSharePointIds);
    }
    await purgeRemote(RagflowSourceType.ARTICLE, liveArticleIds);

    if (staleMappingIds.length > 0) {
      await this.prisma.ragflowDocument.deleteMany({
        where: { id: { in: staleMappingIds } },
      });
    }
  }

  /**
   * Best-effort bookkeeping: stores a skipped-item row for a SharePoint
   * document. A failure to write the row is logged as a warning and swallowed.
   *
   * @param taskId Sync task the row belongs to.
   * @param document SharePoint document that was skipped.
   * @param reason Reason stored with the row.
   */
  private async recordSkippedItem(
    taskId: string,
    document: SyncedSharePointDocument,
    reason: string,
  ): Promise<void> {
    const { item, record } = document;
    try {
      const data = {
        taskId,
        sourceType: RagflowSourceType.SP_DOCUMENT,
        sourceId: record.id,
        filename: item.name ?? '',
        fileExtension: record.fileExtension ?? null,
        mimeType: item.file?.mimeType ?? null,
        reason,
      };
      await this.prisma.syncTaskSkippedItem.create({ data });
    } catch (error: any) {
      const detail = error?.message || 'unknown error';
      this.logger.warn(`Failed to record skipped item ${item.id}: ${detail}`);
    }
  }

  /**
   * Best-effort bookkeeping: stores a processed-item row for a sync task.
   * A failure to write the row is logged as a warning and swallowed.
   *
   * @param input Fields of the row to create; `status` tells whether the item
   *   is completed or still processing.
   */
  private async recordProcessedItem(input: {
    taskId: string;
    sourceType: RagflowSourceType;
    sourceId: string;
    filename: string;
    fileExtension: string | null;
    mimeType: string | null;
    ragflowDocumentId: string | null;
    tokenCount: number | null;
    chunkCount: number | null;
    sourceSizeBytes: number | null;
    status: SyncItemStatus;
  }): Promise<void> {
    // Copy the known fields explicitly so nothing else on `input` reaches the database.
    const {
      taskId,
      sourceType,
      sourceId,
      filename,
      fileExtension,
      mimeType,
      ragflowDocumentId,
      tokenCount,
      chunkCount,
      sourceSizeBytes,
      status,
    } = input;

    try {
      await this.prisma.syncTaskProcessedItem.create({
        data: {
          taskId,
          sourceType,
          sourceId,
          filename,
          fileExtension,
          mimeType,
          ragflowDocumentId,
          tokenCount,
          chunkCount,
          sourceSizeBytes,
          status,
        },
      });
    } catch (error: any) {
      const detail = error?.message || 'unknown error';
      this.logger.warn(`Failed to record processed item ${sourceId}: ${detail}`);
    }
  }

  /**
   * Best-effort bookkeeping: stores a failed-item row for a sync task. The
   * error text is passed through `formatSyncError` together with the optional
   * size information. A failure to write the row is logged as a warning and
   * swallowed.
   *
   * @param input Fields of the row to create, plus the optional source size and
   *   RAGFlow size limit that are handed to the error formatter.
   */
  private async recordFailedItem(input: {
    taskId: string;
    sourceType: RagflowSourceType;
    sourceId: string;
    filename: string;
    fileExtension: string | null;
    mimeType: string | null;
    error: string;
    sourceSizeBytes?: number | null;
    ragflowLimitBytes?: number | null;
  }): Promise<void> {
    try {
      // Formatting happens inside the try so a formatter error is also swallowed.
      const formattedError = this.formatSyncError(input.error, {
        sourceSizeBytes: input.sourceSizeBytes ?? null,
        limitBytes: input.ragflowLimitBytes ?? null,
      });
      await this.prisma.syncTaskFailedItem.create({
        data: {
          taskId: input.taskId,
          sourceType: input.sourceType,
          sourceId: input.sourceId,
          filename: input.filename,
          fileExtension: input.fileExtension,
          mimeType: input.mimeType,
          error: formattedError,
        },
      });
    } catch (error: any) {
      const detail = error?.message || 'unknown error';
      this.logger.warn(`Failed to record failed item ${input.sourceId}: ${detail}`);
    }
  }

  /**
   * Picks the name a file is uploaded under. A name that is already a supported
   * RAGFlow file is returned unchanged; otherwise an extension derived from the
   * content type is appended. If no extension can be derived, a warning is
   * logged and the original name is returned.
   *
   * @param filename Original file name.
   * @param contentType Content type reported for the download, if any.
   * @returns The name to use for the upload.
   */
  private normalizeDisplayName(filename: string, contentType: string | null): string {
    if (this.isSupportedRagflowFile(filename.toLowerCase())) {
      return filename;
    }

    const mappedExtension = this.mapExtensionByContentType(contentType);
    if (mappedExtension) {
      return `${filename}.${mappedExtension}`;
    }

    this.logger.warn(`Unsupported file type for RAGFlow, skipped: ${filename}`);
    return filename;
  }

  /**
   * Decides from metadata alone whether a SharePoint document changed since the
   * previous sync.
   *
   * @param previous Metadata captured by the previous sync, if any.
   * @param current The current index record.
   * @returns `true` when there is no previous metadata, when both ETags are
   *   present and differ, or when the modified time or the size differs.
   */
  private hasSharePointMetadataChanged(
    previous: SyncedSharePointDocument['previousMetadata'],
    current: SyncedSharePointDocument['record'],
  ): boolean {
    if (!previous) {
      return true;
    }

    // ETags only count as evidence when both sides actually have one.
    const etagDiffers = () =>
      Boolean(previous.spEtag && current.spEtag && previous.spEtag !== current.spEtag);

    // A missing timestamp is normalised to null so "missing vs missing" compares equal.
    const modifiedDiffers = () =>
      (previous.spModifiedAt?.getTime() ?? null) !== (current.spModifiedAt?.getTime() ?? null);

    // A falsy size (missing or zero) is normalised to null on both sides.
    const sizeDiffers = () =>
      (previous.size ? Number(previous.size) : null) !==
      (current.size ? Number(current.size) : null);

    return etagDiffers() || modifiedDiffers() || sizeDiffers();
  }

  /**
   * Reports whether an error message matches one of the phrases this service
   * treats as an embedding credential / authorization / quota problem.
   * Matching is case-insensitive on `error.message`.
   *
   * @param error Thrown value; only its `message` is inspected.
   * @returns `true` on a match; `false` when there is no message or no match.
   */
  private isEmbeddingCredentialError(error: any): boolean {
    const text = (error?.message || '').toString().toLowerCase();
    if (!text) {
      return false;
    }

    const phrases = [
      'invalid api-key',
      'api key is invalid',
      'not authorized',
      'allocationquota',
      'free tier',
      'model(@none) not authorized',
    ];
    if (phrases.some((phrase) => text.includes(phrase))) {
      return true;
    }
    // "quota" alone is too generic; it only counts together with "embedding".
    return text.includes('quota') && text.includes('embedding');
  }

  /**
   * Explains why a SharePoint document cannot be sent to RAGFlow, or returns
   * `null` when it can.
   *
   * Checks, in order: the file exceeds the configured maximum size; the file
   * name is a supported RAGFlow file; the name becomes supported once the
   * extension mapped from the MIME type is appended. If none of these accept
   * the file, a reason naming the extension and/or MIME type is returned.
   *
   * @param document SharePoint document to inspect.
   * @returns A human-readable reason, or `null` when the file is supported.
   */
  private getUnsupportedSharePointReason(document: SyncedSharePointDocument): string | null {
    const maxSizeBytes = this.getMaxSharePointFileSizeBytes();
    const sizeBytes = document.item.size ?? 0;
    if (maxSizeBytes && sizeBytes > maxSizeBytes) {
      return this.formatFileTooLargeReason(sizeBytes, maxSizeBytes);
    }

    const filename = document.item.name ?? '';
    const lower = filename.toLowerCase();
    if (this.isSupportedRagflowFile(lower)) {
      return null;
    }

    // The name alone is not supported; try again with the extension implied by the MIME type.
    const mimeType = document.item.file?.mimeType ?? null;
    const mappedExtension = this.mapExtensionByContentType(mimeType);
    if (
      mappedExtension &&
      this.isSupportedRagflowFile(`${filename}.${mappedExtension}`.toLowerCase())
    ) {
      return null;
    }

    const fileExtension = path.extname(lower).replace('.', '');
    if (fileExtension) {
      return `unsupported file extension .${fileExtension}${mimeType ? ` (mime=${mimeType})` : ''}`;
    }
    if (!mimeType) {
      return 'unsupported file type (missing extension and MIME type)';
    }
    // No extension but a MIME type: the message is the same whether or not the
    // MIME type maps to an extension, so no second lookup is needed.
    return `unsupported file type (mime=${mimeType})`;
  }

  /**
   * Returns a skip reason for audio/video/image documents whose required
   * model is not configured, or `null` when the document may be processed.
   *
   * - Audio requires `RAGFLOW_ASR_MODEL`.
   * - Video requires `RAGFLOW_IMAGE2TEXT_MODEL`.
   * - Images require `RAGFLOW_IMAGE2TEXT_MODEL` unless local-OCR-only mode
   *   is enabled.
   *
   * @param document Synced SharePoint document to evaluate.
   * @returns A user-facing reason string, or `null` when nothing blocks it.
   */
  private getMultimodalSkipReason(document: SyncedSharePointDocument): string | null {
    const name = document.item.name ?? '';
    const ext = path.extname(name).replace('.', '').toLowerCase();
    const mime = document.item.file?.mimeType?.toLowerCase() ?? null;
    if (!this.isMultimodalFile(ext, mime)) {
      return null;
    }

    const visionModel = this.configService.get<string>('RAGFLOW_IMAGE2TEXT_MODEL') || '';
    const speechModel = this.configService.get<string>('RAGFLOW_ASR_MODEL') || '';
    const ocrOnly = this.isLocalOcrOnlyEnabled();
    const missingVisionModel = '多模态解析未启用（未配置 IMAGE2TEXT 模型）';

    // Classification order matters: audio is checked first, then video, then image.
    if (this.isAudioFile(ext, mime)) {
      return speechModel ? null : '多模态解析未启用（未配置 ASR 模型）';
    }
    if (this.isVideoFile(ext, mime)) {
      return visionModel ? null : missingVisionModel;
    }
    if (this.isImageFile(ext, mime) && !ocrOnly && !visionModel) {
      return missingVisionModel;
    }
    return null;
  }

  /**
   * Reads `RAGFLOW_LOCAL_OCR_ONLY` and reports whether it equals "true"
   * (case-insensitive). A missing or empty value counts as disabled.
   */
  private isLocalOcrOnlyEnabled(): boolean {
    const flag = this.configService.get<string>('RAGFLOW_LOCAL_OCR_ONLY') || '';
    return flag.toLowerCase() === 'true';
  }

  /**
   * Reports whether the extension/MIME type identifies an image, video or
   * audio file.
   */
  private isMultimodalFile(extension: string, mimeType: string | null): boolean {
    if (this.isImageOrVideoFile(extension, mimeType)) {
      return true;
    }
    return this.isAudioFile(extension, mimeType);
  }

  /** Reports whether the extension/MIME type identifies an image or a video. */
  private isImageOrVideoFile(extension: string, mimeType: string | null): boolean {
    const isImage = this.isImageFile(extension, mimeType);
    return isImage ? true : this.isVideoFile(extension, mimeType);
  }

  /**
   * Reports whether a file is an image, judged by an `image/` MIME prefix
   * or by a known image extension.
   *
   * @param extension Extension without the leading dot; compared exactly,
   *   so callers are expected to pass it lower-cased.
   * @param mimeType MIME type, or `null` when unknown.
   */
  private isImageFile(extension: string, mimeType: string | null): boolean {
    if (mimeType != null && mimeType.startsWith('image/')) {
      return true;
    }
    switch (extension) {
      case 'jpg':
      case 'jpeg':
      case 'png':
      case 'gif':
      case 'bmp':
      case 'tiff':
      case 'tif':
      case 'webp':
      case 'heic':
      case 'heif':
      case 'svg':
        return true;
      default:
        return false;
    }
  }

  /**
   * Reports whether a file is a video, judged by a `video/` MIME prefix or
   * by a known video extension.
   *
   * @param extension Extension without the leading dot; compared exactly,
   *   so callers are expected to pass it lower-cased.
   * @param mimeType MIME type, or `null` when unknown.
   */
  private isVideoFile(extension: string, mimeType: string | null): boolean {
    const videoExtensions = [
      'mp4',
      'mov',
      'avi',
      'flv',
      'mpeg',
      'mpg',
      'webm',
      'wmv',
      '3gp',
      '3gpp',
      'mkv',
    ];
    const mimeSaysVideo = mimeType?.startsWith('video/') === true;
    return mimeSaysVideo || videoExtensions.includes(extension);
  }

  /**
   * Reports whether a file is audio, judged by an `audio/` MIME prefix or
   * by a known audio extension.
   *
   * @param extension Extension without the leading dot; compared exactly,
   *   so callers are expected to pass it lower-cased.
   * @param mimeType MIME type, or `null` when unknown.
   */
  private isAudioFile(extension: string, mimeType: string | null): boolean {
    if (mimeType !== null && mimeType !== undefined && mimeType.startsWith('audio/')) {
      return true;
    }
    const audioExtensions = ['mp3', 'wav', 'aac', 'flac', 'ogg', 'm4a', 'wma'];
    return audioExtensions.includes(extension);
  }

  /**
   * Translates a raw sync error message into a user-facing message.
   *
   * The raw text is matched case-insensitively against a fixed, ordered
   * list of known phrases; the first match wins. Unrecognised messages are
   * returned unchanged.
   *
   * @param raw Original error message.
   * @param context Optional source size and configured limit (bytes) used
   *   to enrich file-size related messages.
   * @returns The translated message, or `raw` when nothing matches.
   */
  private formatSyncError(
    raw: string,
    context?: { sourceSizeBytes?: number | null; limitBytes?: number | null },
  ): string {
    const text = raw.toLowerCase();
    const mentions = (needle: string): boolean => text.includes(needle);

    // Ordered rules: earlier entries take precedence when several match.
    const rules: Array<{ applies: boolean; render: () => string }> = [
      {
        applies: mentions('signature mismatch'),
        render: () => '文件下载校验失败（签名不一致），已重试仍失败',
      },
      {
        applies: mentions('requestentitytoolarge') || mentions('413'),
        render: () =>
          this.formatFileSizeError(
            '文件过大，上传被拒绝（413）',
            context?.sourceSizeBytes,
            context?.limitBytes,
          ),
      },
      {
        applies: mentions('file size exceeds'),
        // Prefer the limit stated in the message over the configured one.
        render: () =>
          this.formatFileSizeError(
            '文件过大，超过 RAGFlow 限制',
            context?.sourceSizeBytes,
            this.parseFileSizeLimitFromMessage(raw) ?? context?.limitBytes,
          ),
      },
      {
        applies: mentions('model(@none) not authorized'),
        render: () => '多模态解析未启用（未配置模型）',
      },
      {
        applies: mentions('download returned non-binary'),
        render: () => '文件下载失败（返回非二进制内容）',
      },
      {
        applies: mentions('missing download url'),
        render: () => '文件下载失败（缺少下载地址）',
      },
      {
        applies: mentions('download fallback failed'),
        render: () => `文件下载失败（Graph 回退失败）：${raw}`,
      },
    ];

    const matched = rules.find((rule) => rule.applies);
    return matched ? matched.render() : raw;
  }

  /**
   * Appends size details (in MB, two decimals) to a base error message.
   *
   * @param base Message to extend.
   * @param sourceSizeBytes Size of the source file in bytes.
   * @param limitBytes Applicable limit in bytes.
   * @returns `base` unchanged when either size is missing or zero;
   *   otherwise `base` followed by a parenthesised comparison whose wording
   *   depends on whether the source exceeds the limit.
   */
  private formatFileSizeError(
    base: string,
    sourceSizeBytes?: number | null,
    limitBytes?: number | null,
  ): string {
    if (!sourceSizeBytes || !limitBytes) {
      return base;
    }

    const toMb = (bytes: number): string => (bytes / 1024 / 1024).toFixed(2);
    const sourceMb = toMb(sourceSizeBytes);
    const limitMb = toMb(limitBytes);

    // When the source is within the configured limit the real limit must be
    // lower than configured, which the second wording points out.
    return sourceSizeBytes > limitBytes
      ? `${base}（源文件 ${sourceMb}MB > 限制 ${limitMb}MB）`
      : `${base}（源文件 ${sourceMb}MB，当前配置上限 ${limitMb}MB，实际限制可能更低）`;
  }

  /**
   * Extracts a file-size limit from an error message containing a fragment
   * such as `<= 10Mb` and converts it to bytes.
   *
   * @param raw Error message to scan (matching is case-insensitive).
   * @returns The limit in bytes (rounded down), or `null` when the message
   *   has no such fragment or the number is not a positive finite value.
   */
  private parseFileSizeLimitFromMessage(raw: string): number | null {
    // Single backslashes are required in a regex literal: `\\s` would match
    // a literal backslash followed by "s", so the pattern could never match
    // a real message and the parsed limit was always null.
    const match = raw.match(/<=\s*([\d.]+)\s*mb/i);
    if (!match) {
      return null;
    }
    const megabytes = Number(match[1]);
    // Rejects malformed numbers such as "1.2.3" (NaN) and non-positive limits.
    if (!Number.isFinite(megabytes) || megabytes <= 0) {
      return null;
    }
    return Math.floor(megabytes * 1024 * 1024);
  }

  /**
   * Builds the "file too large" skip reason, showing both the file size and
   * the limit in MB with two decimals.
   *
   * @param sizeBytes Actual file size in bytes.
   * @param limitBytes Maximum allowed size in bytes.
   */
  private formatFileTooLargeReason(sizeBytes: number, limitBytes: number): string {
    const [sizeMb, limitMb] = [sizeBytes, limitBytes].map((bytes) =>
      (bytes / 1024 / 1024).toFixed(2),
    );
    return `文件过大（${sizeMb}MB > ${limitMb}MB）`;
  }

  /**
   * Reads `KB_SP_MAX_FILE_SIZE_MB` and converts it to bytes.
   *
   * @returns The limit in bytes (rounded down), or `null` when the setting
   *   is missing, not a finite number, or not positive.
   */
  private getMaxSharePointFileSizeBytes(): number | null {
    const configured = this.configService.get<string>('KB_SP_MAX_FILE_SIZE_MB');
    const megabytes = configured ? Number(configured) : NaN;
    const usable = Number.isFinite(megabytes) && megabytes > 0;
    return usable ? Math.floor(megabytes * 1024 * 1024) : null;
  }

  /**
   * Reads `RAGFLOW_MAX_CONTENT_LENGTH` as a number, with no unit conversion.
   *
   * @returns The value rounded down, or `null` when the setting is missing,
   *   not a finite number, or not positive.
   */
  private getRagflowMaxContentLengthBytes(): number | null {
    const configured = this.configService.get<string>('RAGFLOW_MAX_CONTENT_LENGTH');
    if (!configured) {
      return null;
    }
    const bytes = Number(configured);
    return Number.isFinite(bytes) && bytes > 0 ? Math.floor(bytes) : null;
  }

  /**
   * Returns the overall sync timeout in milliseconds, read from
   * `KB_SYNC_TIMEOUT_MINUTES`. Falls back to 120 minutes when the setting
   * is missing, not a finite number, or not positive.
   */
  private getSyncTimeoutMs(): number {
    const defaultMinutes = 120;
    const configured = this.configService.get<string>('KB_SYNC_TIMEOUT_MINUTES');
    let minutes = configured ? Number(configured) : NaN;
    if (!Number.isFinite(minutes) || minutes <= 0) {
      minutes = defaultMinutes;
    }
    return minutes * 60 * 1000;
  }

  /**
   * Returns the no-progress timeout in milliseconds, read from
   * `KB_SYNC_PROGRESS_TIMEOUT_MINUTES`. Falls back to 30 minutes when the
   * setting is missing, not a finite number, or not positive.
   */
  private getSyncProgressTimeoutMs(): number {
    const defaultMinutes = 30;
    const configured = this.configService.get<string>('KB_SYNC_PROGRESS_TIMEOUT_MINUTES');
    const parsed = configured ? Number(configured) : NaN;
    const isUsable = Number.isFinite(parsed) && parsed > 0;
    const minutes = isUsable ? parsed : defaultMinutes;
    return minutes * 60 * 1000;
  }

  /**
   * Returns the remote health-check interval in milliseconds, read from
   * `RAGFLOW_REMOTE_HEALTH_CHECK_INTERVAL_MINUTES`. Falls back to 24 hours
   * when the setting is missing, not a finite number, or not positive.
   */
  private getRemoteHealthCheckIntervalMs(): number {
    const defaultMinutes = 24 * 60;
    const configured = this.configService.get<string>(
      'RAGFLOW_REMOTE_HEALTH_CHECK_INTERVAL_MINUTES',
    );
    const parsed = configured ? Number(configured) : NaN;
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed * 60 * 1000;
    }
    return defaultMinutes * 60 * 1000;
  }

  /**
   * Decides whether a remote health check is due.
   *
   * @param lastSyncedAt Time of the last sync, if any.
   * @returns `true` when there is no previous sync or when at least the
   *   configured health-check interval has elapsed since it.
   */
  private shouldCheckRemoteHealth(lastSyncedAt?: Date | null): boolean {
    if (!lastSyncedAt) {
      return true;
    }
    const requiredGapMs = this.getRemoteHealthCheckIntervalMs();
    const elapsedMs = Date.now() - lastSyncedAt.getTime();
    return elapsedMs >= requiredGapMs;
  }

  /**
   * Finds the single sync task that should be treated as active, cleaning up
   * stale SYNCING rows along the way.
   *
   * Note that this is not a pure read: it may update task rows. For every
   * task currently in SYNCING status (newest `startedAt` first) it:
   *  1. lets `normalizeSyncTaskStatus` finalize it if applicable and skips it
   *     when it is no longer SYNCING;
   *  2. marks it FAILED when the time since its last progress (or start, if
   *     no progress was recorded) exceeds the sync timeout;
   *  3. otherwise keeps it as a candidate.
   * Of the remaining candidates the first one is kept and all others are
   * marked FAILED.
   *
   * @returns The id of the surviving active task, or `null` when none remain.
   */
  private async getActiveSyncTaskId(): Promise<string | null> {
    const activeTasks = await this.prisma.syncTask.findMany({
      where: { status: SyncStatus.SYNCING },
      orderBy: { startedAt: 'desc' },
    });
    if (activeTasks.length === 0) {
      return null;
    }

    const timeoutMs = this.getSyncTimeoutMs();
    const now = Date.now();
    const validTasks: typeof activeTasks = [];

    // Tasks are handled one at a time; each iteration may write to the database.
    for (const task of activeTasks) {
      const normalized = await this.normalizeSyncTaskStatus(task);
      if (normalized.status !== SyncStatus.SYNCING) {
        continue;
      }
      // Timeout is measured from the last recorded progress, falling back to
      // the start time; tasks with neither timestamp are never timed out here.
      const baseTime = task.lastProgressAt ?? task.startedAt;
      if (baseTime && now - baseTime.getTime() > timeoutMs) {
        await this.prisma.syncTask.update({
          where: { id: task.id },
          data: {
            status: SyncStatus.FAILED,
            error: `同步超时（超过 ${Math.round(timeoutMs / 60000)} 分钟）`,
            completedAt: new Date(),
            lastProgressAt: new Date(),
          },
        });
        continue;
      }
      validTasks.push(task);
    }

    if (validTasks.length === 0) {
      return null;
    }

    // validTasks preserves the query order, so `latest` is the first
    // still-valid task returned by the startedAt-descending query.
    const [latest, ...others] = validTasks;
    if (others.length > 0) {
      // Only one task may stay active; the rest are terminated in one write.
      await this.prisma.syncTask.updateMany({
        where: { id: { in: others.map((task) => task.id) } },
        data: {
          status: SyncStatus.FAILED,
          error: '已被新的同步任务终止',
          completedAt: new Date(),
          lastProgressAt: new Date(),
        },
      });
    }

    return latest.id;
  }

  /**
   * Finalizes a SYNCING task whose counters show that every item has been
   * handled, persisting the final status.
   *
   * The task is returned untouched when it is not SYNCING, when its total is
   * unknown, when the counters do not yet add up to the total, or when items
   * are still being processed. Otherwise it is stored as FAILED (if any item
   * failed) or COMPLETED, and a copy reflecting the stored status, error and
   * completion time is returned.
   *
   * @param task Task row (or a compatible subset) to inspect.
   * @returns The original task, or a copy with the finalized fields.
   */
  private async normalizeSyncTaskStatus(task: {
    id: string;
    taskType: string;
    status: SyncStatus;
    totalItems: number | null;
    processedItems: number;
    processingItems?: number;
    failedItems: number;
    skippedItems: number;
    processedTokens: number;
    processedChunks: number;
    error: string | null;
    startedAt: Date | null;
    createdAt: Date;
    completedAt: Date | null;
    triggerSource: string | null;
  }) {
    // Only tasks still marked SYNCING with a known total can be finalized.
    if (task.status !== SyncStatus.SYNCING || task.totalItems === null) {
      return task;
    }

    const inFlight = task.processingItems ?? 0;
    const accountedFor = task.processedItems + task.failedItems + task.skippedItems + inFlight;
    // Still running: either items are unaccounted for or some are in flight.
    if (accountedFor < task.totalItems || inFlight > 0) {
      return task;
    }

    const hasFailures = task.failedItems > 0;
    const persisted = await this.prisma.syncTask.update({
      where: { id: task.id },
      data: {
        status: hasFailures ? SyncStatus.FAILED : SyncStatus.COMPLETED,
        // Keep an existing error for failed runs; clear it on success.
        error: hasFailures ? task.error ?? '同步完成但存在失败项' : null,
        completedAt: task.completedAt ?? new Date(),
        lastProgressAt: new Date(),
      },
    });

    return {
      ...task,
      status: persisted.status,
      error: persisted.error ?? null,
      completedAt: persisted.completedAt ?? task.completedAt,
    };
  }

  /**
   * Fails the task and throws when the sync has run longer than allowed.
   *
   * @param taskId Task to mark FAILED on timeout.
   * @param startTime Sync start time as epoch milliseconds.
   * @param timeoutMs Maximum allowed duration in milliseconds.
   * @throws Error with message "Sync timed out" once the budget is exceeded.
   */
  private async assertNotTimedOut(
    taskId: string,
    startTime: number,
    timeoutMs: number,
  ): Promise<void> {
    const elapsedMs = Date.now() - startTime;
    const withinBudget = elapsedMs <= timeoutMs;
    if (withinBudget) {
      return;
    }

    const timeoutMinutes = Math.round(timeoutMs / 60000);
    await this.prisma.syncTask.update({
      where: { id: taskId },
      data: {
        status: SyncStatus.FAILED,
        error: `同步超时（超过 ${timeoutMinutes} 分钟）`,
        completedAt: new Date(),
        lastProgressAt: new Date(),
      },
    });
    throw new Error('Sync timed out');
  }

  /**
   * Verifies that the task still exists and is in SYNCING status.
   *
   * @param taskId Task to look up.
   * @throws NotFoundException when no task with this id exists.
   * @throws Error with message "Sync task terminated" when the task is in
   *   any status other than SYNCING.
   */
  private async assertTaskActive(taskId: string): Promise<void> {
    const record = await this.prisma.syncTask.findUnique({
      where: { id: taskId },
      select: { status: true },
    });
    if (!record) {
      throw new NotFoundException(`Sync task ${taskId} not found`);
    }

    const stillSyncing = record.status === SyncStatus.SYNCING;
    if (!stillSyncing) {
      throw new Error('Sync task terminated');
    }
  }

  /**
   * Fails the task and throws when no progress has been recorded for longer
   * than allowed.
   *
   * @param taskId Task to mark FAILED when stalled.
   * @param lastProgressAtMs Time of the last progress as epoch milliseconds.
   * @param timeoutMs Maximum allowed gap in milliseconds; a value of zero or
   *   less disables the check.
   * @throws Error with message "Sync stalled" once the gap is exceeded.
   */
  private async assertNotStalled(
    taskId: string,
    lastProgressAtMs: number,
    timeoutMs: number,
  ): Promise<void> {
    const checkDisabled = timeoutMs <= 0;
    if (checkDisabled || Date.now() - lastProgressAtMs <= timeoutMs) {
      return;
    }

    const timeoutMinutes = Math.round(timeoutMs / 60000);
    await this.prisma.syncTask.update({
      where: { id: taskId },
      data: {
        status: SyncStatus.FAILED,
        error: `同步进度停滞（超过 ${timeoutMinutes} 分钟无进展）`,
        completedAt: new Date(),
        lastProgressAt: new Date(),
      },
    });
    throw new Error('Sync stalled');
  }

  /**
   * Records a progress heartbeat for a task by updating `lastProgressAt`,
   * optionally writing the current counters as well.
   *
   * @param taskId Task to update.
   * @param input Current counters; they are persisted only when
   *   `forceCounts` is `true`.
   * @returns The heartbeat timestamp as epoch milliseconds.
   */
  private async updateProgressHeartbeat(
    taskId: string,
    input: {
      processedItems: number;
      processingItems: number;
      failedItems: number;
      skippedItems: number;
      processedTokens: number;
      processedChunks: number;
      forceCounts: boolean;
    },
  ): Promise<number> {
    const stamp = new Date();

    // Counters are listed explicitly so only known columns reach the update.
    const counters = input.forceCounts
      ? {
          processedItems: input.processedItems,
          processingItems: input.processingItems,
          failedItems: input.failedItems,
          skippedItems: input.skippedItems,
          processedTokens: input.processedTokens,
          processedChunks: input.processedChunks,
        }
      : {};

    await this.prisma.syncTask.update({
      where: { id: taskId },
      data: { lastProgressAt: stamp, ...counters },
    });

    return stamp.getTime();
  }

  /**
   * Maps a Content-Type value to a file extension.
   *
   * Parameters after `;` are ignored and the media type is trimmed and
   * lower-cased before lookup.
   *
   * @param contentType Content-Type header or MIME type, or `null`.
   * @returns The extension without a leading dot, or `null` when the input
   *   is empty or the media type is not in the table.
   */
  private mapExtensionByContentType(contentType: string | null): string | null {
    if (!contentType) {
      return null;
    }

    const extensionByMediaType = new Map<string, string>([
      ['application/pdf', 'pdf'],
      ['application/json', 'json'],
      ['application/ld+json', 'json'],
      ['text/markdown', 'md'],
      ['text/csv', 'csv'],
      ['text/plain', 'txt'],
      ['application/xml', 'xml'],
      ['text/xml', 'xml'],
      ['application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'],
      ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'],
      ['application/vnd.openxmlformats-officedocument.presentationml.presentation', 'pptx'],
      ['application/msword', 'doc'],
      ['application/vnd.ms-excel', 'xls'],
      ['application/vnd.ms-powerpoint', 'ppt'],
    ]);

    const mediaType = contentType.split(';')[0].trim().toLowerCase();
    return extensionByMediaType.get(mediaType) ?? null;
  }

  /**
   * Reports whether a filename ends in one of the accepted extensions.
   *
   * Matching is case-sensitive, so callers are expected to pass a
   * lower-cased filename.
   *
   * @param filename Filename to test.
   * @returns `true` when the name ends with a listed extension.
   */
  private isSupportedRagflowFile(filename: string): boolean {
    const supportedPatterns: RegExp[] = [
      // PDF
      /\.(pdf)$/,
      // Documents, data and source files
      /\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|mdx|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$/,
      // Audio
      /\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$/,
      // Images and video
      /\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|wmf|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4|mkv)$/,
    ];
    return supportedPatterns.some((pattern) => pattern.test(filename));
  }

  /**
   * Writes text content to a new file under `<cwd>/tmp/knowledge-base`,
   * creating the directory if needed.
   *
   * The file is named `<epoch-ms>-<filename>`, with runs of characters that
   * are invalid in file names (`\ / : * ? " < > |`) replaced by `_`.
   *
   * @param filename Desired file name; used only to derive the final name.
   * @param content Text to write as UTF-8.
   * @returns The full path of the written file.
   */
  private async writeTempFile(filename: string, content: string): Promise<string> {
    const directory = path.join(process.cwd(), 'tmp', 'knowledge-base');
    await fs.promises.mkdir(directory, { recursive: true });

    const sanitizedName = filename.replace(/[\\/:*?"<>|]+/g, '_');
    const targetPath = path.join(directory, `${Date.now()}-${sanitizedName}`);
    await fs.promises.writeFile(targetPath, content, 'utf-8');
    return targetPath;
  }

  /**
   * Computes the SHA-256 digest of the given text.
   *
   * @param content Text to hash.
   * @returns The digest as a lower-case hexadecimal string.
   */
  private hashContent(content: string): string {
    const hasher = crypto.createHash('sha256');
    hasher.update(content);
    return hasher.digest('hex');
  }

  /**
   * Deletes a file on a best-effort basis; any failure is deliberately
   * ignored.
   *
   * @param filePath Path of the file to delete.
   */
  private async safeRemove(filePath: string) {
    // Cleanup errors are swallowed on purpose: this only removes temp files.
    await fs.promises.unlink(filePath).catch(() => undefined);
  }
}
