import { Injectable, Logger, ServiceUnavailableException } from '@nestjs/common';
import { PrismaService } from '@core/database/prisma/prisma.service';
import { MetricsService } from './metrics.service';
import { SearchType } from '@prisma/client';
import { RagflowService } from './ragflow.service';

/**
 * One entry of a knowledge-base search result.
 *
 * The same shape is used for retrieved documents, knowledge articles and
 * folders; `type` tells them apart and several fields are only populated for
 * some of them.
 */
export interface SearchResultItem {
  /**
   * Id of the matching local record (indexed document, article or folder).
   * Falls back to the RAGFlow document id when no local record was found.
   */
  id: string;
  /** Kind of hit; decides which of the optional fields are filled in. */
  type: 'document' | 'article' | 'folder';
  /** Display title; `'Unknown'` when neither the local record nor the chunk provides one. */
  title: string;
  /** Link to the item; set for indexed documents and folders, never for articles. */
  webUrl?: string;
  /** Short excerpt: clipped chunk text for documents/articles, the folder path for folders. */
  snippet: string;
  /** Longer preview merged from the document's retrieved chunks; not set for folders. */
  previewContent?: string;
  /** Relevance: the chunk similarity for documents/articles, a heuristic value in [0, 1] for folders. */
  score: number;
  /** Document type from the index, or the constant `'ARTICLE'` for articles. */
  docType?: string;
  /** Authority level from the index; for articles either `'PUBLISHED'` or `'DRAFT'`. */
  docAuthorityLevel?: string;
  /** Last modification time of the underlying record, when known. */
  lastModifiedAt?: Date;
  /** Creator of the underlying record; set for indexed documents and folders when known. */
  createdBy?: string;
  /** Folder path; only set for `type: 'folder'`. */
  folderPath?: string;
}

/** Envelope returned by a search call. */
export interface SearchResponse {
  /** Result entries, document/article hits first, followed by folder hits. */
  items: SearchResultItem[];
  /** Number of entries in `items` (not the size of the full match set). */
  total: number;
  /** Identifier of the search strategy that produced the results, e.g. `'semantic'`. */
  searchType: string;
  /** Wall-clock duration of the search in milliseconds. */
  responseTimeMs: number;
}

/**
 * The fields of a RAGFlow retrieval chunk that this service reads.
 *
 * Everything is optional because the payload comes from an external service
 * and is only checked for "is an object" before use.
 */
interface RagflowChunk {
  document_id?: string;
  document_keyword?: string;
  content?: string;
  similarity?: number;
}

/** A chunk that is known to carry a non-empty RAGFlow document id. */
type DocumentChunk = RagflowChunk & { document_id: string };

/**
 * Knowledge-base search backed by RAGFlow retrieval.
 *
 * A search retrieves chunks from RAGFlow, collapses them to one hit per
 * document, enriches the hits with the locally indexed document/article
 * records, appends folder hits found by a keyword query, and records the
 * search in the metrics service.
 */
@Injectable()
export class RagflowSearchService {
  private readonly logger = new Logger(RagflowSearchService.name);

  constructor(
    private readonly prisma: PrismaService,
    private readonly ragflowService: RagflowService,
    private readonly metricsService: MetricsService,
  ) {}

  /**
   * Runs a semantic search and returns document, article and folder hits.
   *
   * @param userId - User the search is recorded for in the metrics.
   * @param query - Free-text query.
   * @param limit - Maximum number of document/article hits. Negative values
   *   are treated as 0 and non-finite values fall back to the default of 20.
   *   Folder hits are appended on top of this limit.
   * @returns The merged hits together with timing information.
   * @throws ServiceUnavailableException when the RAGFlow retrieval call fails.
   */
  async search(
    userId: string,
    query: string,
    limit: number = 20,
  ): Promise<SearchResponse> {
    const start = Date.now();
    const effectiveLimit = this.normalizeLimit(limit);
    const datasetId = await this.ragflowService.resolveDatasetId();
    // Over-fetch: several chunks can belong to one document and the hits are
    // collapsed to a single entry per document below.
    const pageSize = Math.max(effectiveLimit * 3, 30);

    let retrieval: unknown;
    try {
      retrieval = await this.ragflowService.retrieve(datasetId, query, pageSize);
    } catch (error: unknown) {
      const responseTimeMs = Date.now() - start;
      const errorMessage = this.describeError(error);
      this.logger.error(`RAGFlow retrieval failed: ${errorMessage}`);
      // Best-effort: a metrics failure must not replace the provider error below.
      await this.recordSearch(userId, query, 0, responseTimeMs);
      throw new ServiceUnavailableException({
        code: 'KNOWLEDGE_BASE_SEARCH_PROVIDER_ERROR',
        message: '检索服务不可用，请检查向量模型授权或余额',
        details: {
          provider: 'ragflow',
          error: errorMessage,
        },
      });
    }

    const chunks = this.extractChunks(retrieval);
    const bestChunks = this.pickTopChunksByDocument(chunks, effectiveLimit);
    const documentIds = bestChunks.map((chunk) => chunk.document_id);
    const chunkGroups = this.groupChunksByDocument(chunks, new Set(documentIds));

    // Nothing retrieved means nothing to look up; skip the round trip.
    const mappings =
      documentIds.length > 0
        ? await this.prisma.ragflowDocument.findMany({
            where: { ragflowDocumentId: { in: documentIds } },
          })
        : [];

    const mappingByDoc = new Map(mappings.map((m) => [m.ragflowDocumentId, m]));
    const spIds = mappings.filter((m) => m.sourceType === 'SP_DOCUMENT').map((m) => m.sourceId);
    const articleIds = mappings.filter((m) => m.sourceType === 'ARTICLE').map((m) => m.sourceId);

    const [spDocs, articles] = await Promise.all([
      spIds.length > 0
        ? this.prisma.sPDocumentIndex.findMany({ where: { id: { in: spIds } } })
        : [],
      articleIds.length > 0
        ? this.prisma.knowledgeArticle.findMany({ where: { id: { in: articleIds } } })
        : [],
    ]);

    const spMap = new Map(spDocs.map((doc) => [doc.id, doc]));
    const articleMap = new Map(articles.map((article) => [article.id, article]));

    const items: SearchResultItem[] = bestChunks.map((chunk): SearchResultItem => {
      const mapping = mappingByDoc.get(chunk.document_id);
      const fallbackTitle = chunk.document_keyword ?? 'Unknown';
      const snippet = this.clipSnippet(chunk.content);
      const previewContent = this.buildPreviewContent(chunkGroups.get(chunk.document_id) ?? []);
      const score = chunk.similarity ?? 0;

      if (mapping?.sourceType === 'SP_DOCUMENT') {
        const doc = spMap.get(mapping.sourceId);
        return {
          id: doc?.id ?? chunk.document_id,
          type: 'document',
          title: doc?.title ?? fallbackTitle,
          webUrl: doc?.webUrl ?? undefined,
          snippet,
          previewContent,
          score,
          docType: doc?.docType ?? undefined,
          docAuthorityLevel: doc?.docAuthorityLevel ?? undefined,
          lastModifiedAt: doc?.spModifiedAt ?? undefined,
          createdBy: doc?.createdBy ?? undefined,
        };
      }

      if (mapping?.sourceType === 'ARTICLE') {
        const article = articleMap.get(mapping.sourceId);
        return {
          id: article?.id ?? chunk.document_id,
          type: 'article',
          title: article?.title ?? fallbackTitle,
          snippet,
          previewContent,
          score,
          docType: 'ARTICLE',
          docAuthorityLevel: article?.status === 'PUBLISHED' ? 'PUBLISHED' : 'DRAFT',
          lastModifiedAt: article?.updatedAt ?? undefined,
        };
      }

      // No local mapping: expose what the chunk itself provides.
      return {
        id: chunk.document_id,
        type: 'document',
        title: fallbackTitle,
        snippet,
        previewContent,
        score,
      };
    });

    const folderItems = await this.searchFolders(query, effectiveLimit);
    const mergedItems = [...items, ...folderItems];

    const responseTimeMs = Date.now() - start;
    await this.recordSearch(userId, query, mergedItems.length, responseTimeMs);

    return {
      items: mergedItems,
      total: mergedItems.length,
      searchType: 'semantic',
      responseTimeMs,
    };
  }

  /**
   * Coerces the caller-supplied limit into a non-negative integer.
   * A negative limit would make `slice(0, limit)` drop results from the end,
   * and `NaN` would be forwarded to RAGFlow as the page size.
   */
  private normalizeLimit(limit: number): number {
    if (!Number.isFinite(limit)) {
      return 20;
    }
    return Math.max(0, Math.floor(limit));
  }

  /** Extracts a printable message from whatever a failed call threw. */
  private describeError(error: unknown): string {
    if (typeof error === 'string' && error) {
      return error;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      const { message } = error as { message?: unknown };
      if (typeof message === 'string' && message) {
        return message;
      }
    }
    return 'unknown error';
  }

  /**
   * Records the search in the metrics service without letting a metrics
   * failure affect the search itself; failures are logged as warnings.
   */
  private async recordSearch(
    userId: string,
    query: string,
    resultCount: number,
    responseTimeMs: number,
  ): Promise<void> {
    try {
      await this.metricsService.logSearch(userId, {
        query,
        resultCount,
        responseTimeMs,
        searchType: SearchType.SEMANTIC,
      });
    } catch (error: unknown) {
      this.logger.warn(`Failed to record search metrics: ${this.describeError(error)}`);
    }
  }

  /**
   * Pulls the `chunks` array out of a retrieval payload.
   * Returns an empty list when the payload has no array under `chunks`;
   * entries that are not objects are dropped.
   */
  private extractChunks(payload: unknown): RagflowChunk[] {
    if (typeof payload !== 'object' || payload === null) {
      return [];
    }
    const { chunks } = payload as { chunks?: unknown };
    if (!Array.isArray(chunks)) {
      return [];
    }
    return chunks.filter(
      (entry): entry is RagflowChunk => typeof entry === 'object' && entry !== null,
    );
  }

  /**
   * Finds folders whose title or path matches the query.
   *
   * A folder matches when its title/path contains the whole keyword or any of
   * the extracted tokens, compared both as typed (case-insensitive) and in the
   * normalised form with whitespace, `-` and `_` removed. At most
   * `clamp(limit, 5, 20)` folders are returned, most recently modified first.
   */
  private async searchFolders(query: string, limit: number): Promise<SearchResultItem[]> {
    const keyword = query.trim();
    if (!keyword) {
      return [];
    }
    const tokens = this.extractSearchTokens(keyword);
    const normalizedKeyword = this.normalizeFolderSearchText(keyword);
    const take = Math.max(5, Math.min(limit, 20));

    // User text is matched literally: `%`, `_` and `\` are escaped so they do
    // not act as LIKE wildcards (PostgreSQL's default escape character is `\`).
    const keywordPattern = `%${this.escapeLikePattern(keyword)}%`;
    const normalizedKeywordPattern = `%${this.escapeLikePattern(normalizedKeyword)}%`;
    const likeTokens = tokens.map((token) => this.escapeLikePattern(token));
    const normalizedLikeTokens = tokens
      .map((token) => this.normalizeFolderSearchText(token))
      .filter((token) => token.length > 0)
      .map((token) => this.escapeLikePattern(token));

    // The normalised keyword is empty for queries made only of separators
    // (e.g. "---"); the `<> ''` guard keeps that from becoming a match-all `%%`.
    const folders = await this.prisma.$queryRaw<
      Array<{
        id: string;
        title: string;
        folder_path: string;
        web_url: string;
        created_by: string | null;
        sp_modified_at: Date | null;
      }>
    >`
      SELECT id, title, folder_path, web_url, created_by, sp_modified_at
      FROM platform_knowledge.sp_folder_index
      WHERE
        title ILIKE ${keywordPattern}
        OR folder_path ILIKE ${keywordPattern}
        OR (
          ${normalizedKeyword}::text <> ''
          AND (
            regexp_replace(lower(title), '[\\s\\-_]+', '', 'g') LIKE ${normalizedKeywordPattern}
            OR regexp_replace(lower(folder_path), '[\\s\\-_]+', '', 'g') LIKE ${normalizedKeywordPattern}
          )
        )
        OR EXISTS (
          SELECT 1
          FROM unnest(${likeTokens}::text[]) AS t(token)
          WHERE title ILIKE ('%' || token || '%')
             OR folder_path ILIKE ('%' || token || '%')
        )
        OR EXISTS (
          SELECT 1
          FROM unnest(${normalizedLikeTokens}::text[]) AS nt(token)
          WHERE regexp_replace(lower(title), '[\\s\\-_]+', '', 'g') LIKE ('%' || token || '%')
             OR regexp_replace(lower(folder_path), '[\\s\\-_]+', '', 'g') LIKE ('%' || token || '%')
        )
      ORDER BY sp_modified_at DESC NULLS LAST
      LIMIT ${take}
    `;

    return folders.map(
      (folder): SearchResultItem => ({
        id: folder.id,
        type: 'folder',
        title: folder.title,
        webUrl: folder.web_url,
        snippet: folder.folder_path,
        score: this.calculateFolderScore(folder.title, folder.folder_path, keyword, tokens),
        lastModifiedAt: folder.sp_modified_at ?? undefined,
        createdBy: folder.created_by ?? undefined,
        folderPath: folder.folder_path,
      }),
    );
  }

  /** Escapes the LIKE metacharacters `\`, `%` and `_` with a backslash. */
  private escapeLikePattern(value: string): string {
    return value.replace(/[\\%_]/g, '\\$&');
  }

  /**
   * Heuristic relevance of a folder for a query, in the range [0, 1].
   *
   * Tiers, compared on normalised text: exact title 1, title prefix 0.95,
   * title contains 0.88, path contains 0.75. Otherwise 0.6 plus 0.04 per
   * matching token, capped at 0.74 so token-only matches stay below path
   * matches. Returns 0 when the query normalises to an empty string.
   */
  private calculateFolderScore(
    title: string,
    folderPath: string,
    query: string,
    tokens?: string[],
  ): number {
    const normalizedQuery = this.normalizeFolderSearchText(query);
    if (!normalizedQuery) {
      return 0;
    }

    const normalizedTitle = this.normalizeFolderSearchText(title);
    if (normalizedTitle === normalizedQuery) {
      return 1;
    }
    if (normalizedTitle.startsWith(normalizedQuery)) {
      return 0.95;
    }
    if (normalizedTitle.includes(normalizedQuery)) {
      return 0.88;
    }
    if (this.normalizeFolderSearchText(folderPath).includes(normalizedQuery)) {
      return 0.75;
    }
    if (!tokens || tokens.length === 0) {
      return 0.6;
    }
    const tokenMatches = this.countTokenMatches(`${title} ${folderPath}`, tokens);
    return Math.min(0.74, 0.6 + tokenMatches * 0.04);
  }

  /** Lower-cases the text and removes all whitespace, `-` and `_`. */
  private normalizeFolderSearchText(value: string): string {
    return value
      .trim()
      .toLowerCase()
      .replace(/[\s\-_]+/g, '');
  }

  /**
   * Splits a query into de-duplicated search tokens (at most 20).
   *
   * The query is split on whitespace and punctuation. For every piece the
   * piece itself, its normalised form, and each alphanumeric or CJK run inside
   * it are added when at least two characters long. CJK runs of up to six
   * characters additionally contribute every single character.
   */
  private extractSearchTokens(query: string): string[] {
    const tokenSet = new Set<string>();
    const addIfLongEnough = (candidate: string): void => {
      if (candidate.length >= 2) {
        tokenSet.add(candidate);
      }
    };

    const rawTokens = query
      .split(/[\s,，。！？!?；;、|/\\()[\]{}"'`<>:：\-_.]+/g)
      .map((token) => token.trim())
      .filter(Boolean);

    for (const rawToken of rawTokens) {
      addIfLongEnough(rawToken);
      addIfLongEnough(this.normalizeFolderSearchText(rawToken));

      const parts = rawToken.match(/[a-z0-9]+|[\u3400-\u9FFF]+/gi) ?? [];
      for (const part of parts) {
        addIfLongEnough(part);

        // Short Chinese term fallback: keep the legacy single-character
        // matching so that e.g. "测试" can still recall "预测".
        if (/^[\u3400-\u9FFF]+$/.test(part) && part.length <= 6) {
          Array.from(part).forEach((char) => {
            tokenSet.add(char);
          });
        }
      }
    }

    return Array.from(tokenSet).slice(0, 20);
  }

  /** Counts how many tokens occur in the text, comparing normalised forms. */
  private countTokenMatches(text: string, tokens: string[]): number {
    const normalizedText = this.normalizeFolderSearchText(text);
    if (!normalizedText) {
      return 0;
    }
    let matches = 0;
    for (const token of tokens) {
      const normalizedToken = this.normalizeFolderSearchText(token);
      if (normalizedToken && normalizedText.includes(normalizedToken)) {
        matches += 1;
      }
    }
    return matches;
  }

  /**
   * Keeps the highest-similarity chunk of every document and returns the
   * `limit` best of them, most similar first. Chunks without a document id
   * are ignored; a missing similarity counts as 0.
   */
  private pickTopChunksByDocument(
    chunks: ReadonlyArray<RagflowChunk>,
    limit: number,
  ): DocumentChunk[] {
    const bestByDocument = new Map<string, DocumentChunk>();

    for (const chunk of chunks) {
      const docId = chunk.document_id;
      if (!docId) continue;
      const existing = bestByDocument.get(docId);
      if (!existing || (chunk.similarity ?? 0) > (existing.similarity ?? 0)) {
        bestByDocument.set(docId, { ...chunk, document_id: docId });
      }
    }

    return Array.from(bestByDocument.values())
      .sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0))
      .slice(0, Math.max(0, limit));
  }

  /**
   * Collapses whitespace and shortens the text to `maxLength` characters,
   * appending `...` when something was cut off.
   */
  private clipSnippet(content?: string, maxLength: number = 200): string {
    if (!content) return '';
    const cleaned = content.replace(/\s+/g, ' ').trim();
    if (cleaned.length <= maxLength) {
      return cleaned;
    }
    return `${this.truncate(cleaned, maxLength)}...`;
  }

  /**
   * Cuts the text to at most `maxLength` UTF-16 code units without splitting
   * a surrogate pair, which would leave a lone surrogate at the end.
   */
  private truncate(text: string, maxLength: number): string {
    let end = Math.max(0, Math.min(text.length, maxLength));
    if (end > 0 && end < text.length) {
      const lastUnit = text.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
    }
    return text.slice(0, end);
  }

  /** Groups the chunks of the given documents by document id, keeping input order. */
  private groupChunksByDocument(
    chunks: ReadonlyArray<RagflowChunk>,
    targetDocumentIds: Set<string>,
  ): Map<string, RagflowChunk[]> {
    const grouped = new Map<string, RagflowChunk[]>();

    for (const chunk of chunks) {
      const docId = chunk.document_id;
      if (!docId || !targetDocumentIds.has(docId)) {
        continue;
      }
      const bucket = grouped.get(docId);
      if (bucket) {
        bucket.push(chunk);
      } else {
        grouped.set(docId, [chunk]);
      }
    }

    return grouped;
  }

  /**
   * Builds a preview from a document's chunks: most similar first, whitespace
   * collapsed, duplicates and empty texts removed, joined by blank lines and
   * cut to `maxLength` characters.
   */
  private buildPreviewContent(
    chunks: ReadonlyArray<RagflowChunk>,
    maxLength: number = 200000,
  ): string {
    if (!Array.isArray(chunks) || chunks.length === 0) {
      return '';
    }

    // A Set keeps insertion order, so it de-duplicates while preserving rank.
    const uniqueParts = new Set<string>();
    const ranked = [...chunks].sort((a, b) => (b?.similarity ?? 0) - (a?.similarity ?? 0));

    for (const chunk of ranked) {
      // String() guards against a non-string `content` in the external payload.
      const normalized = String(chunk?.content ?? '')
        .replace(/\s+/g, ' ')
        .trim();
      if (normalized) {
        uniqueParts.add(normalized);
      }
    }

    if (uniqueParts.size === 0) {
      return '';
    }

    return this.truncate(Array.from(uniqueParts).join('\n\n'), maxLength);
  }
}
