import { ProcessLogger } from '../process-logger';
import { fineTuningService } from './fine-tuning-service';
import { feedbackService } from './feedback-service';
import { DocumentType } from '../../types/document';
import { TextChunker } from '../utils/text-chunker';
import { OpenAIRetry } from '../utils/openai-retry';
import { createWorker } from 'tesseract.js';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { fileToBase64, DEFAULT_VISION_MODEL, VisionMessage } from '../llm/openrouter';

// Tell pdf.js where to load its worker script from: the public path of the app.
// This runs once, as a module-level side effect, when this file is first imported.
GlobalWorkerOptions.workerSrc = '/pdf.worker.min.js';

/**
 * Side information about a document that accompanies its extracted text
 * when it is passed to `DocumentClassificationService.classifyDocument`.
 */
interface ClassificationMetadata {
  /** Original file name; scanned for floor-plan, lease and amendment keywords. */
  filename: string;
  /** Length of the extracted text in characters (batch classification passes `text.length`). */
  textLength: number;
}

/**
 * Classifies lease-related documents (original lease vs. amendment) by
 * combining several signals:
 *
 * 1. PDF page count (long PDFs are treated as original leases, very short ones
 *    as amendments unless the title says otherwise),
 * 2. a vision-model pass over the first page (when a file is supplied),
 * 3. text heuristics on the filename, the title area and the text length.
 *
 * Progress and failures are reported through the per-user `ProcessLogger`.
 */
export class DocumentClassificationService {
  /**
   * Captures the title line of a lower-cased document: leading whitespace is
   * skipped, then up to 200 tokens are taken until a sentence terminator or a
   * newline. The abbreviation "no." is consumed as a whole so that titles such
   * as "amendment no. 2" are not cut off at the period.
   */
  private static readonly FIRST_LINE_PATTERN = /^[\s\n]*((?:\bno\.|[^.!?\n]){0,200})/;

  /**
   * Ordered title/content heuristics. Rules are evaluated top to bottom and the
   * first rule with a matching pattern wins, so the order encodes priority.
   * `scope` selects what the patterns are tested against: the captured title
   * line (`firstLine`) or the first 2000 characters (`titleArea`).
   */
  private static readonly TITLE_RULES: ReadonlyArray<{
    scope: 'firstLine' | 'titleArea';
    suggestedType: DocumentType;
    confidence: number;
    patterns: readonly RegExp[];
  }> = [
    {
      // Definitive amendment indicators (highest confidence)
      scope: 'firstLine',
      suggestedType: 'amendment',
      confidence: 0.98,
      patterns: [
        /^\s*(?:first|second|third|fourth|fifth|\d+(?:st|nd|rd|th))?\s*amendment\s+to\s+lease/i,
        /^amendment\s*(no\.)?\s*\d+/i,
        /^amendment\s+to\s+lease/i,
        /this\s+(?:first|second|third|fourth|fifth|\d+(?:st|nd|rd|th))?\s*amendment/i,
        /amendment\s+dated.*\s+to.*\s+lease.*dated/i
      ]
    },
    {
      // Secondary amendment indicators in first paragraph
      scope: 'titleArea',
      suggestedType: 'amendment',
      confidence: 0.95,
      patterns: [
        /whereas.*\s+lease.*\s+dated.*\s+amendment/i,
        /reference\s+is\s+made\s+to.*\s+lease.*\s+dated/i,
        /that\s+certain\s+lease.*\s+dated.*\s+is\s+hereby\s+amended/i,
        /parties.*\s+hereby\s+amend.*\s+lease/i
      ]
    },
    {
      // Definitive original lease indicators (highest confidence)
      scope: 'firstLine',
      suggestedType: 'original_lease',
      confidence: 0.98,
      patterns: [
        /^\s*(?:standard\s+)?(?:office|industrial|commercial|retail)?\s*lease\s+agreement/i,
        /^\s*lease\s+agreement/i,
        /this\s+lease\s+agreement.*made.*between/i,
        /^\s*standard\s+(industrial|office|commercial|retail)\s+lease/i,
        /^lease\s+dated.*\s+between/i,
        /^net\s+lease.*\s+agreement/i
      ]
    },
    {
      // Secondary lease indicators in first paragraph
      scope: 'titleArea',
      suggestedType: 'original_lease',
      confidence: 0.95,
      patterns: [
        /witnesseth.*\s+landlord.*\s+hereby\s+leases.*\s+premises/i,
        /now,\s*therefore.*\s+landlord.*\s+hereby\s+leases.*\s+premises/i,
        /in\s+consideration\s+of.*\s+landlord.*\s+hereby\s+leases/i,
        /this\s+indenture\s+of\s+lease.*\s+between/i,
        /basic\s+lease\s+provisions/i,
        /article\s+1.*\s+premises.*\s+article\s+2/i
      ]
    },
    {
      // Amendment indicators in the content
      scope: 'titleArea',
      suggestedType: 'amendment',
      confidence: 0.9,
      patterns: [
        /hereby\s+amends?\s+that\s+certain\s+lease/i,
        /the\s+lease\s+is\s+hereby\s+amended/i,
        /amend.*\s+following\s+terms\s+and\s+conditions/i,
        /modifications?\s+to\s+lease/i
      ]
    },
    {
      // Lease structure indicators
      scope: 'titleArea',
      suggestedType: 'original_lease',
      confidence: 0.9,
      patterns: [
        /table\s+of\s+contents.*\s+article/i,
        /article\s+1.*\s+definitions/i,
        /section\s+1.*\s+premises.*\s+section\s+2/i,
        /basic\s+lease\s+information/i
      ]
    }
  ];

  private processLogger: ProcessLogger;
  private textChunker: TextChunker;
  private userId: string;
  private openaiRetry: OpenAIRetry;

  /**
   * @param _apiKey Accepted for interface compatibility; not used by this class.
   * @param userId  Identifier passed to the logger, text chunker and retry helper.
   */
  constructor(_apiKey: string, userId: string) {
    this.userId = userId;
    this.processLogger = new ProcessLogger(userId);
    this.textChunker = new TextChunker(userId);
    this.openaiRetry = new OpenAIRetry(userId, this.processLogger);
  }

  /**
   * Analyze filename for document type hints.
   *
   * A filename that names an amendment ("... amendment ..." or "2nd amend ...")
   * is never reported as an original lease, even when it also contains
   * "lease agreement" (e.g. "First Amendment to Lease Agreement.pdf").
   *
   * @param filename File name as uploaded (any case).
   * @returns The suggested type and a confidence in the range 0-1.
   */
  private analyzeFilename(filename: string): { suggestedType: DocumentType; confidence: number } {
    const lowerFilename = filename.toLowerCase();

    const isNumberedAmendment = /\d+(?:st|nd|rd|th)\s*(?:amendment|amend)/i.test(filename);
    // "amended and restated lease agreement" deliberately does NOT count here:
    // only the noun "amendment" or a numbered "amend" blocks the lease check.
    const namesAmendment = isNumberedAmendment || lowerFilename.includes('amendment');

    // Check for clear original lease indicators first (high confidence)
    if (!namesAmendment && (
        lowerFilename.includes('lease agreement') ||
        lowerFilename.includes('original lease') ||
        lowerFilename.includes('master lease') ||
        /^lease.*agreement/i.test(lowerFilename) ||
        /^standard.*lease/i.test(lowerFilename))) {
      return { suggestedType: 'original_lease', confidence: 0.9 };
    }

    // Numbered amendment ("2nd amendment")
    if (isNumberedAmendment) {
      return { suggestedType: 'amendment', confidence: 0.7 };
    }

    // Amendment combined with a date, in either order
    if (/amendment.*\d{1,4}[-/]\d{1,2}[-/]\d{2,4}/i.test(filename) ||
        /\d{1,4}[-/]\d{1,2}[-/]\d{2,4}.*amendment/i.test(filename)) {
      return { suggestedType: 'amendment', confidence: 0.6 };
    }

    // Explicit "amendment to lease" / "lease amendment" phrasing
    if (lowerFilename.includes('amendment to lease') ||
        lowerFilename.includes('lease amendment')) {
      return { suggestedType: 'amendment', confidence: 0.6 };
    }

    // Any remaining mention of "amend" (this also covers "amendment")
    if (lowerFilename.includes('amend')) {
      return { suggestedType: 'amendment', confidence: 0.5 };
    }

    // If no clear indicators, default to original lease with low confidence
    return { suggestedType: 'original_lease', confidence: 0.3 };
  }

  /**
   * Analyze document size for type hints.
   *
   * @param textLength Length of the extracted text in characters.
   * @returns `amendment` for texts under 2000 characters, otherwise
   *          `original_lease` with a confidence that grows with length.
   */
  private analyzeDocumentSize(textLength: number): { suggestedType: DocumentType; confidence: number } {
    if (textLength < 2000) {
      return { suggestedType: 'amendment', confidence: 0.8 };
    }

    // Documents between 2000-8000 characters could be either
    if (textLength < 8000) {
      return { suggestedType: 'original_lease', confidence: 0.5 };
    }

    // Original leases are typically longer
    if (textLength > 15000) {
      return { suggestedType: 'original_lease', confidence: 0.9 };
    }

    return { suggestedType: 'original_lease', confidence: 0.7 };
  }

  /**
   * Analyze document title and first page content.
   *
   * Only the first 2000 characters are inspected. The rules in `TITLE_RULES`
   * are tried in order; the first matching pattern decides the result.
   *
   * @param text Extracted document text.
   * @returns The suggested type and confidence, or `original_lease` at 0.3
   *          when no rule matches.
   */
  private analyzeDocumentTitle(text: string): { suggestedType: DocumentType; confidence: number } {
    const titleArea = text.slice(0, 2000).toLowerCase();
    const titleMatch = titleArea.match(DocumentClassificationService.FIRST_LINE_PATTERN);
    const firstLine = titleMatch ? titleMatch[1] : '';

    for (const rule of DocumentClassificationService.TITLE_RULES) {
      const haystack = rule.scope === 'firstLine' ? firstLine : titleArea;
      if (rule.patterns.some(pattern => pattern.test(haystack))) {
        return { suggestedType: rule.suggestedType, confidence: rule.confidence };
      }
    }

    // If no clear indicators found, return low confidence
    return { suggestedType: 'original_lease', confidence: 0.3 };
  }

  /**
   * Get enhanced classification prompt with examples.
   *
   * Builds the prompt from the feedback service's `document_type` enhancement
   * (base prompt, warnings and corrected examples). If that lookup fails, the
   * error is logged and a minimal one-line prompt is returned instead.
   */
  private async getEnhancedPrompt(): Promise<string> {
    try {
      const enhancement = await feedbackService.getEnhancedPrompt('document_type');
      
      const prompt = `
        ${enhancement.basePrompt}
        
        LSC-o1 Classification Guidelines:
        ${enhancement.warnings.join('\n')}
        
        Training Examples:
        ${enhancement.examples.map(e => 
          `Document Text: ${e.original}\nClassification: ${e.corrected}`
        ).join('\n\n')}
        
        Analyze the following document and determine its type.
        Return a JSON object with a "type" field containing one of: original_lease, amendment, option, extension
        For example: {"type": "original_lease"}
        
        Document Types:
        - original_lease: The primary lease agreement
        - amendment: A document that modifies the original lease
        - option: A document granting rights to extend or modify the lease
        - extension: A document extending the lease term

        IMPORTANT: Your response must be a valid JSON object with a "type" field.
      `;

      await this.processLogger.log(
        `LSC-o1 initialized with ${enhancement.examples.length} training examples`,
        'info'
      );

      return prompt.trim();
    } catch (error) {
      await this.processLogger.log(
        `Error initializing LSC-o1, using base configuration: ${error}`,
        'error'
      );
      return `Classify the document type as one of: original_lease, amendment, option, extension`;
    }
  }

  /**
   * Map classification result to valid DocumentType.
   *
   * Matching is case-insensitive. `supplemental` and `related` are folded into
   * `amendment`; anything unrecognised falls back to `original_lease`.
   */
  private mapToDocumentType(result: string): DocumentType {
    const type = result.toLowerCase();
    switch (type) {
      case 'original_lease':
      case 'amendment':
      case 'option':
      case 'extension':
        return type as DocumentType;
      case 'supplemental':
      case 'related':
        return 'amendment';
      default:
        // Default to original_lease if we can't determine the type
        return 'original_lease';
    }
  }

  /**
   * Validate and normalise the parsed JSON returned by the vision model.
   *
   * @param raw Result of `JSON.parse` on the model output.
   * @returns A usable classification, or `null` when `type` is not a non-empty
   *          string or `confidence` is not a finite number (numeric strings are
   *          accepted). The confidence is clamped to the range 0-1.
   */
  private normalizeVisionResult(raw: unknown): { type: DocumentType; confidence: number } | null {
    if (typeof raw !== 'object' || raw === null) {
      return null;
    }

    const { type, confidence } = raw as { type?: unknown; confidence?: unknown };
    if (typeof type !== 'string' || type.trim() === '') {
      return null;
    }

    let numericConfidence: number;
    if (typeof confidence === 'number') {
      numericConfidence = confidence;
    } else if (typeof confidence === 'string' && confidence.trim() !== '') {
      numericConfidence = Number(confidence);
    } else {
      return null;
    }
    if (!Number.isFinite(numericConfidence)) {
      return null;
    }

    return {
      type: this.mapToDocumentType(type.trim()),
      confidence: Math.min(1, Math.max(0, numericConfidence))
    };
  }

  /**
   * Count the pages of a PDF file, releasing the pdf.js document afterwards.
   *
   * @throws Whatever pdf.js throws when the file cannot be opened.
   */
  private async getPdfPageCount(file: File): Promise<number> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({
      data: arrayBuffer,
      disableFontFace: true,
      isEvalSupported: false
    }).promise;

    try {
      return pdf.numPages;
    } finally {
      // Best-effort cleanup: a failed destroy must not hide the page count.
      await pdf.destroy().catch(() => undefined);
    }
  }

  /**
   * Extract first page as image for vision analysis.
   *
   * PDFs are rendered to an off-screen canvas at 2x scale and encoded as PNG;
   * image files are encoded directly. Requires a DOM (`document`).
   *
   * @returns The base64 payload produced by `fileToBase64`, or `null` for
   *          unsupported file types and on any error (errors are logged to the
   *          console, not thrown).
   */
  private async extractFirstPageImage(file: File): Promise<string | null> {
    try {
      if (file.type === 'application/pdf') {
        const arrayBuffer = await file.arrayBuffer();
        const pdf = await getDocument({
          data: arrayBuffer,
          disableFontFace: true,
          isEvalSupported: false
        }).promise;

        try {
          const page = await pdf.getPage(1);
          const scale = 2.0; // Higher scale for better quality
          const viewport = page.getViewport({ scale });

          const canvas = document.createElement('canvas');
          const context = canvas.getContext('2d');
          if (!context) {
            throw new Error('Failed to create canvas context');
          }

          canvas.height = viewport.height;
          canvas.width = viewport.width;

          await page.render({
            canvasContext: context,
            viewport: viewport
          }).promise;

          // toBlob may hand back null when the image cannot be created, so
          // reject explicitly instead of asserting non-null.
          const blob = await new Promise<Blob>((resolve, reject) => {
            canvas.toBlob(
              (encoded) => {
                if (encoded) {
                  resolve(encoded);
                } else {
                  reject(new Error('Failed to encode first page as PNG'));
                }
              },
              'image/png',
              1.0
            );
          });
          return await fileToBase64(blob);
        } finally {
          // Release the pdf.js document whether or not rendering succeeded.
          await pdf.destroy().catch(() => undefined);
        }
      } else if (file.type.startsWith('image/')) {
        return await fileToBase64(file);
      }
      return null;
    } catch (error) {
      console.error('Error extracting first page image:', error);
      return null;
    }
  }

  /**
   * Use vision model to analyze document type.
   *
   * @param file Source file; its first page is sent to the model as an image.
   * @param text Extracted text. Currently unused by this method.
   * @returns The validated classification, or `null` when no image could be
   *          produced, the request failed, or the model output was unusable
   *          (failures are logged, not thrown).
   */
  private async analyzeDocumentWithVision(
    file: File,
    text: string
  ): Promise<{ type: DocumentType; confidence: number } | null> {
    try {
      const imageData = await this.extractFirstPageImage(file);
      if (!imageData) {
        return null;
      }

      const systemPrompt = `You are a document classifier for lease documents.
Determine if this is an original lease agreement or an amendment.

Return ONLY a JSON object:
{
  "type": "original_lease" | "amendment",
  "confidence": number between 0-1
}`;

      const userPrompt = `Is this document an original lease agreement or an amendment? Look at the title and first paragraph.`;

      const messages: VisionMessage[] = [
        {
          role: 'system',
          content: systemPrompt
        },
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: userPrompt
            },
            {
              type: 'image_url',
              image_url: {
                url: imageData
              }
            }
          ]
        }
      ];

      const response = await this.openaiRetry.createChatCompletion({
        messages,
        model: DEFAULT_VISION_MODEL,
        maxTokens: 500,
        response_format: { type: "json_object" },
        isVision: true
      }, 'document-classification');

      if (!response?.choices?.[0]?.message?.content) {
        throw new Error('Invalid vision model response');
      }

      const parsed: unknown = JSON.parse(response.choices[0].message.content);
      await this.processLogger.log(
        `Vision analysis result: ${JSON.stringify(parsed)}`,
        'info'
      );

      const reasoning = typeof parsed === 'object' && parsed !== null
        ? (parsed as { reasoning?: unknown }).reasoning
        : undefined;
      if (reasoning) {
        await this.processLogger.log(
          `Classification reasoning: ${String(reasoning)}`,
          'info'
        );
      }

      // Model output is untrusted: validate it before it becomes a result.
      const classification = this.normalizeVisionResult(parsed);
      if (!classification) {
        throw new Error('Invalid vision model response');
      }
      return classification;
    } catch (error) {
      console.error('Vision analysis error:', error);
      await this.processLogger.log(
        `Vision analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
        'error'
      );
      return null;
    }
  }

  /**
   * Classify a document using LSC-o1.
   *
   * Decision order:
   * 1. Floor-plan filename with `skipFloorPlanCheck` set: `original_lease` at 0.5.
   * 2. PDF page count: more than 5 pages gives `original_lease` at 0.98; 1-2 pages
   *    gives `amendment` at 0.85 unless the title analysis is at least 0.9 confident.
   * 3. Vision analysis of the first page (only with a file and when
   *    `skipFloorPlanCheck` is false).
   * 4. Text heuristics: a title result above 0.9 wins outright; otherwise the
   *    title (0.5), size (0.3) and filename (0.2) signals are weighted and the
   *    higher-scoring type is returned with its score as confidence.
   *
   * @param documentId Identifier of the document. Currently unused by this method.
   * @param text       Extracted document text.
   * @param metadata   Filename and text length of the document.
   * @param file       Optional source file enabling page-count and vision checks.
   * @param skipFloorPlanCheck When true, vision analysis is skipped entirely.
   * @returns The type, a confidence in the range 0-1 and the elapsed milliseconds.
   * @throws Rethrows any error after logging it (e.g. an unreadable PDF).
   */
  async classifyDocument(
    documentId: string,
    text: string,
    metadata: ClassificationMetadata,
    file?: File,
    skipFloorPlanCheck: boolean = false
  ): Promise<{
    type: DocumentType;
    confidence: number;
    processingTime: number;
  }> {
    try {
      const startTime = Date.now();

      // Check if file is a floor plan
      const lowerFileName = metadata.filename.toLowerCase();
      const isFloorPlan = lowerFileName.includes('floor plan') || 
                         lowerFileName.includes('floorplan') || 
                         lowerFileName.includes('floor-plan');

      // Skip floor plan processing if skipFloorPlanCheck is true
      if (isFloorPlan && skipFloorPlanCheck) {
        await this.processLogger.log('Skipping floor plan check since vision checking for floor plans is disabled', 'info');
        return {
          type: 'original_lease',
          confidence: 0.5,
          processingTime: 0
        };
      }

      // Title analysis is computed at most once and shared by the steps below.
      let earlyTitleAnalysis: { suggestedType: DocumentType; confidence: number } | undefined;

      // Page count is checked first, before any other processing
      if (file && file.type === 'application/pdf') {
        const pageCount = await this.getPdfPageCount(file);

        await this.processLogger.log(
          `Document has ${pageCount} pages`,
          'info'
        );

        if (pageCount > 5) {
          await this.processLogger.log(
            `Document has ${pageCount} pages - automatically classifying as original lease`,
            'info'
          );
          return {
            type: 'original_lease',
            confidence: 0.98,
            processingTime: Date.now() - startTime
          };
        } else if (pageCount <= 2) {
          // A 1-2 page document is likely an amendment
          await this.processLogger.log(
            `Document has ${pageCount} pages - likely an amendment`,
            'info'
          );
          // Only a confident title match (>= 0.9) may override the page-count signal
          earlyTitleAnalysis = this.analyzeDocumentTitle(text);
          if (earlyTitleAnalysis.confidence < 0.9) {
            return {
              type: 'amendment',
              confidence: 0.85,
              processingTime: Date.now() - startTime
            };
          }
        }
      }

      // First try vision analysis if file is provided and floor plan check is not skipped
      if (file && !skipFloorPlanCheck) {
        const visionResult = await this.analyzeDocumentWithVision(file, text);
        if (visionResult) {
          await this.processLogger.log(
            `Vision-based classification: ${visionResult.type} (${Math.round(visionResult.confidence * 100)}% confidence)`,
            'success'
          );
          return {
            ...visionResult,
            processingTime: Date.now() - startTime
          };
        }
        // Only log if vision analysis was attempted but failed
        await this.processLogger.log(
          'Vision analysis failed or returned null, falling back to text analysis',
          'info'
        );
      } else if (skipFloorPlanCheck) {
        await this.processLogger.log(
          'Floor plan checking is disabled, skipping vision analysis',
          'info'
        );
      }

      // Fallback to text-based analysis
      const filenameAnalysis = this.analyzeFilename(metadata.filename);
      const titleAnalysis = earlyTitleAnalysis ?? this.analyzeDocumentTitle(text);
      const sizeAnalysis = this.analyzeDocumentSize(metadata.textLength);

      // Log all signals
      await this.processLogger.log(
        `Classification signals:\n` +
        `Filename: ${filenameAnalysis.suggestedType} (${Math.round(filenameAnalysis.confidence * 100)}%)\n` +
        `Title/Content: ${titleAnalysis.suggestedType} (${Math.round(titleAnalysis.confidence * 100)}%)\n` +
        `Document Size: ${sizeAnalysis.suggestedType} (${Math.round(sizeAnalysis.confidence * 100)}%)`,
        'info'
      );

      // Prioritize title analysis if it has high confidence
      if (titleAnalysis.confidence > 0.9) {
        return {
          type: titleAnalysis.suggestedType,
          confidence: titleAnalysis.confidence,
          processingTime: Date.now() - startTime
        };
      }

      // Weight the analyses based on reliability
      const weightedAnalyses = [
        { type: titleAnalysis.suggestedType, confidence: titleAnalysis.confidence, weight: 0.5 },
        { type: sizeAnalysis.suggestedType, confidence: sizeAnalysis.confidence, weight: 0.3 },
        { type: filenameAnalysis.suggestedType, confidence: filenameAnalysis.confidence, weight: 0.2 }
      ];

      // Accumulate weighted scores; only the two scored types can contribute,
      // so an unexpected type can never turn a score into NaN.
      const scores: Record<'original_lease' | 'amendment', number> = {
        original_lease: 0,
        amendment: 0
      };

      for (const analysis of weightedAnalyses) {
        if (analysis.type === 'original_lease' || analysis.type === 'amendment') {
          scores[analysis.type] += analysis.confidence * analysis.weight;
        }
      }

      // Determine final type based on weighted scores (ties go to amendment)
      const finalType: DocumentType = scores.original_lease > scores.amendment ? 'original_lease' : 'amendment';
      const confidence = Math.max(scores.original_lease, scores.amendment);

      return {
        type: finalType,
        confidence,
        processingTime: Date.now() - startTime
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown classification error';
      await this.processLogger.log(
        `Classification error: ${message}`,
        'error'
      );
      throw error;
    }
  }

  /**
   * Batch classify multiple documents using LSM-o1.
   *
   * Documents are classified one after another, text-only (no file is passed,
   * so page-count and vision checks do not run). A document whose
   * classification throws is logged and recorded as `amendment` with
   * confidence 0.1, so one failure never aborts the batch.
   *
   * @param documents Documents to classify, each with an id, text and filename.
   * @returns A map from document id to its classification result.
   */
  async classifyBatch(documents: Array<{
    id: string;
    text: string;
    filename: string;
  }>): Promise<Record<string, {
    type: DocumentType;
    confidence: number;
    processingTime: number;
  }>> {
    const results: Record<string, {
      type: DocumentType;
      confidence: number;
      processingTime: number;
    }> = {};
    
    await this.processLogger.log(
      `LSM-o1 beginning batch classification of ${documents.length} documents`,
      'info'
    );

    for (const doc of documents) {
      try {
        results[doc.id] = await this.classifyDocument(doc.id, doc.text, {
          filename: doc.filename,
          textLength: doc.text.length
        });
      } catch (error) {
        await this.processLogger.log(
          `LSM-o1 failed to classify document ${doc.id}: ${error}`,
          'error'
        );
        results[doc.id] = {
          type: 'amendment',
          confidence: 0.1,
          processingTime: 0
        };
      }
    }

    return results;
  }
}
