import { createWorker } from 'tesseract.js';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessingError } from './errors';

// Location of the PDF.js worker script. The leading slash makes this an
// absolute URL path, so the file is presumably served from the app's
// public/static root — TODO confirm the build copies it there.
// NOTE(review): PDF.js generally expects the worker build to match the
// installed pdfjs-dist version; verify when upgrading the dependency.
GlobalWorkerOptions.workerSrc = '/pdf.worker.min.js';

// Explicitly clear any globally configured worker port.
// NOTE(review): presumably this makes PDF.js start its own worker from
// `workerSrc` instead of reusing an externally supplied one — confirm
// against the PDF.js GlobalWorkerOptions documentation.
GlobalWorkerOptions.workerPort = null; // no externally supplied worker port

/**
 * Renders one PDF page onto an off-screen canvas and returns it as a PNG data URL.
 *
 * @param page - PDF.js page proxy; only `getViewport()` and `render()` are used.
 * @param rotation - Rotation in degrees forwarded to `getViewport`. Defaults to 0.
 *   NOTE(review): per the PDF.js docs an explicit value replaces the page's own
 *   rotation, so callers wanting the page as displayed should pass `page.rotate`.
 * @param scale - Render scale forwarded to `getViewport`. Defaults to 2.0 because
 *   a larger bitmap gives OCR more pixels per glyph.
 * @returns A `data:image/png` URL containing the rendered page.
 * @throws ProcessingError when a 2D canvas context cannot be obtained; rendering
 *   failures from PDF.js propagate unchanged.
 */
async function convertPDFPageToImage(
  page: any,
  rotation: number = 0,
  scale: number = 2.0
): Promise<string> {
  const viewport = page.getViewport({ scale, rotation });
  const canvas = document.createElement('canvas');
  const context = canvas.getContext('2d');

  if (!context) {
    throw ProcessingError.document('Failed to create canvas context');
  }

  canvas.height = viewport.height;
  canvas.width = viewport.width;

  try {
    await page.render({
      canvasContext: context,
      viewport
    }).promise;

    return canvas.toDataURL('image/png');
  } finally {
    // The data URL (if any) has already been produced, so shrink the canvas to
    // drop its bitmap right away instead of waiting for garbage collection.
    // This matters for multi-page documents rendered at high scale.
    canvas.width = 0;
    canvas.height = 0;
  }
}

/**
 * Estimates the viewport rotation (0, 90, 180 or 270 degrees) that shows the
 * page's content upright.
 *
 * The previous implementation compared `getTextContent()` output across four
 * rotations, but the extracted text does not depend on a viewport, so it always
 * returned 0. This version inspects each text item's transform matrix instead:
 * the baseline direction `(transform[0], transform[1])` gives the angle at which
 * the text is drawn, and rotating the viewport by that angle makes it upright.
 * Each quarter-turn bucket is weighted by the number of non-blank characters
 * and the heaviest bucket wins.
 *
 * When the page has no usable text items (e.g. a scanned page) or text
 * extraction fails, the page's own `rotate` value is returned, snapped to a
 * quarter turn; if that is missing too, the result is 0.
 *
 * @param page - PDF.js page proxy; only `rotate` and `getTextContent()` are used.
 * @returns One of 0, 90, 180 or 270. Never rejects.
 */
async function detectPageRotation(page: any): Promise<number> {
  // Snap an angle in degrees to the nearest quarter turn within [0, 360).
  const toQuarterTurn = (degrees: number): number =>
    (((Math.round(degrees / 90) * 90) % 360) + 360) % 360;

  const declaredRotation = page?.rotate;
  const fallbackRotation =
    typeof declaredRotation === 'number' && Number.isFinite(declaredRotation)
      ? toQuarterTurn(declaredRotation)
      : 0;

  try {
    const textContent = await page.getTextContent();
    const weightByRotation = new Map<number, number>();

    for (const item of textContent.items) {
      const charCount = typeof item?.str === 'string' ? item.str.trim().length : 0;
      const transform = item?.transform;
      if (charCount === 0 || !Array.isArray(transform) || transform.length < 2) {
        continue;
      }

      const baselineAngle = (Math.atan2(transform[1], transform[0]) * 180) / Math.PI;
      if (!Number.isFinite(baselineAngle)) {
        continue;
      }

      const rotation = toQuarterTurn(baselineAngle);
      weightByRotation.set(rotation, (weightByRotation.get(rotation) ?? 0) + charCount);
    }

    let bestRotation = fallbackRotation;
    let bestWeight = 0;
    for (const [rotation, weight] of weightByRotation) {
      if (weight > bestWeight) {
        bestWeight = weight;
        bestRotation = rotation;
      }
    }

    return bestRotation;
  } catch (error) {
    console.warn('Failed to detect page rotation:', error);
    return fallbackRotation;
  }
}

/**
 * Extracts the text of every page of a PDF file.
 *
 * For each page the embedded text layer is read first. Pages whose text layer
 * is empty are rendered to an image and run through Tesseract OCR. A page that
 * fails is logged and skipped so one bad page does not abort the whole document.
 *
 * Resource handling:
 * - A single OCR worker is created lazily on the first page that needs OCR and
 *   reused for later pages. If recognition fails the worker is discarded so the
 *   next page starts with a fresh one.
 * - The OCR worker and the PDF loading task are always released in `finally`.
 * - Rotation detection runs only for pages that fall back to OCR, the only
 *   place its result is used.
 *
 * @param file - The PDF file to read.
 * @returns The text of all pages, each followed by a blank line.
 * @throws ProcessingError when the document cannot be loaded or no page yields
 *   any text.
 */
export async function extractTextFromPDF(file: File): Promise<string> {
  let loadingTask: ReturnType<typeof getDocument> | undefined;
  let ocrWorker: Awaited<ReturnType<typeof createWorker>> | undefined;

  // Best-effort shutdown of the shared OCR worker; never throws.
  const releaseOcrWorker = async (): Promise<void> => {
    const worker = ocrWorker;
    ocrWorker = undefined;
    if (!worker) {
      return;
    }
    try {
      await worker.terminate();
    } catch (terminateError) {
      console.warn('Failed to terminate OCR worker:', terminateError);
    }
  };

  try {
    const arrayBuffer = await file.arrayBuffer();
    loadingTask = getDocument({
      data: arrayBuffer,
      disableFontFace: true,
      isEvalSupported: false,
      useSystemFonts: true,
      // NOTE(review): this CDN path pins pdfjs-dist 3.4.120 — confirm it matches
      // the installed pdfjs-dist version.
      cMapUrl: 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.4.120/cmaps/',
      cMapPacked: true,
      disableRange: true,
      disableStream: true
    } as any);

    const pdf = await loadingTask.promise;
    let fullText = '';

    for (let i = 1; i <= pdf.numPages; i++) {
      try {
        const page = await pdf.getPage(i);

        // Prefer the embedded text layer: it is exact and far cheaper than OCR.
        const textContent = await page.getTextContent();
        const pageText = textContent.items
          .map((item: any) => item.str)
          .join(' ');

        if (pageText.trim()) {
          fullText += pageText + '\n\n';
          continue;
        }

        // No usable text layer: render the page and OCR the bitmap.
        const rotation = await detectPageRotation(page);
        console.log(`Falling back to OCR for page ${i} with ${rotation}° rotation`);
        const imageUrl = await convertPDFPageToImage(page, rotation);

        const worker = ocrWorker ?? (ocrWorker = await createWorker());
        let text: string;
        try {
          ({ data: { text } } = await worker.recognize(imageUrl));
        } catch (ocrError) {
          // Do not reuse a worker whose job failed; the next page gets a new one.
          await releaseOcrWorker();
          throw ocrError;
        }

        fullText += text + '\n\n';
      } catch (pageError) {
        console.warn(`Failed to extract text from page ${i}:`, pageError);
      }
    }

    if (!fullText.trim()) {
      throw ProcessingError.document('No text content extracted from PDF');
    }

    return fullText;
  } catch (error) {
    console.error('PDF extraction error:', error);
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from PDF: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  } finally {
    await releaseOcrWorker();
    if (loadingTask) {
      try {
        // Releases the parsed document and its PDF.js worker resources.
        await loadingTask.destroy();
      } catch (destroyError) {
        console.warn('Failed to release PDF document:', destroyError);
      }
    }
  }
}

/**
 * Runs Tesseract OCR over an image file and returns the recognised text.
 *
 * The file is exposed to the OCR worker through a temporary object URL. Both
 * the URL and the worker are released in `finally`. Worker termination is
 * best-effort: a failure there is logged rather than thrown, so it cannot
 * replace a successful result or mask the real error, and it cannot prevent
 * the object URL from being revoked.
 *
 * @param file - The image file to recognise.
 * @returns The recognised text (guaranteed to contain non-whitespace).
 * @throws ProcessingError when OCR fails or yields only whitespace.
 */
export async function extractTextFromImage(file: File): Promise<string> {
  let worker: Awaited<ReturnType<typeof createWorker>> | undefined;
  let imageUrl: string | undefined;

  try {
    worker = await createWorker();
    imageUrl = URL.createObjectURL(file);
    const { data: { text } } = await worker.recognize(imageUrl);

    if (!text.trim()) {
      throw ProcessingError.document('No text content extracted from image');
    }

    return text;
  } catch (error) {
    console.error('Image extraction error:', error);
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from image: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  } finally {
    // Revoke first: it is synchronous and must happen even if terminate() fails.
    if (imageUrl) {
      URL.revokeObjectURL(imageUrl);
    }
    if (worker) {
      try {
        await worker.terminate();
      } catch (terminateError) {
        console.warn('Failed to terminate OCR worker:', terminateError);
      }
    }
  }
}

/**
 * Extracts text from a supported document, dispatching on the file's MIME type:
 * `application/pdf` goes to the PDF extractor and any `image/*` type goes to
 * the OCR image extractor.
 *
 * The extractor calls are awaited inside the `try` so that a rejection reaches
 * the `catch` below. Returning the bare promise would let it bypass the
 * handler, leaving non-ProcessingError failures unwrapped.
 *
 * @param file - The file to read; `file.type` must be set.
 * @returns The extracted text.
 * @throws ProcessingError when the type is missing or unsupported, or when
 *   extraction fails (other errors are wrapped in a ProcessingError).
 */
export async function extractTextFromDocument(file: File): Promise<string> {
  if (!file.type) {
    throw ProcessingError.document(`File type not detected for ${file.name}`);
  }

  try {
    if (file.type === 'application/pdf') {
      return await extractTextFromPDF(file);
    }
    if (file.type.startsWith('image/')) {
      return await extractTextFromImage(file);
    }
    throw ProcessingError.document(`Unsupported file type: ${file.type}`);
  } catch (error) {
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from document: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  }
}
