import { createWorker } from 'tesseract.js';
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf';
import { ProcessingError } from './errors';

// Configure the PDF.js worker.
// Build an absolute URL string for the minified worker script, resolved
// against this module's own URL (`import.meta.url`).
// NOTE(review): the URL constructor treats 'pdfjs-dist/...' as a path relative
// to this module, not as a package lookup — presumably the bundler rewrites
// this `new URL(..., import.meta.url)` expression at build time; confirm.
const PDFJS_WORKER_URL = new URL(
  'pdfjs-dist/build/pdf.worker.min.js',
  import.meta.url
).toString();

// Tell PDF.js where to load its worker from. This assignment runs once, as a
// side effect of importing this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = PDFJS_WORKER_URL;

/**
 * Renders one PDF.js page onto a detached `<canvas>` and returns the result as
 * a PNG data URL.
 *
 * @param page - A page object exposing `getViewport()` and `render()`.
 * @returns The rendered page encoded as a `data:image/png` URL.
 * @throws ProcessingError when a 2D canvas context cannot be obtained.
 */
async function convertPDFPageToImage(page: any): Promise<string> {
  // Render at twice the base size; a larger bitmap gives better OCR results.
  const scale = 2.0;
  const pageViewport = page.getViewport({ scale });

  const surface = document.createElement('canvas');
  const ctx = surface.getContext('2d');
  if (!ctx) {
    throw ProcessingError.document('Failed to create canvas context');
  }

  // Size the canvas to the scaled page before drawing into it.
  surface.height = pageViewport.height;
  surface.width = pageViewport.width;

  const renderTask = page.render({
    canvasContext: ctx,
    viewport: pageViewport
  });
  await renderTask.promise;

  return surface.toDataURL('image/png');
}

/**
 * Extracts the text of a PDF file.
 *
 * The PDF's own text content is read first, page by page. If that yields
 * nothing but whitespace (for example a scanned document), every page is
 * rendered to an image and run through OCR instead. A page that fails in
 * either pass is logged and skipped rather than aborting the whole document.
 *
 * @param file - The PDF file to read.
 * @returns The extracted text, with each page followed by a blank line.
 * @throws ProcessingError when the PDF cannot be loaded or when neither pass
 *   produces any non-whitespace text.
 */
export async function extractTextFromPDF(file: File): Promise<string> {
  // Declared outside the try block so the finally clause can release it.
  let loadingTask: ReturnType<typeof pdfjsLib.getDocument> | undefined;

  try {
    const arrayBuffer = await file.arrayBuffer();
    loadingTask = pdfjsLib.getDocument({
      data: arrayBuffer,
      disableFontFace: true,
      isEvalSupported: false,
      useSystemFonts: true
    });

    const pdf = await loadingTask.promise;
    let fullText = '';

    // First try direct text extraction
    for (let i = 1; i <= pdf.numPages; i++) {
      try {
        const page = await pdf.getPage(i);
        const textContent = await page.getTextContent();
        const pageText = textContent.items
          .map((item: any) => item.str)
          .join(' ');
        fullText += pageText + '\n\n';
      } catch (pageError) {
        console.warn(`Failed to extract text from page ${i}:`, pageError);
        continue;
      }
    }

    // If no text was extracted, fall back to OCR
    if (!fullText.trim()) {
      console.log('No text extracted directly from PDF, falling back to OCR...');

      // The direct pass left only page separators behind; drop them so the
      // OCR output does not start with a run of blank lines.
      fullText = '';

      const worker = await createWorker();
      try {
        for (let i = 1; i <= pdf.numPages; i++) {
          try {
            const page = await pdf.getPage(i);
            const imageUrl = await convertPDFPageToImage(page);
            const { data: { text } } = await worker.recognize(imageUrl);
            fullText += text + '\n\n';
          } catch (pageError) {
            console.warn(`Failed to OCR page ${i}:`, pageError);
            continue;
          }
        }
      } finally {
        // Always shut the OCR worker down, even if the loop exits abnormally.
        await worker.terminate();
      }
    }

    if (!fullText.trim()) {
      throw ProcessingError.document('No text content extracted from PDF');
    }

    return fullText;
  } catch (error) {
    console.error('PDF extraction error:', error);
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from PDF: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  } finally {
    // Release the PDF.js document and its worker-side resources. A cleanup
    // failure is only logged so it cannot mask the real result or error.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        console.warn('Failed to release PDF resources:', cleanupError);
      }
    }
  }
}

/**
 * Runs OCR on an image file and returns the recognized text.
 *
 * The file is exposed to the OCR worker through a temporary object URL. Both
 * the worker and the object URL are released before the function settles,
 * whether recognition succeeded or failed.
 *
 * @param file - The image file to recognize.
 * @returns The recognized text, untrimmed.
 * @throws ProcessingError when recognition fails or yields only whitespace.
 */
export async function extractTextFromImage(file: File): Promise<string> {
  let imageUrl: string | undefined;

  try {
    const worker = await createWorker();

    try {
      imageUrl = URL.createObjectURL(file);
      const { data: { text } } = await worker.recognize(imageUrl);

      if (!text.trim()) {
        throw ProcessingError.document('No text content extracted from image');
      }

      return text;
    } finally {
      // Terminate inside the outer try so a failing shutdown is reported as a
      // ProcessingError like every other failure of this function.
      await worker.terminate();
    }
  } catch (error) {
    console.error('Image extraction error:', error);
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from image: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  } finally {
    // Revoked last and unconditionally, so the object URL is never leaked even
    // when worker shutdown fails.
    if (imageUrl) URL.revokeObjectURL(imageUrl);
  }
}

/**
 * Extracts text from a file, choosing the extractor from its MIME type.
 *
 * `application/pdf` is handled by `extractTextFromPDF`; any `image/*` type is
 * handled by `extractTextFromImage`.
 *
 * @param file - The file to extract text from; `file.type` must be set.
 * @returns The extracted text.
 * @throws ProcessingError when the file has no MIME type, the type is not
 *   supported, or the chosen extractor fails.
 */
export async function extractTextFromDocument(file: File): Promise<string> {
  if (!file.type) {
    throw ProcessingError.document(`File type not detected for ${file.name}`);
  }

  try {
    // `return await` (not a bare `return`) keeps the extractor's rejection
    // inside this try block, so the catch below actually sees it.
    if (file.type === 'application/pdf') {
      return await extractTextFromPDF(file);
    } else if (file.type.startsWith('image/')) {
      return await extractTextFromImage(file);
    } else {
      throw ProcessingError.document(`Unsupported file type: ${file.type}`);
    }
  } catch (error) {
    if (error instanceof ProcessingError) {
      throw error;
    }
    throw ProcessingError.document(
      `Failed to extract text from document: ${error instanceof Error ? error.message : 'Unknown error'}`,
      error as Error
    );
  }
}
