import { getDocument } from 'pdfjs-dist';
import '../utils/pdfjs-setup';

/**
 * Type guard for text-content entries that actually carry a string.
 * Entries without a string `str` (e.g. marked-content markers) are rejected.
 */
function hasTextString(item: object): item is { str: string } {
  return typeof (item as { str?: unknown }).str === 'string';
}

/**
 * Extracts the plain text of every page of a PDF file.
 *
 * Pages are read in order, the strings of each page are joined with spaces,
 * and all runs of whitespace (including the page separators) are collapsed to
 * a single space before the result is trimmed.
 *
 * @param file - The PDF file to read.
 * @returns The whitespace-normalized text of the whole document.
 * @throws Error `'Failed to extract text from PDF'` when the file cannot be
 *   read or parsed (the underlying error is logged to the console).
 * @throws Error `'No text content found in PDF'` when the document parses
 *   correctly but contains no text.
 */
export async function extractTextFromPDF(file: File): Promise<string> {
  let rawText = '';

  try {
    const arrayBuffer = await file.arrayBuffer();
    const loadingTask = getDocument(arrayBuffer);

    try {
      const pdf = await loadingTask.promise;
      const pageTexts: string[] = [];

      // pdf.js pages are 1-indexed.
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const content = await page.getTextContent();
        // Not every entry is guaranteed to have a `str`; treat those as empty
        // so they contribute nothing once whitespace is collapsed.
        const strings = content.items.map((item: object) =>
          hasTextString(item) ? item.str : '',
        );
        pageTexts.push(strings.join(' '));
      }

      rawText = pageTexts.join('\n');
    } finally {
      // Tear down the loading task on success and on failure so the parsed
      // document does not stay alive after this call.
      await loadingTask.destroy();
    }
  } catch (error) {
    console.error('Error extracting PDF text:', error);
    throw new Error('Failed to extract text from PDF');
  }

  const cleanedText = rawText.replace(/\s+/g, ' ').trim();

  // Checked outside the try/catch on purpose: inside it, this specific error
  // would be caught and replaced by the generic extraction failure.
  if (!cleanedText) {
    throw new Error('No text content found in PDF');
  }

  return cleanedText;
}