Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 | 
             
            from fastapi import FastAPI
         | 
| 2 | 
             
            import os
         | 
| 3 | 
            -
            import fitz  # PyMuPDF | 
| 4 | 
             
            from pptx import Presentation
         | 
| 5 | 
             
            from sentence_transformers import SentenceTransformer
         | 
| 6 | 
             
            import torch
         | 
| @@ -31,7 +31,7 @@ os.makedirs(IMAGE_FOLDER, exist_ok=True) | |
| 31 | 
             
            # Extract Text from PDF
         | 
| 32 | 
             
            def extract_text_from_pdf(pdf_path):
         | 
| 33 | 
             
                try:
         | 
| 34 | 
            -
                    doc = fitz.open(pdf_path) | 
| 35 | 
             
                    text = " ".join(page.get_text() for page in doc)
         | 
| 36 | 
             
                    return text.strip() if text else None
         | 
| 37 | 
             
                except Exception as e:
         | 
| @@ -53,7 +53,7 @@ def extract_text_from_pptx(pptx_path): | |
| 53 | 
             
            # Extract Images from PDF
         | 
| 54 | 
             
            def extract_images_from_pdf(pdf_path):
         | 
| 55 | 
             
                try:
         | 
| 56 | 
            -
                    doc = fitz.open(pdf_path) | 
| 57 | 
             
                    images = []
         | 
| 58 | 
             
                    for i, page in enumerate(doc):
         | 
| 59 | 
             
                        for img_index, img in enumerate(page.get_images(full=True)):
         | 
|  | |
| 1 | 
             
            from fastapi import FastAPI
         | 
| 2 | 
             
            import os
         | 
| 3 | 
            +
            import pymupdf  # PyMuPDF
         | 
| 4 | 
             
            from pptx import Presentation
         | 
| 5 | 
             
            from sentence_transformers import SentenceTransformer
         | 
| 6 | 
             
            import torch
         | 
|  | |
| 31 | 
             
            # Extract Text from PDF
         | 
| 32 | 
             
            def extract_text_from_pdf(pdf_path):
         | 
| 33 | 
             
                try:
         | 
| 34 | 
            +
                    doc = pymupdf.open(pdf_path)
         | 
| 35 | 
             
                    text = " ".join(page.get_text() for page in doc)
         | 
| 36 | 
             
                    return text.strip() if text else None
         | 
| 37 | 
             
                except Exception as e:
         | 
|  | |
| 53 | 
             
            # Extract Images from PDF
         | 
| 54 | 
             
            def extract_images_from_pdf(pdf_path):
         | 
| 55 | 
             
                try:
         | 
| 56 | 
            +
                    doc = pymupdf.open(pdf_path)
         | 
| 57 | 
             
                    images = []
         | 
| 58 | 
             
                    for i, page in enumerate(doc):
         | 
| 59 | 
             
                        for img_index, img in enumerate(page.get_images(full=True)):
         |