Spaces:

TKM03
/

PDF_based_chatbot

Sleeping

TKM03 committed on Feb 18

Commit

3991753

verified ·

1 Parent(s): da3e470

Version6

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,21 +4,35 @@ import faiss
 import re
 import gradio as gr
 import PyPDF2
-import io
 def extract_text_from_pdf(pdf_file):
     """
-    Extract text from a PDF file
     """
     if pdf_file is None:
         return "Please upload a PDF file."
     pdf_text = ""
-    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
-    for page_num in range(len(pdf_reader.pages)):
-        page = pdf_reader.pages[page_num]
-        pdf_text += page.extract_text() + "\n"
     return pdf_text
@@ -136,6 +150,9 @@ def upload_file(file):
             # Extract text from PDF
             pdf_text = extract_text_from_pdf(file)
             # Initialize QA system
             global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
@@ -318,4 +335,4 @@ with gr.Blocks(title="Interview Q&A Assistant", css=custom_css) as demo:
 # Launch the app
 if __name__ == "__main__":
-    demo.launch(share=True)

 import re
 import gradio as gr
 import PyPDF2
+import tempfile
+import os
 def extract_text_from_pdf(pdf_file):
     """
     Extract text from an uploaded PDF file for Hugging Face Spaces.

     The upload is written to a temporary ``.pdf`` file, read back with
     PyPDF2, and the temporary file is removed again whether or not
     extraction succeeds.

     Args:
         pdf_file: Contents of the uploaded PDF. It is written to a file
             opened in binary mode, so it is presumably ``bytes`` (confirm
             against the upload widget's configuration). ``None`` means
             nothing was uploaded.

     Returns:
         The text of every page, each followed by a newline. If
         ``pdf_file`` is ``None`` the prompt "Please upload a PDF file." is
         returned, and on any failure a message starting with
         "Error processing PDF:" is returned instead of raising.
     """
     if pdf_file is None:
         return "Please upload a PDF file."

     temp_path = None
     try:
         # Save the uploaded data to a temporary file. delete=False keeps the
         # file on disk after this `with` closes it, so it can be reopened.
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
             # Record the path before writing so a failed write is still
             # cleaned up in the `finally` clause below.
             temp_path = temp_pdf.name
             temp_pdf.write(pdf_file)

         # Open the temporary file with PyPDF2
         with open(temp_path, 'rb') as f:
             pdf_reader = PyPDF2.PdfReader(f)
             # `or ""` keeps a page that yields no text from aborting the
             # whole extraction.
             return "".join(
                 (page.extract_text() or "") + "\n"
                 for page in pdf_reader.pages
             )
     except Exception as e:
         # Errors are reported as a string rather than raised: the caller
         # detects failure by checking for the "Error" prefix.
         return f"Error processing PDF: {str(e)}"
     finally:
         # Clean up the temporary file on success and on failure alike.
         if temp_path is not None:
             try:
                 os.unlink(temp_path)
             except OSError:
                 # Best-effort cleanup; a failed delete must not replace
                 # the result being returned.
                 pass
             # Extract text from PDF
             pdf_text = extract_text_from_pdf(file)
+            if isinstance(pdf_text, str) and pdf_text.startswith("Error"):
+                return pdf_text
             # Initialize QA system
             global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
 # Launch the app
 if __name__ == "__main__":
+    demo.launch()