TKM03 committed on
Commit
3991753
·
verified ·
1 Parent(s): da3e470
Files changed (1) hide show
  1. app.py +25 -8
app.py CHANGED
@@ -4,21 +4,35 @@ import faiss
4
  import re
5
  import gradio as gr
6
  import PyPDF2
7
- import io
 
8
 
9
  def extract_text_from_pdf(pdf_file):
10
  """
11
- Extract text from a PDF file
12
  """
13
  if pdf_file is None:
14
  return "Please upload a PDF file."
15
 
16
  pdf_text = ""
17
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
18
-
19
- for page_num in range(len(pdf_reader.pages)):
20
- page = pdf_reader.pages[page_num]
21
- pdf_text += page.extract_text() + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  return pdf_text
24
 
@@ -136,6 +150,9 @@ def upload_file(file):
136
  # Extract text from PDF
137
  pdf_text = extract_text_from_pdf(file)
138
 
 
 
 
139
  # Initialize QA system
140
  global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
141
 
@@ -318,4 +335,4 @@ with gr.Blocks(title="Interview Q&A Assistant", css=custom_css) as demo:
318
 
319
  # Launch the app
320
  if __name__ == "__main__":
321
- demo.launch(share=True)
 
4
  import re
5
  import gradio as gr
6
  import PyPDF2
7
+ import tempfile
8
+ import os
9
 
10
  def extract_text_from_pdf(pdf_file):
11
  """
12
+ Extract text from a PDF file for Hugging Face Spaces
13
  """
14
  if pdf_file is None:
15
  return "Please upload a PDF file."
16
 
17
  pdf_text = ""
18
+ try:
19
+ # Save the uploaded file to a temporary file
20
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
21
+ temp_pdf.write(pdf_file)
22
+ temp_path = temp_pdf.name
23
+
24
+ # Open the temporary file with PyPDF2
25
+ with open(temp_path, 'rb') as f:
26
+ pdf_reader = PyPDF2.PdfReader(f)
27
+ for page_num in range(len(pdf_reader.pages)):
28
+ page = pdf_reader.pages[page_num]
29
+ pdf_text += page.extract_text() + "\n"
30
+
31
+ # Clean up the temporary file
32
+ os.unlink(temp_path)
33
+
34
+ except Exception as e:
35
+ return f"Error processing PDF: {str(e)}"
36
 
37
  return pdf_text
38
 
 
150
  # Extract text from PDF
151
  pdf_text = extract_text_from_pdf(file)
152
 
153
+ if isinstance(pdf_text, str) and pdf_text.startswith("Error"):
154
+ return pdf_text
155
+
156
  # Initialize QA system
157
  global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
158
 
 
335
 
336
  # Launch the app
337
  if __name__ == "__main__":
338
+ demo.launch()