Spaces:
Sleeping
Sleeping
Version6
Browse files
app.py
CHANGED
@@ -4,21 +4,35 @@ import faiss
|
|
4 |
import re
|
5 |
import gradio as gr
|
6 |
import PyPDF2
|
7 |
-
import
|
|
|
8 |
|
9 |
def extract_text_from_pdf(pdf_file):
|
10 |
"""
|
11 |
-
Extract text from a PDF file
|
12 |
"""
|
13 |
if pdf_file is None:
|
14 |
return "Please upload a PDF file."
|
15 |
|
16 |
pdf_text = ""
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
return pdf_text
|
24 |
|
@@ -136,6 +150,9 @@ def upload_file(file):
|
|
136 |
# Extract text from PDF
|
137 |
pdf_text = extract_text_from_pdf(file)
|
138 |
|
|
|
|
|
|
|
139 |
# Initialize QA system
|
140 |
global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
|
141 |
|
@@ -318,4 +335,4 @@ with gr.Blocks(title="Interview Q&A Assistant", css=custom_css) as demo:
|
|
318 |
|
319 |
# Launch the app
|
320 |
if __name__ == "__main__":
|
321 |
-
demo.launch(
|
|
|
4 |
import re
|
5 |
import gradio as gr
|
6 |
import PyPDF2
|
7 |
+
import tempfile
|
8 |
+
import os
|
9 |
|
10 |
def extract_text_from_pdf(pdf_file):
    """
    Extract text from a PDF file for Hugging Face Spaces.

    The uploaded content is written to a temporary ``.pdf`` file on disk,
    read back with PyPDF2, and the temporary file is removed afterwards,
    whether or not parsing succeeded.

    Args:
        pdf_file: The uploaded PDF content. It is written straight to a
            binary file, so it must be a bytes-like object; ``None`` means
            nothing was uploaded.

    Returns:
        str: The text of every page, each followed by a newline. If
        ``pdf_file`` is ``None`` the prompt "Please upload a PDF file." is
        returned. On any failure a message starting with
        "Error processing PDF:" is returned instead of raising; the caller
        detects failures by checking for the "Error" prefix.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    temp_path = None
    try:
        # delete=False so the file can be closed and then reopened by name;
        # reopening a still-open NamedTemporaryFile is not portable.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = temp_pdf.name
            temp_pdf.write(pdf_file)

        # Open the temporary file with PyPDF2
        with open(temp_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # Guard against a page yielding None so one page without
            # extractable text does not abort the whole document.
            page_texts = [
                (page.extract_text() or "") + "\n"
                for page in pdf_reader.pages
            ]
    except Exception as e:
        # Top-level boundary for the UI: report the problem as text.
        return f"Error processing PDF: {e}"
    finally:
        # Always clean up the temporary file, including on the error path.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the real result or the real error.
                pass

    return "".join(page_texts)
|
38 |
|
|
|
150 |
# Extract text from PDF
|
151 |
pdf_text = extract_text_from_pdf(file)
|
152 |
|
153 |
+
if isinstance(pdf_text, str) and pdf_text.startswith("Error"):
|
154 |
+
return pdf_text
|
155 |
+
|
156 |
# Initialize QA system
|
157 |
global_model, global_index, global_text_chunks = create_qa_system(pdf_text)
|
158 |
|
|
|
335 |
|
336 |
# Launch the app
|
337 |
if __name__ == "__main__":
|
338 |
+
demo.launch()
|