Spaces:
Sleeping
Sleeping
Version5
Browse files
app.py
CHANGED
@@ -3,11 +3,24 @@ from sentence_transformers import SentenceTransformer
|
|
3 |
import faiss
|
4 |
import re
|
5 |
import gradio as gr
|
|
|
|
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def preprocess_text(text):
|
13 |
"""
|
@@ -110,8 +123,45 @@ def ask_question(question, model, index, text_chunks):
|
|
110 |
print(result['full_text'])
|
111 |
print(f"Best match confidence: {result['confidence']:.2f}")
|
112 |
return result
|
113 |
-
|
114 |
-
model, index,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
# Custom CSS for professional styling
|
117 |
custom_css = """
|
@@ -262,7 +312,7 @@ with gr.Blocks(title="Interview Q&A Assistant", css=custom_css) as demo:
|
|
262 |
</div>
|
263 |
""")
|
264 |
|
265 |
-
# Set up events
|
266 |
upload_button.click(upload_file, inputs=pdf_upload, outputs=status_text)
|
267 |
submit_button.click(answer_question, inputs=question_input, outputs=answer_output)
|
268 |
|
|
|
3 |
import faiss
|
4 |
import re
|
5 |
import gradio as gr
|
6 |
+
import PyPDF2
|
7 |
+
import io
|
8 |
|
9 |
+
def extract_text_from_pdf(pdf_file):
    """
    Extract the text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. Raw bytes are wrapped in an in-memory
            stream (the original behavior). An object exposing its path
            as a string ``name`` attribute (for example a temporary
            upload file) is opened by that path. Anything else, such as a
            path string or an open binary stream, is handed to
            ``PyPDF2.PdfReader`` unchanged.

    Returns:
        The text of all pages, each page followed by a newline. When
        ``pdf_file`` is None the prompt "Please upload a PDF file." is
        returned instead, so callers that need to tell the two apart
        should check for None before calling.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        source = io.BytesIO(pdf_file)
    elif isinstance(getattr(pdf_file, "name", None), str):
        # Upload wrappers carry the on-disk location in ``.name``;
        # reading by path avoids depending on the handle's position
        # or open/closed state.
        source = pdf_file.name
    else:
        source = pdf_file

    pdf_reader = PyPDF2.PdfReader(source)

    # A page with no extractable text (e.g. a scanned image) may yield
    # None or ""; treat both as empty so the concatenation cannot fail.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
|
24 |
|
25 |
def preprocess_text(text):
|
26 |
"""
|
|
|
123 |
print(result['full_text'])
|
124 |
print(f"Best match confidence: {result['confidence']:.2f}")
|
125 |
return result
|
126 |
+
|
127 |
+
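# Module-level state shared by upload_file and answer_question: the model, index, and text chunks returned by create_qa_system (all None until a document has been processed)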
|
128 |
+
global_model = None
|
129 |
+
global_index = None
|
130 |
+
global_text_chunks = None
|
131 |
+
|
132 |
+
def upload_file(file):
    """
    Process an uploaded PDF and store the resulting QA state.

    The extracted text is passed to ``create_qa_system`` and the three
    values it returns are stored in the module-level ``global_model``,
    ``global_index`` and ``global_text_chunks``.

    Args:
        file: The uploaded PDF, or None when nothing was uploaded.

    Returns:
        A status message: success, a request to upload a file, or the
        text of whatever exception was raised while processing.
    """
    global global_model, global_index, global_text_chunks

    # Nothing uploaded: report it before attempting any work.
    if file is None:
        return "❌ Please upload a PDF file."

    try:
        document_text = extract_text_from_pdf(file)
        qa_state = create_qa_system(document_text)
        global_model, global_index, global_text_chunks = qa_state
    except Exception as exc:
        # Any failure is reported to the user as a status message rather
        # than propagated.
        return f"❌ Error processing document: {exc!s}"

    return "✅ Document processed successfully! You can now ask questions."
|
147 |
+
|
148 |
+
def answer_question(question):
    """
    Answer a question against the most recently processed document.

    Args:
        question: The user's question. None, an empty string, or a
            whitespace-only string is treated as "no question".

    Returns:
        A display string. It is a prompt to upload a document when the
        module-level model, index, or text chunks are still None, and a
        prompt to enter a question when none was given. Otherwise it is
        the ``full_text`` and ``confidence`` from ``query_qa_system``,
        laid out differently depending on ``found_answer``.
    """
    # The module-level state is only read here, so no ``global``
    # declaration is needed.
    if global_model is None or global_index is None or global_text_chunks is None:
        return "Please upload and process a document first."

    # Guard against None as well as blank input: calling ``.strip()`` on
    # None would raise AttributeError.
    if question is None or not question.strip():
        return "Please enter a question."

    result = query_qa_system(question, global_model, global_index, global_text_chunks)

    if result['found_answer']:
        return f"Found matching section (confidence: {result['confidence']:.2f}):\n\n{result['full_text']}"

    return f"{result['full_text']}\nBest match confidence: {result['confidence']:.2f}"
|
165 |
|
166 |
# Custom CSS for professional styling
|
167 |
custom_css = """
|
|
|
312 |
</div>
|
313 |
""")
|
314 |
|
315 |
+
# Set up events
|
316 |
upload_button.click(upload_file, inputs=pdf_upload, outputs=status_text)
|
317 |
submit_button.click(answer_question, inputs=question_input, outputs=answer_output)
|
318 |
|