TKM03 committed on
Commit
f4b84de
·
verified ·
1 Parent(s): c3d4aea

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -161
app.py DELETED
@@ -1,161 +0,0 @@
1
- import numpy as np
2
- from sentence_transformers import SentenceTransformer
3
- import faiss
4
- import re
5
- import gradio as gr
6
-
7
def preprocess_text(text):
    """Split raw document text into cleaned question sections.

    Lines beginning with 'Question' open a new section; subsequent
    non-blank lines are appended to the current one. Each finished
    section then has trailing page numbers removed and the
    'TRAPS:' / 'BEST ANSWER:' / 'PASSABLE ANSWER:' labels blanked out.

    Args:
        text: Raw text extracted from the PDF.

    Returns:
        List of cleaned section strings, one per question.
    """
    sections = []
    buffer = []

    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if stripped.startswith('Question'):
            # A new question begins: flush whatever was accumulated.
            if buffer:
                sections.append(' '.join(buffer))
            buffer = [stripped]
        elif stripped:
            buffer.append(stripped)

    if buffer:
        sections.append(' '.join(buffer))

    # Strip page numbers and answer-type labels from every section.
    cleaned = []
    for chunk in sections:
        chunk = re.sub(r'\d+\s*$', '', chunk)
        chunk = re.sub(r'TRAPS:|BEST ANSWER:|PASSABLE ANSWER:', ' ', chunk)
        cleaned.append(chunk.strip())

    return cleaned
36
-
37
def query_qa_system(question, model, index, text_chunks, similarity_threshold=0.4):
    """Find the text chunk most similar to *question*.

    Args:
        question: User's question as a plain string.
        model: SentenceTransformer used to embed the question.
        index: FAISS inner-product index over normalized chunk embeddings.
        text_chunks: List of section strings aligned with the index rows.
        similarity_threshold: Minimum cosine similarity to accept a match.

    Returns:
        dict with 'question' (label or None), 'full_text', 'confidence'
        (float cosine score) and 'found_answer' (bool).
    """
    # Embed the query and L2-normalize so inner product equals cosine similarity.
    embedding = model.encode([question])
    faiss.normalize_L2(embedding)

    # Retrieve only the single nearest chunk.
    similarities, indices = index.search(embedding, 1)
    score = similarities[0][0]
    best_idx = indices[0][0]

    # Guard clause: below threshold, report that nothing relevant was found.
    if score < similarity_threshold:
        return {
            'question': None,
            'full_text': "I couldn't find a sufficiently relevant answer to your question in the provided document.",
            'confidence': float(score),
            'found_answer': False
        }

    matched_text = text_chunks[best_idx]
    # Pull out "Question N:" as a short reference label when present.
    label = re.search(r'Question \d+:', matched_text)
    return {
        'question': label.group(0) if label else "Matching section",
        'full_text': matched_text,
        'confidence': float(score),
        'found_answer': True
    }
71
-
72
# Function to handle PDF file upload and initialization
def initialize_qa_system(pdf_file):
    """Build the QA components (model, FAISS index, chunks) from an uploaded PDF.

    Args:
        pdf_file: Uploaded file object (e.g. a gradio File) exposing a
            `.name` filesystem path, or None if nothing was uploaded.

    Returns:
        dict with keys 'model', 'index', 'text_chunks' and a human-readable
        'status'. On failure the first three are None and 'status' carries
        the error message.
    """
    # Guard: gradio passes None when "Initialize" is clicked with no upload;
    # previously this surfaced as an opaque AttributeError message.
    if pdf_file is None:
        return {
            'model': None,
            'index': None,
            'text_chunks': None,
            'status': "Error: no PDF file was uploaded."
        }

    try:
        from PyPDF2 import PdfReader
        pdf_reader = PdfReader(pdf_file.name)
        pdf_text = ""
        for page in pdf_reader.pages:
            text = page.extract_text()
            # extract_text() may return None/"" for image-only pages.
            if text:
                pdf_text += text + "\n"

        # Process text and create embeddings.
        text_chunks = preprocess_text(pdf_text)
        # Guard: an empty/unreadable PDF would otherwise fail deep inside
        # embeddings.shape with a confusing error string.
        if not text_chunks:
            return {
                'model': None,
                'index': None,
                'text_chunks': None,
                'status': "Error: no readable text found in the PDF."
            }

        model = SentenceTransformer("all-MiniLM-L6-v2")
        embeddings = model.encode(text_chunks)

        # Normalize so inner-product search behaves as cosine similarity.
        dimension = embeddings.shape[1]
        faiss.normalize_L2(embeddings)
        index = faiss.IndexFlatIP(dimension)
        index.add(embeddings)

        return {
            'model': model,
            'index': index,
            'text_chunks': text_chunks,
            'status': f"System initialized with {len(text_chunks)} text chunks from your PDF!"
        }
    except Exception as e:
        # Broad catch is deliberate: any failure is surfaced in the UI status box.
        return {
            'model': None,
            'index': None,
            'text_chunks': None,
            'status': f"Error: {str(e)}"
        }
108
-
109
# Holds the active QA components; populated once a PDF has been processed.
qa_system = dict(model=None, index=None, text_chunks=None)
111
-
112
# Function to handle file upload
def upload_file(pdf_file):
    """Gradio callback: (re)build the QA system from the uploaded PDF.

    Stores the result in the module-level `qa_system` dict and returns
    the status string for display in the UI.
    """
    global qa_system
    qa_system = initialize_qa_system(pdf_file)
    return qa_system['status']
118
-
119
# Function to handle questions
def answer_question(question):
    """Gradio callback: answer *question* using the initialized QA system.

    Args:
        question: User's question string from the textbox.

    Returns:
        A user-facing string: the matched answer with its confidence,
        a not-found message, or a prompt to upload a PDF first.
    """
    global qa_system

    if not qa_system['model'] or not qa_system['index'] or not qa_system['text_chunks']:
        return "Please upload a PDF file first."

    result = query_qa_system(question, qa_system['model'], qa_system['index'], qa_system['text_chunks'])
    full_text = result['full_text']

    if result['found_answer']:
        # Show only the text after the 'Answer:' label when it exists.
        # str.find returns -1 on a miss, which previously became
        # full_text[6:] and silently chopped the first six characters.
        label_pos = full_text.find('Answer:')
        if label_pos != -1:
            answer = full_text[label_pos + len('Answer:'):].strip()
        else:
            answer = full_text.strip()
        return f"Match (confidence: {result['confidence']:.2f}):\n\n{answer}"
    else:
        # The fallback message contains no 'Answer:' label; return it whole
        # (the old code truncated it to "dn't find ...").
        return f"{full_text}\nBest match confidence: {result['confidence']:.2f}"
135
-
136
# Create the Gradio interface
with gr.Blocks(title="Interview Q&A Assistant") as demo:
    gr.Markdown("# Interview Q&A Assistant")
    gr.Markdown("Upload your interview questions PDF and ask questions to get the most relevant sections.")

    # --- Upload / initialization controls ---
    with gr.Row():
        with gr.Column():
            pdf_upload = gr.File(label="Upload PDF File")
            upload_button = gr.Button("Initialize Q&A System")
            status_text = gr.Textbox(label="Status", value="Upload a PDF to begin")

    # --- Question entry ---
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a question about interview preparation")
            submit_button = gr.Button("Get Answer")

    # --- Answer display ---
    with gr.Row():
        answer_output = gr.Textbox(label="Answer", lines=10)

    # Wire each button to its callback.
    upload_button.click(upload_file, inputs=pdf_upload, outputs=status_text)
    submit_button.click(answer_question, inputs=question_input, outputs=answer_output)
158
-
159
# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    # share=True exposes a temporary public gradio link alongside the local server.
    demo.launch(share=True)