priyanshu23456 committed
Commit e87b8a7 · verified · 1 Parent(s): 324a36a

Update app.py

Files changed (1):
  1. app.py (+164 -126)

app.py CHANGED
@@ -1,4 +1,4 @@
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify, Response
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
 import os
@@ -6,12 +6,14 @@ import torch
 import fitz # PyMuPDF
 import pytesseract
 from pdf2image import convert_from_path
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import tempfile
 from PIL import Image
+import threading
+import json

 import logging

@@ -73,83 +75,6 @@ def initialize_models():
         logger.error(f"Error initializing models: {str(e)}")
         raise

-# Generation-based answering
-def answer_with_generation(index, embeddings, chunks, question):
-    try:
-        logger.info(f"Answering with generation model: '{question}'")
-        global tokenizer, model
-
-        if tokenizer is None or model is None:
-            logger.info("Generation models not initialized, creating now...")
-            model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16,
-                device_map="cpu",
-                low_cpu_mem_usage=True
-            )
-
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-            model.config.pad_token_id = model.config.eos_token_id
-
-        # Get embeddings for question
-        q_embedding = embedder.encode([question])
-
-        # Find relevant chunks
-        _, top_k_indices = index.search(q_embedding, k=3)
-        relevant_chunks = [chunks[i] for i in top_k_indices[0]]
-        context = " ".join(relevant_chunks)
-
-        # Limit context size
-        if len(context) > 2000:
-            context = context[:2000]
-
-        # Create prompt
-        prompt = f"""<|im_start|>system
-You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
-<|im_end|>
-<|im_start|>user
-**Context**: {context}
-
-**Question**: {question}
-
-**Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
-
-        # Handle inputs
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-
-        # Move inputs to CPU
-        inputs = {k: v.to('cpu') for k, v in inputs.items()}
-
-        # Generate answer
-        output = model.generate(
-            **inputs,
-            max_new_tokens=300,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            num_beams=2,
-            no_repeat_ngram_size=2
-        )
-
-        # Decode and format answer
-        answer = tokenizer.decode(output[0], skip_special_tokens=True)
-        if "<|im_end|>" in answer:
-            answer = answer.split("<|im_end|>")[1].strip()
-        elif "Instruction" in answer:
-            answer = answer.split("Instruction")[1].strip()
-
-        logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
-        return answer.strip()
-    except Exception as e:
-        logger.error(f"Generation error: {str(e)}")
-        return "I couldn't generate a good answer based on the PDF content."
-
-
-
-
 # Cleanup function for temporary files
 def cleanup_temp_files(filepath):
     try:
@@ -297,19 +222,21 @@ def answer_with_qa_pipeline(chunks, question):
         logger.error(f"QA pipeline error: {str(e)}")
         return ""

-# Generation-based answering
-def answer_with_generation(index, embeddings, chunks, question):
+# Generation-based answering with streaming support
+def generate_streaming_answer(index, embeddings, chunks, question, streamer):
     try:
-        logger.info(f"Answering with generation model: '{question}'")
+        logger.info(f"Generating streaming answer for: '{question}'")
         global tokenizer, model

         if tokenizer is None or model is None:
             logger.info("Generation models not initialized, creating now...")
-            tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
+            model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
             model = AutoModelForCausalLM.from_pretrained(
-                "distilgpt2",
-                device_map="auto",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+                model_name,
+                torch_dtype=torch.float16,
+                device_map="cpu",
+                low_cpu_mem_usage=True
             )

         if tokenizer.pad_token is None:
@@ -324,41 +251,47 @@ def answer_with_generation(index, embeddings, chunks, question):
         relevant_chunks = [chunks[i] for i in top_k_indices[0]]
         context = " ".join(relevant_chunks)

-        # Limit context size to avoid token length issues
-        if len(context) > 4000:
-            context = context[:4000]
+        # Limit context size
+        if len(context) > 2000:
+            context = context[:2000]

         # Create prompt
-        prompt = f"Answer the following question based on this information:\n\nInformation: {context}\n\nQuestion: {question}\n\nDetailed answer:"
+        prompt = f"""<|im_start|>system
+You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
+<|im_end|>
+<|im_start|>user
+**Context**: {context}
+**Question**: {question}
+**Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""

         # Handle inputs
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)

-        # Move inputs to the right device if needed
-        if torch.cuda.is_available():
-            inputs = {k: v.to('cuda') for k, v in inputs.items()}
+        # Move inputs to CPU
+        inputs = {k: v.to('cpu') for k, v in inputs.items()}

-        # Generate answer
-        output = model.generate(
+        # Generate answer using the streamer
+        generate_kwargs = dict(
             **inputs,
+            streamer=streamer,
             max_new_tokens=300,
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
-            num_beams=3,
+            num_beams=2,
             no_repeat_ngram_size=2
         )

-        # Decode and format answer
-        answer = tokenizer.decode(output[0], skip_special_tokens=True)
-        if "Detailed answer:" in answer:
-            answer = answer.split("Detailed answer:")[-1].strip()
+        # Generate the answer (this will stream through the streamer)
+        model.generate(**generate_kwargs)

-        logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
-        return answer.strip()
     except Exception as e:
-        logger.error(f"Generation error: {str(e)}")
-        return "I couldn't generate a good answer based on the PDF content."
+        logger.error(f"Streaming generation error: {str(e)}")
+        # If an error occurs during streaming, push an error message to the streamer
+        try:
+            streamer.put("I encountered an error while generating the response.")
+        except:
+            pass

 # API route
 @app.route('/')
@@ -369,6 +302,7 @@ def home():
 def ask():
     file = request.files.get("pdf")
     question = request.form.get("question", "")
+    streaming = request.form.get("streaming", "true").lower() == "true"
    filepath = None

     if not file or not question:
@@ -379,9 +313,9 @@
         filepath = os.path.join(UPLOAD_FOLDER, filename)
         file.save(filepath)

-        logger.info(f"Processing file: {filename}, Question: '{question}'")
+        logger.info(f"Processing file: {filename}, Question: '{question}', Streaming: {streaming}")

-        # Process PDF and generate answer
+        # Process PDF and extract text
         text = extract_text(filepath)
         if not text.strip():
             return jsonify({"error": "Could not extract text from the PDF"}), 400
@@ -389,33 +323,137 @@
         chunks = split_into_chunks(text)
         if not chunks:
             return jsonify({"error": "PDF content couldn't be processed"}), 400
-
-        try:
-            answer = answer_with_qa_pipeline(chunks, question)
-        except Exception as e:
-            logger.warning(f"QA pipeline failed: {str(e)}")
-            answer = ""
-
-        # If QA pipeline didn't give a good answer, try generation
-        if not answer or len(answer.strip()) < 20:
+
+        # Set up FAISS for semantic search
+        index, embeddings, chunks = setup_faiss(chunks)
+
+        # For non-streaming responses, use the regular approach
+        if not streaming:
             try:
-                logger.info("QA pipeline answer insufficient, trying generation...")
-                index, embeddings, chunks = setup_faiss(chunks)
-                answer = answer_with_generation(index, embeddings, chunks, question)
+                answer = answer_with_qa_pipeline(chunks, question)
+                if not answer or len(answer.strip()) < 20:
+                    answer = answer_with_generation(index, embeddings, chunks, question)
+                return jsonify({"answer": answer})
             except Exception as e:
-                logger.error(f"Generation fallback failed: {str(e)}")
-                return jsonify({"error": "Failed to generate answer from PDF content"}), 500
-
-        return jsonify({"answer": answer})
-
+                logger.error(f"Error generating answer: {str(e)}")
+                return jsonify({"error": f"An error occurred: {str(e)}"}), 500
+
+        # For streaming responses, use SSE
+        else:
+            try:
+                # Create a streamer for the text generation
+                streamer = TextIteratorStreamer(
+                    tokenizer, skip_prompt=True, skip_special_tokens=True
+                )
+
+                # Start generation in a separate thread
+                thread = threading.Thread(
+                    target=generate_streaming_answer,
+                    args=(index, embeddings, chunks, question, streamer)
+                )
+                thread.start()
+
+                # Stream responses as Server-Sent Events (SSE)
+                def generate():
+                    for new_text in streamer:
+                        yield f"data: {json.dumps({'response': new_text})}\n\n"
+                    yield "data: [DONE]\n\n"
+
+                # Cleanup will happen in a separate thread after the response is complete
+                cleanup_thread = threading.Thread(
+                    target=cleanup_temp_files,
+                    args=(filepath,)
+                )
+                cleanup_thread.daemon = True
+                cleanup_thread.start()
+
+                return Response(generate(), mimetype="text/event-stream")
+
+            except Exception as e:
+                logger.error(f"Error in streaming setup: {str(e)}")
+                return jsonify({"error": f"An error occurred: {str(e)}"}), 500
+
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
         return jsonify({"error": f"An error occurred processing your request: {str(e)}"}), 500
     finally:
-        # Always clean up, even if errors occur
-        if filepath:
+        # For non-streaming responses, clean up immediately
+        # For streaming, we clean up in a separate thread
+        if filepath and not streaming:
             cleanup_temp_files(filepath)

+# Original generation function kept for non-streaming use
+def answer_with_generation(index, embeddings, chunks, question):
+    try:
+        logger.info(f"Answering with generation model: '{question}'")
+        global tokenizer, model
+
+        if tokenizer is None or model is None:
+            logger.info("Generation models not initialized, creating now...")
+            model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map="cpu",
+                low_cpu_mem_usage=True
+            )
+
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+            model.config.pad_token_id = model.config.eos_token_id
+
+        # Get embeddings for question
+        q_embedding = embedder.encode([question])
+
+        # Find relevant chunks
+        _, top_k_indices = index.search(q_embedding, k=3)
+        relevant_chunks = [chunks[i] for i in top_k_indices[0]]
+        context = " ".join(relevant_chunks)
+
+        # Limit context size
+        if len(context) > 2000:
+            context = context[:2000]
+
+        # Create prompt
+        prompt = f"""<|im_start|>system
+You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
+<|im_end|>
+<|im_start|>user
+**Context**: {context}
+**Question**: {question}
+**Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
+
+        # Handle inputs
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
+
+        # Move inputs to CPU
+        inputs = {k: v.to('cpu') for k, v in inputs.items()}
+
+        # Generate answer
+        output = model.generate(
+            **inputs,
+            max_new_tokens=300,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+            num_beams=2,
+            no_repeat_ngram_size=2
+        )
+
+        # Decode and format answer
+        answer = tokenizer.decode(output[0], skip_special_tokens=True)
+        if "<|im_end|>" in answer:
+            answer = answer.split("<|im_end|>")[1].strip()
+        elif "Instruction" in answer:
+            answer = answer.split("Instruction")[1].strip()
+
+        logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
+        return answer.strip()
+    except Exception as e:
+        logger.error(f"Generation error: {str(e)}")
+        return "I couldn't generate a good answer based on the PDF content."
+
 if __name__ == "__main__":
     try:
         # Initialize models at startup
 
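For reference, a minimal client sketch (not part of the commit) for exercising the streaming path added above: it POSTs a PDF and a question to the /ask route as multipart form data and reads the Server-Sent Events emitted by the generate() helper. The host and port (http://localhost:5000) and the file name document.pdf are assumptions; the form field names (pdf, question, streaming) and the "data: ..." / "[DONE]" framing come from the diff.

# Hypothetical client for the streaming /ask endpoint added in this commit.
# Assumes the Flask app is reachable at http://localhost:5000 and that
# "document.pdf" exists locally; field names and SSE framing match the diff.
import json
import requests

url = "http://localhost:5000/ask"  # assumed host/port

with open("document.pdf", "rb") as f:
    resp = requests.post(
        url,
        files={"pdf": f},
        data={"question": "What is this document about?", "streaming": "true"},
        stream=True,  # keep the connection open so SSE chunks can be read incrementally
    )
    resp.raise_for_status()

    # Each SSE event arrives as a line of the form: data: {"response": "..."}
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        print(json.loads(payload)["response"], end="", flush=True)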