Spaces:

pradeepsengarr
/

Custom_Rag_Bot

Running

App Files Files Community

Custom_Rag_Bot / app.py

pradeepsengarr

Update app.py

1dedfac verified 3 months ago

raw

history blame

13.3 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
	from sentence_transformers import SentenceTransformer
	import faiss
	import numpy as np
	import PyPDF2
	import docx
	import io
	import os
	from typing import List, Optional

	class DocumentRAG:
	    """Retrieval-augmented question answering over user-uploaded documents.

	    Uploaded files are converted to text, split into overlapping word chunks,
	    embedded with a SentenceTransformer model and stored in a FAISS
	    inner-product index. A question is answered by retrieving the most similar
	    chunks and handing them to a causal language model as context.

	    Attributes:
	        embedder: SentenceTransformer used for both chunks and queries.
	        model: The loaded causal LM, or None if every load attempt failed.
	        tokenizer: Tokenizer matching ``model``, or None.
	        documents: Text chunks; position i corresponds to row i of ``index``.
	        index: FAISS index over the chunk embeddings, or None before indexing.
	        is_indexed: True once ``documents`` and ``index`` have been built.
	    """

	    def __init__(self):
	        """Load the embedding model and the LLM; start with an empty index."""
	        print("🚀 Initializing RAG System...")

	        # Set defaults first so every attribute exists even if loading
	        # fails part-way through.
	        self.model = None
	        self.tokenizer = None
	        self.documents = []
	        self.index = None
	        self.is_indexed = False

	        # Initialize embedding model (lightweight)
	        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
	        print("✅ Embedding model loaded")

	        # Initialize quantized LLM
	        self.setup_llm()

	    def _ensure_pad_token(self):
	        """Reuse EOS as the padding token when the tokenizer defines none."""
	        if self.tokenizer.pad_token is None:
	            self.tokenizer.pad_token = self.tokenizer.eos_token

	    def setup_llm(self):
	        """Load the 4-bit quantized Mistral-7B-Instruct model.

	        Any failure while loading is printed and handled by switching to
	        ``setup_fallback_model``.
	        """
	        try:
	            quantization_config = BitsAndBytesConfig(
	                load_in_4bit=True,
	                bnb_4bit_compute_dtype=torch.float16,
	                bnb_4bit_use_double_quant=True,
	                bnb_4bit_quant_type="nf4",
	            )

	            model_name = "mistralai/Mistral-7B-Instruct-v0.1"

	            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
	            self._ensure_pad_token()

	            # trust_remote_code is deliberately not enabled: it lets code
	            # shipped in the model repository run locally, and this
	            # architecture is implemented inside transformers itself.
	            self.model = AutoModelForCausalLM.from_pretrained(
	                model_name,
	                quantization_config=quantization_config,
	                device_map="auto",
	                torch_dtype=torch.float16,
	            )
	            print("✅ Quantized Mistral model loaded")

	        except Exception as e:
	            print(f"❌ Error loading model: {e}")
	            # Fallback to a smaller model if Mistral fails
	            self.setup_fallback_model()

	    def setup_fallback_model(self):
	        """Load DialoGPT-small; leave model/tokenizer as None if that fails too."""
	        try:
	            model_name = "microsoft/DialoGPT-small"
	            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
	            # generate_answer tokenizes with padding=True, which needs a
	            # pad token; this tokenizer does not define one by default.
	            self._ensure_pad_token()
	            self.model = AutoModelForCausalLM.from_pretrained(model_name)
	            print("✅ Fallback model loaded")
	        except Exception as e:
	            print(f"❌ Fallback model failed: {e}")
	            self.model = None
	            self.tokenizer = None

	    # ------------------------------------------------------------------
	    # Text extraction
	    # ------------------------------------------------------------------

	    def _read_pdf(self, file_path: str) -> str:
	        """Return the text of every PDF page, one page per line; raises on failure."""
	        with open(file_path, 'rb') as handle:
	            reader = PyPDF2.PdfReader(handle)
	            # extract_text() may yield None for pages without a text layer.
	            return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

	    def _read_docx(self, file_path: str) -> str:
	        """Return all paragraph texts of a DOCX file; raises on failure."""
	        document = docx.Document(file_path)
	        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)

	    def _read_txt(self, file_path: str) -> str:
	        """Return a text file's content, decoding UTF-8 with a latin-1 fallback."""
	        try:
	            with open(file_path, 'r', encoding='utf-8') as handle:
	                return handle.read()
	        except UnicodeDecodeError:
	            # Only a decoding problem justifies the retry; latin-1 maps
	            # every byte, so this second attempt cannot fail on content.
	            with open(file_path, 'r', encoding='latin-1') as handle:
	                return handle.read()

	    def _try_extract(self, file_path: str) -> tuple:
	        """Extract text without encoding failure into the text itself.

	        Returns:
	            ``(text, None)`` on success, or ``(None, message)`` where
	            ``message`` is the same string the public extractors return.
	        """
	        try:
	            file_extension = os.path.splitext(file_path)[1].lower()
	        except Exception as e:
	            return None, f"Error reading file: {str(e)}"

	        readers = {
	            '.pdf': ('PDF', self._read_pdf),
	            '.docx': ('DOCX', self._read_docx),
	            '.txt': ('TXT', self._read_txt),
	        }
	        if file_extension not in readers:
	            return None, f"Unsupported file format: {file_extension}"

	        label, reader = readers[file_extension]
	        try:
	            return reader(file_path), None
	        except Exception as e:
	            return None, f"Error reading {label}: {str(e)}"

	    def extract_text_from_file(self, file_path: str) -> str:
	        """Extract text from a .pdf, .docx or .txt file.

	        Returns:
	            The extracted text, or a message starting with ``"Error"`` or
	            ``"Unsupported"`` when the file cannot be read.
	        """
	        text, error = self._try_extract(file_path)
	        return text if error is None else error

	    def extract_from_pdf(self, file_path: str) -> str:
	        """Extract text from PDF; returns an ``"Error reading PDF: ..."`` message on failure."""
	        try:
	            return self._read_pdf(file_path)
	        except Exception as e:
	            return f"Error reading PDF: {str(e)}"

	    def extract_from_docx(self, file_path: str) -> str:
	        """Extract text from DOCX; returns an ``"Error reading DOCX: ..."`` message on failure."""
	        try:
	            return self._read_docx(file_path)
	        except Exception as e:
	            return f"Error reading DOCX: {str(e)}"

	    def extract_from_txt(self, file_path: str) -> str:
	        """Extract text from TXT; returns an ``"Error reading TXT: ..."`` message on failure."""
	        try:
	            return self._read_txt(file_path)
	        except Exception as e:
	            return f"Error reading TXT: {str(e)}"

	    # ------------------------------------------------------------------
	    # Indexing
	    # ------------------------------------------------------------------

	    def chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
	        """Split text into overlapping chunks of whitespace-separated words.

	        Args:
	            text: Text to split.
	            chunk_size: Maximum number of words per chunk (must be > 0).
	            overlap: Words shared by consecutive chunks
	                (``0 <= overlap < chunk_size``).

	        Returns:
	            The chunks in document order; empty for blank text.

	        Raises:
	            ValueError: If ``chunk_size`` or ``overlap`` is out of range.
	        """
	        if chunk_size <= 0:
	            raise ValueError("chunk_size must be a positive integer")
	        # overlap >= chunk_size gives a zero or negative stride (no progress);
	        # a negative overlap gives a stride larger than the chunk (lost words).
	        if not 0 <= overlap < chunk_size:
	            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

	        if not text or not text.strip():
	            return []

	        words = text.split()
	        stride = chunk_size - overlap
	        chunks = []

	        for start in range(0, len(words), stride):
	            chunks.append(' '.join(words[start:start + chunk_size]))
	            # This window already reaches the last word; a further window
	            # would only repeat the tail.
	            if start + chunk_size >= len(words):
	                break

	        return chunks

	    def process_documents(self, files) -> str:
	        """Extract, chunk, embed and index the uploaded files.

	        Args:
	            files: Iterable of uploads; each item is either a filesystem path
	                or an object exposing the path as ``.name``. ``None`` items
	                are ignored.

	        Returns:
	            A human-readable status message (success summary or ``❌`` error).
	            The stored chunks and index are only replaced on success.
	        """
	        if not files:
	            return "❌ No files uploaded!"

	        try:
	            sections = []
	            processed_files = []

	            # Extract text from all files
	            for file in files:
	                if file is None:
	                    continue

	                path = file if isinstance(file, (str, os.PathLike)) else file.name
	                display_name = os.path.basename(path)

	                file_text, error = self._try_extract(path)
	                if error is not None:
	                    return f"❌ {error}"

	                # Skip files that yielded no text so that the header line
	                # alone is never indexed as if it were content.
	                if not file_text.strip():
	                    continue

	                sections.append(f"\n\n--- {display_name} ---\n\n{file_text}")
	                processed_files.append(display_name)

	            all_text = "".join(sections)
	            if not all_text.strip():
	                return "❌ No text extracted from files!"

	            # Chunk the text
	            chunks = self.chunk_text(all_text)
	            if not chunks:
	                return "❌ No valid text chunks created!"

	            # Create embeddings
	            print(f"📄 Creating embeddings for {len(chunks)} chunks...")
	            embeddings = self.embedder.encode(chunks, show_progress_bar=True)
	            # FAISS operates in place on contiguous float32 matrices.
	            embeddings = np.ascontiguousarray(embeddings, dtype='float32')

	            # Normalized vectors + inner product == cosine similarity
	            faiss.normalize_L2(embeddings)
	            index = faiss.IndexFlatIP(embeddings.shape[1])
	            index.add(embeddings)

	            # Commit everything together so chunks and index rows can
	            # never get out of step after a failed re-index.
	            self.documents = chunks
	            self.index = index
	            self.is_indexed = True

	            return (
	                f"✅ Successfully processed {len(processed_files)} files:\n"
	                f"📄 Files: {', '.join(processed_files)}\n"
	                f"📊 Created {len(self.documents)} text chunks\n"
	                f"🔍 Ready for Q&A!"
	            )

	        except Exception as e:
	            return f"❌ Error processing documents: {str(e)}"

	    # ------------------------------------------------------------------
	    # Question answering
	    # ------------------------------------------------------------------

	    def retrieve_context(self, query: str, k: int = 3) -> str:
	        """Return up to ``k`` indexed chunks similar to ``query``.

	        Chunks scoring 0.1 or below are discarded. The surviving chunks are
	        joined with blank lines; an empty string is returned when nothing is
	        indexed, nothing matches, or retrieval fails.
	        """
	        if not self.is_indexed or self.index is None:
	            return ""

	        try:
	            # Get query embedding
	            query_embedding = np.ascontiguousarray(
	                self.embedder.encode([query]), dtype='float32'
	            )
	            faiss.normalize_L2(query_embedding)

	            # Search for similar chunks
	            scores, indices = self.index.search(query_embedding, k)

	            # FAISS pads missing neighbours with label -1, which must not
	            # be allowed to index the chunk list from the end.
	            relevant_docs = [
	                self.documents[idx]
	                for score, idx in zip(scores[0], indices[0])
	                if 0 <= idx < len(self.documents) and score > 0.1
	            ]

	            return "\n\n".join(relevant_docs)

	        except Exception as e:
	            print(f"Error in retrieval: {e}")
	            return ""

	    def generate_answer(self, query: str, context: str) -> str:
	        """Generate an answer to ``query`` from ``context`` with the LLM.

	        Only the first 2000 characters of ``context`` are placed in the prompt.

	        Returns:
	            The generated answer, a placeholder when the model produced
	            nothing, or a ``❌`` message when no model is loaded or
	            generation fails.
	        """
	        if self.model is None or self.tokenizer is None:
	            return "❌ Model not available. Please try again."

	        try:
	            # Build the prompt from explicit pieces so that no source-code
	            # comment or indentation can leak into the text sent to the model.
	            prompt = (
	                "<s>[INST] Based on the following context, answer the question. "
	                "If the answer is not in the context, say "
	                "\"I don't have enough information to answer this question.\"\n\n"
	                f"Context:\n{context[:2000]}\n\n"
	                f"Question: {query}\n\n"
	                "Answer: [/INST]"
	            )

	            # Tokenize
	            inputs = self.tokenizer(
	                prompt,
	                return_tensors="pt",
	                max_length=1024,
	                truncation=True,
	                padding=True,
	            )

	            # The model may have been placed on an accelerator; the input
	            # tensors have to live on the same device.
	            device = getattr(self.model, "device", None)
	            if device is not None:
	                inputs = inputs.to(device)

	            # Generate
	            with torch.no_grad():
	                outputs = self.model.generate(
	                    **inputs,
	                    max_new_tokens=256,
	                    temperature=0.7,
	                    do_sample=True,
	                    top_p=0.9,
	                    pad_token_id=self.tokenizer.eos_token_id,
	                    eos_token_id=self.tokenizer.eos_token_id,
	                )

	            # The output sequence starts with the prompt tokens; decode only
	            # what was generated after them instead of string-matching the
	            # prompt in the decoded text.
	            prompt_length = inputs["input_ids"].shape[1]
	            answer = self.tokenizer.decode(
	                outputs[0][prompt_length:], skip_special_tokens=True
	            ).strip()

	            return answer if answer else "I couldn't generate a proper response."

	        except Exception as e:
	            return f"❌ Error generating answer: {str(e)}"

	    def answer_question(self, query: str) -> str:
	        """Answer ``query`` from the indexed documents.

	        Returns:
	            The answer followed by a preview (at most 500 characters) of the
	            retrieved context, or a status message when the query is blank,
	            nothing is indexed, nothing relevant is found, or an error occurs.
	        """
	        if not query or not query.strip():
	            return "❓ Please ask a question!"

	        if not self.is_indexed:
	            return "📁 Please upload and process documents first!"

	        try:
	            # Retrieve relevant context
	            context = self.retrieve_context(query)

	            if not context:
	                return "🔍 No relevant information found in the uploaded documents."

	            # Generate answer
	            answer = self.generate_answer(query, context)

	            # Only mark the preview as shortened when it really was.
	            preview = context[:500]
	            if len(context) > 500:
	                preview += "..."

	            return f"💡 Answer: {answer}\n\n📄 Source Context: {preview}"

	        except Exception as e:
	            return f"❌ Error answering question: {str(e)}"

	# Initialize the RAG system.
	# This runs at import time: DocumentRAG() loads the embedding model and the
	# LLM in its constructor. The single instance created here is the one whose
	# methods create_interface() wires to the Gradio buttons.
	print("Initializing Document RAG System...")
	rag_system = DocumentRAG()

	# Gradio Interface
	def create_interface():
	    """Assemble the two-tab Gradio Blocks app and return it (not launched).

	    The first tab uploads and indexes documents, the second asks questions.
	    Both buttons call methods of the module-level ``rag_system``.
	    """
	    intro_markdown = """
	    # 📚 Document Q&A System

	    Upload your documents and ask questions about them!

	    Supported formats: PDF, DOCX, TXT
	    """
	    examples_markdown = """
	    ### 💡 Example Questions:
	    - What is the main topic of the document?
	    - Can you summarize the key points?
	    - What are the conclusions mentioned?
	    - Are there any specific numbers or statistics?
	    """
	    accepted_types = [".pdf", ".docx", ".txt"]

	    with gr.Blocks(title="📚 Document Q&A with RAG", theme=gr.themes.Soft()) as demo:
	        gr.Markdown(intro_markdown)

	        # --- Tab 1: upload files and build the index ---
	        with gr.Tab("📤 Upload Documents"):
	            with gr.Row():
	                with gr.Column():
	                    uploads = gr.File(
	                        label="Upload Documents",
	                        file_count="multiple",
	                        file_types=accepted_types,
	                    )
	                    index_button = gr.Button("🔄 Process Documents", variant="primary")

	                with gr.Column():
	                    status_box = gr.Textbox(
	                        label="Processing Status",
	                        lines=8,
	                        interactive=False,
	                    )

	            index_button.click(rag_system.process_documents, [uploads], [status_box])

	        # --- Tab 2: ask questions against the indexed documents ---
	        with gr.Tab("❓ Ask Questions"):
	            with gr.Row():
	                with gr.Column():
	                    question_box = gr.Textbox(
	                        label="Your Question",
	                        placeholder="What would you like to know about your documents?",
	                        lines=3,
	                    )
	                    answer_button = gr.Button("🔍 Get Answer", variant="primary")

	                with gr.Column():
	                    answer_box = gr.Textbox(
	                        label="Answer",
	                        lines=10,
	                        interactive=False,
	                    )

	            answer_button.click(rag_system.answer_question, [question_box], [answer_box])

	            # Example questions
	            gr.Markdown(examples_markdown)

	    return demo

	# Script entry point: build the interface and serve it when run directly.
	if __name__ == "__main__":
	    # Listen on every interface on port 7860 and request a share link.
	    launch_settings = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	    }
	    demo = create_interface()
	    demo.launch(**launch_settings)