Update app.py
app.py
CHANGED
@@ -92,11 +92,11 @@ class DocumentRAG:
             print("⚠️ Using context-only mode (no text generation)")

     def simple_context_answer(self, query: str, context: str) -> str:
-        """
+        """Improved context-based answering when model is not available"""
         if not context:
             return "No relevant information found in the documents."

-        #
+        # Improved keyword matching approach
         query_words = set(query.lower().split())
         context_sentences = context.split('.')

@@ -108,16 +108,20 @@ class DocumentRAG:
                 continue

             sentence_words = set(sentence.lower().split())
-            # Check if sentence contains
+            # Check if sentence contains query keywords
             common_words = query_words.intersection(sentence_words)
-            if len(common_words) >=
+            if len(common_words) >= 1:  # Lowered threshold
                 relevant_sentences.append(sentence)

         if relevant_sentences:
             # Return the most relevant sentences
             return '. '.join(relevant_sentences[:3]) + '.'
         else:
+            # If no exact matches, return first few sentences of context
+            first_sentences = context_sentences[:2]
+            if first_sentences:
+                return '. '.join([s.strip() for s in first_sentences if s.strip()]) + '.'
+            return "Based on the document content, I found some information but cannot provide a specific answer to your question."

     def extract_text_from_file(self, file_path: str) -> str:
         """Extract text from various file formats"""
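For reference, the matcher above simply intersects word sets and keeps sentences sharing at least one word with the query. A standalone run of the same idea, with invented query/context strings, looks like this:

# Standalone illustration of the keyword-overlap matching in simple_context_answer
# (example strings are made up for the demo).
query = "What is the refund policy?"
context = "The refund policy allows returns within 30 days. Orders ship in two days."

query_words = set(query.lower().split())
relevant = [s.strip() for s in context.split('.')
            if s.strip() and len(query_words & set(s.lower().split())) >= 1]
print('. '.join(relevant[:3]) + '.')
# -> "The refund policy allows returns within 30 days."

Note that with the threshold lowered to 1, even a stop-word such as "the" is enough to match a sentence, which is why the method still caps its output at three sentences.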
@@ -171,7 +175,7 @@ class DocumentRAG:
             except Exception as e2:
                 return f"Error reading TXT: {str(e2)}"

-    def chunk_text(self, text: str, chunk_size: int =
+    def chunk_text(self, text: str, chunk_size: int = 200, overlap: int = 30) -> List[str]:
         """Split text into overlapping chunks with better sentence preservation"""
         if not text.strip():
             return []
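Only the signature of chunk_text changes in this diff; its body is not shown. The word-window sketch below is therefore an assumption about how overlapping chunking with the new defaults (chunk_size=200, overlap=30) behaves, not a copy of the real implementation:

from typing import List

def chunk_text_sketch(text: str, chunk_size: int = 200, overlap: int = 30) -> List[str]:
    # Hypothetical helper: slide a window of `chunk_size` words, stepping back
    # `overlap` words each time so neighbouring chunks share some text.
    words = text.split()
    chunks, start = [], 0
    while start < len(words):
        chunks.append(" ".join(words[start:start + chunk_size]))
        start += chunk_size - overlap
    return chunks

print(len(chunk_text_sketch("word " * 500)))  # 3 overlapping chunks for a 500-word input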
@@ -256,7 +260,7 @@ class DocumentRAG:
             return f"❌ Error processing documents: {str(e)}"

     def retrieve_context(self, query: str, k: int = 5) -> str:
-        """Retrieve relevant context for the query"""
+        """Retrieve relevant context for the query with improved retrieval"""
         if not self.is_indexed:
             return ""

@@ -268,12 +272,20 @@ class DocumentRAG:
             # Search for similar chunks
             scores, indices = self.index.search(query_embedding.astype('float32'), k)

-            # Get relevant documents with
+            # Get relevant documents with MUCH LOWER threshold
             relevant_docs = []
             for i, idx in enumerate(indices[0]):
-                if idx < len(self.documents) and scores[0][i] > 0.
+                if idx < len(self.documents) and scores[0][i] > 0.05:  # Much lower threshold
                     relevant_docs.append(self.documents[idx])

+            # If no high-similarity matches, take the top results anyway
+            if not relevant_docs:
+                for i, idx in enumerate(indices[0]):
+                    if idx < len(self.documents):
+                        relevant_docs.append(self.documents[idx])
+                        if len(relevant_docs) >= 3:  # Take at least 3 chunks
+                            break
+
             return "\n\n".join(relevant_docs)

         except Exception as e:
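The new selection logic, a much lower score cut-off plus a top-k fallback, can be exercised on its own with stub arrays shaped like faiss.Index.search output. The helper name and sample data below are hypothetical:

import numpy as np

def select_chunks(documents, scores, indices, threshold=0.05, min_chunks=3):
    # Keep hits above the (low) similarity threshold first...
    picked = [documents[idx] for rank, idx in enumerate(indices[0])
              if idx < len(documents) and scores[0][rank] > threshold]
    # ...otherwise fall back to the top-ranked chunks regardless of score.
    if not picked:
        picked = [documents[idx] for idx in indices[0] if idx < len(documents)][:min_chunks]
    return "\n\n".join(picked)

docs = ["Chunk on refunds.", "Chunk on shipping.", "Chunk on returns."]
scores = np.array([[0.04, 0.02, 0.01]])   # every score below 0.05 -> fallback path
indices = np.array([[2, 0, 1]])
print(select_chunks(docs, scores, indices))

The effect is that retrieval now always returns something when the index is populated, and the decision about whether the material actually answers the question is pushed down to the generation and fallback steps.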
@@ -281,9 +293,9 @@ class DocumentRAG:
             return ""

     def generate_answer(self, query: str, context: str) -> str:
-        """Generate answer using the LLM with
+        """Generate answer using the LLM with improved prompting"""
         if self.model is None or self.tokenizer is None:
-            return
+            return self.simple_context_answer(query, context)

         try:
             # Check if using Mistral (has specific prompt format) or fallback model
@@ -291,37 +303,31 @@ class DocumentRAG:
             is_mistral = 'mistral' in model_name

             if is_mistral:
-                #
-                prompt = f"""<s>[INST] You are a document
-
-STRICT RULES:
-1. Answer ONLY using information from the context below
-2. If the answer is not in the context, respond: "The information needed to answer this question is not available in the provided documents."
-3. Do NOT make assumptions or add information not in the context
-4. Quote relevant parts from the context when possible
-
-{context[:
+                # Improved prompt for Mistral - more flexible
+                prompt = f"""<s>[INST] You are a helpful document assistant. Answer the question based on the provided context. If the exact answer isn't in the context, provide the most relevant information available.
+
+Context:
+{context[:1500]}
+
+Question: {query}
+
+Please provide a helpful answer based on the available information. [/INST]"""
             else:
-                #
-                prompt = f"""
-
-{context[:
+                # Improved prompt for fallback models
+                prompt = f"""Based on the following information, please answer the question:
+
+Context:
+{context[:1000]}
+
+Question: {query}
+
+Answer:"""

             # Tokenize with proper handling
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
-                max_length=
+                max_length=800,
                 truncation=True,
                 padding=True
             )
@@ -330,17 +336,17 @@ ANSWER (using only the context above):"""
|
|
330 |
if torch.cuda.is_available() and next(self.model.parameters()).is_cuda:
|
331 |
inputs = {k: v.cuda() for k, v in inputs.items()}
|
332 |
|
333 |
-
# Generate with
|
334 |
with torch.no_grad():
|
335 |
outputs = self.model.generate(
|
336 |
**inputs,
|
337 |
-
max_new_tokens=
|
338 |
-
temperature=0.
|
339 |
-
do_sample=
|
340 |
-
|
|
|
341 |
early_stopping=True,
|
342 |
-
repetition_penalty=1.
|
343 |
-
no_repeat_ngram_size=3,
|
344 |
pad_token_id=self.tokenizer.pad_token_id,
|
345 |
eos_token_id=self.tokenizer.eos_token_id
|
346 |
)
|
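To see the new decoding settings in isolation, here is a minimal generate() call. The tiny model name is an assumption chosen only so the snippet runs quickly; the Space itself loads Mistral or its fallback model:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "sshleifer/tiny-gpt2"  # hypothetical stand-in, not the Space's model
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tok("Context: Refunds take 5 days.\nQuestion: How long do refunds take?\nAnswer:",
             return_tensors="pt")
with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=150,
        temperature=0.3,           # mildly stochastic, as in the diff
        do_sample=True,
        top_p=0.9,
        num_beams=2,
        early_stopping=True,
        repetition_penalty=1.1,
        pad_token_id=tok.eos_token_id,   # tiny-gpt2 has no dedicated pad token
        eos_token_id=tok.eos_token_id,
    )
print(tok.decode(out[0], skip_special_tokens=True))

With do_sample=True and num_beams=2, transformers runs beam-search multinomial sampling; the previous no_repeat_ngram_size constraint is no longer set.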
@@ -353,64 +359,48 @@ ANSWER (using only the context above):"""
|
|
353 |
answer = full_response.split("[/INST]")[-1].strip()
|
354 |
else:
|
355 |
# For other models, remove the prompt
|
356 |
-
if "
|
357 |
-
answer = full_response.split("
|
358 |
else:
|
359 |
answer = full_response[len(prompt):].strip()
|
360 |
|
361 |
-
#
|
362 |
-
answer = self.
|
363 |
|
364 |
-
return answer if answer else
|
365 |
|
366 |
except Exception as e:
|
367 |
-
|
|
|
368 |
|
369 |
-
def
|
370 |
-
"""
|
371 |
if not answer or len(answer) < 5:
|
372 |
-
return "
|
373 |
|
374 |
-
# Remove
|
375 |
-
|
376 |
-
|
377 |
-
"what do you think",
|
378 |
-
"in my opinion",
|
379 |
-
"i believe",
|
380 |
-
"personally",
|
381 |
-
"from my experience",
|
382 |
-
"generally speaking",
|
383 |
-
"it is known that",
|
384 |
-
"everyone knows"
|
385 |
-
]
|
386 |
|
387 |
-
|
388 |
-
|
389 |
-
if pattern in
|
390 |
-
|
|
|
|
|
|
|
391 |
|
392 |
-
|
393 |
-
# Simple check: if answer is much longer than query and doesn't reference context
|
394 |
-
if len(answer) > len(query) * 3 and not any(word in answer.lower() for word in context.lower().split()[:20]):
|
395 |
-
return "The information needed to answer this question is not available in the provided documents."
|
396 |
|
397 |
-
#
|
398 |
-
|
|
|
|
|
399 |
|
400 |
-
|
401 |
-
sentences = answer.split('.')
|
402 |
-
unique_sentences = []
|
403 |
-
for sentence in sentences:
|
404 |
-
sentence = sentence.strip()
|
405 |
-
if sentence and sentence not in unique_sentences:
|
406 |
-
unique_sentences.append(sentence)
|
407 |
-
|
408 |
-
cleaned_answer = '. '.join(unique_sentences)
|
409 |
-
|
410 |
-
return cleaned_answer if cleaned_answer else "The information needed to answer this question is not available in the provided documents."
|
411 |
|
412 |
def answer_question(self, query: str) -> str:
|
413 |
-
"""Main function to answer questions with
|
414 |
if not query.strip():
|
415 |
return "β Please ask a question!"
|
416 |
|
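The line-level filtering in the new clean_answer can be reproduced standalone; the sample answer string is invented for the example:

answer = "Refunds are accepted within 30 days.\nI am an AI, what do you think?"
blocked = ['what are you doing', 'what do you think', 'how are you',
           'i am an ai', 'i cannot', "i don't know"]
kept = [line.strip() for line in answer.split('\n')
        if line.strip() and not any(p in line.lower() for p in blocked)]
print(' '.join(kept))
# -> "Refunds are accepted within 30 days."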
@@ -419,77 +409,22 @@ ANSWER (using only the context above):"""
|
|
419 |
|
420 |
try:
|
421 |
# Retrieve relevant context
|
422 |
-
context = self.retrieve_context(query)
|
423 |
|
424 |
if not context:
|
425 |
return "π No relevant information found in the uploaded documents for your question."
|
426 |
|
427 |
-
#
|
428 |
-
if self.model is None:
|
429 |
-
answer = self.simple_context_answer(query, context)
|
430 |
-
return f"π‘ **Answer:** {answer}\n\nπ **Source:** {context[:300]}..."
|
431 |
-
|
432 |
-
# Generate answer using the model
|
433 |
answer = self.generate_answer(query, context)
|
434 |
|
435 |
-
|
436 |
-
|
437 |
-
# Check if answer seems to be hallucinated
|
438 |
-
if self.is_likely_hallucination(answer, context):
|
439 |
-
answer = "The information needed to answer this question is not available in the provided documents."
|
440 |
-
|
441 |
-
return f"π‘ **Answer:** {answer}\n\nπ **Relevant Context:**\n{context[:400]}..."
|
442 |
else:
|
443 |
-
|
|
|
444 |
|
445 |
except Exception as e:
|
446 |
return f"β Error answering question: {str(e)}"
|
447 |
-
|
448 |
-
def is_likely_hallucination(self, answer: str, context: str) -> bool:
|
449 |
-
"""Check if the answer is likely a hallucination"""
|
450 |
-
# Convert to lowercase for comparison
|
451 |
-
answer_lower = answer.lower()
|
452 |
-
context_lower = context.lower()
|
453 |
-
|
454 |
-
# Check for obvious hallucination patterns
|
455 |
-
hallucination_indicators = [
|
456 |
-
"what are you doing",
|
457 |
-
"what do you think",
|
458 |
-
"how are you",
|
459 |
-
"i think",
|
460 |
-
"in my opinion",
|
461 |
-
"from my experience",
|
462 |
-
"generally speaking",
|
463 |
-
"it is well known",
|
464 |
-
"everyone knows",
|
465 |
-
"obviously",
|
466 |
-
"clearly",
|
467 |
-
"of course"
|
468 |
-
]
|
469 |
-
|
470 |
-
for indicator in hallucination_indicators:
|
471 |
-
if indicator in answer_lower:
|
472 |
-
return True
|
473 |
-
|
474 |
-
# Check if answer contains words that are not in context
|
475 |
-
answer_words = set(answer_lower.split())
|
476 |
-
context_words = set(context_lower.split())
|
477 |
-
|
478 |
-
# Remove common words
|
479 |
-
common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should', 'this', 'that', 'these', 'those'}
|
480 |
-
|
481 |
-
answer_content_words = answer_words - common_words
|
482 |
-
context_content_words = context_words - common_words
|
483 |
-
|
484 |
-
# If more than 70% of content words in answer are not in context, likely hallucination
|
485 |
-
if len(answer_content_words) > 0:
|
486 |
-
overlap = len(answer_content_words.intersection(context_content_words))
|
487 |
-
overlap_ratio = overlap / len(answer_content_words)
|
488 |
-
|
489 |
-
if overlap_ratio < 0.3: # Less than 30% overlap
|
490 |
-
return True
|
491 |
-
|
492 |
-
return False
|
493 |
|
494 |
# Initialize the RAG system
|
495 |
print("Initializing Document RAG System...")
|