Update app.py
app.py
CHANGED
@@ -74,8 +74,8 @@ class DocumentRAG:
     def setup_fallback_model(self):
         """Fallback to smaller model if Mistral fails"""
         try:
+            # Use a model that's better for factual Q&A and less prone to hallucination
+            model_name = "microsoft/DialoGPT-small"
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.model = AutoModelForCausalLM.from_pretrained(model_name)
 
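For reference, a minimal standalone sketch (not part of this commit) of loading the fallback model named above. The pad-token assignment is an extra step assumed here, since GPT-2-based tokenizers such as DialoGPT do not define one and the tokenizer call later in this file passes padding=True:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumption: reuse EOS as the padding token
model = AutoModelForCausalLM.from_pretrained(model_name)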
@@ -86,8 +86,38 @@ class DocumentRAG:
             print("✅ Fallback model loaded")
         except Exception as e:
             print(f"❌ Fallback model failed: {e}")
+            # Try an even simpler approach - return context-based answers without generation
             self.model = None
             self.tokenizer = None
+            print("⚠️ Using context-only mode (no text generation)")
+
+    def simple_context_answer(self, query: str, context: str) -> str:
+        """Simple context-based answering when model is not available"""
+        if not context:
+            return "No relevant information found in the documents."
+
+        # Simple keyword matching approach
+        query_words = set(query.lower().split())
+        context_sentences = context.split('.')
+
+        # Find sentences that contain query keywords
+        relevant_sentences = []
+        for sentence in context_sentences:
+            sentence = sentence.strip()
+            if len(sentence) < 10:  # Skip very short sentences
+                continue
+
+            sentence_words = set(sentence.lower().split())
+            # Check if sentence contains at least 2 query words or important keywords
+            common_words = query_words.intersection(sentence_words)
+            if len(common_words) >= 2 or any(word in sentence.lower() for word in ['name', 'education', 'experience', 'skill', 'project']):
+                relevant_sentences.append(sentence)
+
+        if relevant_sentences:
+            # Return the most relevant sentences
+            return '. '.join(relevant_sentences[:3]) + '.'
+        else:
+            return "The information needed to answer this question is not available in the provided documents."
 
     def extract_text_from_file(self, file_path: str) -> str:
         """Extract text from various file formats"""
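The keyword fallback introduced above can be exercised on its own. A short sketch (not part of the commit, made-up resume text) that mirrors the sentence-selection logic of simple_context_answer:

def keyword_answer(query: str, context: str) -> str:
    # Keep sentences sharing at least two words with the query, or containing
    # one of the resume-style keywords, as simple_context_answer does.
    query_words = set(query.lower().split())
    keywords = ['name', 'education', 'experience', 'skill', 'project']
    picked = []
    for sentence in context.split('.'):
        sentence = sentence.strip()
        if len(sentence) < 10:
            continue
        overlap = query_words & set(sentence.lower().split())
        if len(overlap) >= 2 or any(k in sentence.lower() for k in keywords):
            picked.append(sentence)
    if picked:
        return '. '.join(picked[:3]) + '.'
    return "The information needed to answer this question is not available in the provided documents."

sample = "Jane Doe studied physics at MIT. She has five years of experience in data engineering. She enjoys hiking."
print(keyword_answer("What experience does Jane have", sample))
# -> "She has five years of experience in data engineering."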
@@ -251,7 +281,7 @@ class DocumentRAG:
             return ""
 
     def generate_answer(self, query: str, context: str) -> str:
+        """Generate answer using the LLM with anti-hallucination techniques"""
         if self.model is None or self.tokenizer is None:
             return "❌ Model not available. Please try again."
 
@@ -261,28 +291,37 @@ class DocumentRAG:
             is_mistral = 'mistral' in model_name
 
             if is_mistral:
+                # Anti-hallucination prompt for Mistral
+                prompt = f"""<s>[INST] You are a document analysis assistant. You must ONLY answer based on the provided context. Do NOT use any external knowledge.
+
+STRICT RULES:
+1. Answer ONLY using information from the context below
+2. If the answer is not in the context, respond: "The information needed to answer this question is not available in the provided documents."
+3. Do NOT make assumptions or add information not in the context
+4. Quote relevant parts from the context when possible
 
+CONTEXT:
+{context[:1200]}
 
+QUESTION: {query}
 
+Remember: Use ONLY the context above. No external knowledge allowed. [/INST]"""
             else:
+                # Anti-hallucination prompt for fallback models
+                prompt = f"""INSTRUCTIONS: Answer the question using ONLY the information provided in the context. Do not use external knowledge.
 
+CONTEXT:
+{context[:800]}
 
+QUESTION: {query}
+
+ANSWER (using only the context above):"""
 
             # Tokenize with proper handling
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
+                max_length=600,  # Further reduced to prevent truncation issues
                 truncation=True,
                 padding=True
             )
 
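The tokenizer call above caps the encoded prompt at 600 tokens with truncation enabled, so anything past that limit is dropped before generation. A small sketch (not part of the commit, toy strings only) that makes the effect visible:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
long_prompt = "CONTEXT: " + "lorem ipsum " * 400 + "\nQUESTION: Where did Jane study?"

enc = tok(long_prompt, return_tensors="pt", max_length=600, truncation=True)
print(enc["input_ids"].shape)                # at most 600 tokens survive truncation
print(tok.decode(enc["input_ids"][0][-15:])) # tail of what was kept after the cut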
@@ -291,15 +330,17 @@ Answer based on the context:"""
             if torch.cuda.is_available() and next(self.model.parameters()).is_cuda:
                 inputs = {k: v.cuda() for k, v in inputs.items()}
 
+            # Generate with anti-hallucination parameters
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
+                    max_new_tokens=100,  # Shorter responses to reduce hallucination
+                    temperature=0.1,  # Very low temperature for factual responses
+                    do_sample=False,  # Use greedy decoding for consistency
+                    num_beams=3,  # Beam search for better quality
+                    early_stopping=True,
+                    repetition_penalty=1.2,
+                    no_repeat_ngram_size=3,
                     pad_token_id=self.tokenizer.pad_token_id,
                     eos_token_id=self.tokenizer.eos_token_id
                 )
 
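A standalone sketch (not part of the commit) that runs these generation settings on the small fallback model and then strips the echoed prompt, as the non-Mistral branch below does. With do_sample=False the decoder is deterministic (beam search here), so the temperature value is effectively unused; the pad token is an assumption, since DialoGPT does not define one:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")

prompt = ("CONTEXT: Jane Doe studied physics at MIT.\n"
          "QUESTION: Where did Jane study?\n"
          "ANSWER (using only the context above):")
inputs = tok(prompt, return_tensors="pt")

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,               # deterministic; temperature only applies when sampling
        num_beams=3,
        early_stopping=True,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        pad_token_id=tok.eos_token_id,  # assumption: reuse EOS since DialoGPT has no pad token
        eos_token_id=tok.eos_token_id,
    )

full_response = tok.decode(outputs[0], skip_special_tokens=True)
print(full_response[len(prompt):].strip())  # strip the echoed prompt, as the fallback branch does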
@@ -312,18 +353,64 @@ Answer based on the context:"""
                 answer = full_response.split("[/INST]")[-1].strip()
             else:
                 # For other models, remove the prompt
+                if "ANSWER (using only the context above):" in full_response:
+                    answer = full_response.split("ANSWER (using only the context above):")[-1].strip()
+                else:
+                    answer = full_response[len(prompt):].strip()
 
+            # Post-process to remove hallucinations
+            answer = self.post_process_answer(answer, context, query)
 
             return answer if answer else "I couldn't generate a proper response based on the context."
 
         except Exception as e:
             return f"❌ Error generating answer: {str(e)}"
 
+    def post_process_answer(self, answer: str, context: str, query: str) -> str:
+        """Post-process answer to reduce hallucinations"""
+        if not answer or len(answer) < 5:
+            return "The information needed to answer this question is not available in the provided documents."
+
+        # Remove common hallucination patterns
+        hallucination_patterns = [
+            "what are you doing",
+            "what do you think",
+            "in my opinion",
+            "i believe",
+            "personally",
+            "from my experience",
+            "generally speaking",
+            "it is known that",
+            "everyone knows"
+        ]
+
+        answer_lower = answer.lower()
+        for pattern in hallucination_patterns:
+            if pattern in answer_lower:
+                return "The information needed to answer this question is not available in the provided documents."
+
+        # Check if answer contains information that's not in context
+        # Simple check: if answer is much longer than query and doesn't reference context
+        if len(answer) > len(query) * 3 and not any(word in answer.lower() for word in context.lower().split()[:20]):
+            return "The information needed to answer this question is not available in the provided documents."
+
+        # Clean up the answer
+        answer = answer.strip()
+
+        # Remove repetitive parts
+        sentences = answer.split('.')
+        unique_sentences = []
+        for sentence in sentences:
+            sentence = sentence.strip()
+            if sentence and sentence not in unique_sentences:
+                unique_sentences.append(sentence)
+
+        cleaned_answer = '. '.join(unique_sentences)
+
+        return cleaned_answer if cleaned_answer else "The information needed to answer this question is not available in the provided documents."
+
     def answer_question(self, query: str) -> str:
-        """Main function to answer questions"""
+        """Main function to answer questions with anti-hallucination measures"""
         if not query.strip():
             return "❌ Please ask a question!"
 
@@ -337,17 +424,72 @@ Answer based on the context:"""
             if not context:
                 return "📄 No relevant information found in the uploaded documents for your question."
 
+            # If no model available, use simple context-based answering
+            if self.model is None:
+                answer = self.simple_context_answer(query, context)
+                return f"💡 **Answer:** {answer}\n\n📄 **Source:** {context[:300]}..."
+
+            # Generate answer using the model
             answer = self.generate_answer(query, context)
 
+            # Additional validation to prevent hallucinations
             if answer and not answer.startswith("❌"):
+                # Check if answer seems to be hallucinated
+                if self.is_likely_hallucination(answer, context):
+                    answer = "The information needed to answer this question is not available in the provided documents."
+
                 return f"💡 **Answer:** {answer}\n\n📄 **Relevant Context:**\n{context[:400]}..."
             else:
                 return answer
 
         except Exception as e:
             return f"❌ Error answering question: {str(e)}"
+
+    def is_likely_hallucination(self, answer: str, context: str) -> bool:
+        """Check if the answer is likely a hallucination"""
+        # Convert to lowercase for comparison
+        answer_lower = answer.lower()
+        context_lower = context.lower()
+
+        # Check for obvious hallucination patterns
+        hallucination_indicators = [
+            "what are you doing",
+            "what do you think",
+            "how are you",
+            "i think",
+            "in my opinion",
+            "from my experience",
+            "generally speaking",
+            "it is well known",
+            "everyone knows",
+            "obviously",
+            "clearly",
+            "of course"
+        ]
+
+        for indicator in hallucination_indicators:
+            if indicator in answer_lower:
+                return True
+
+        # Check if answer contains words that are not in context
+        answer_words = set(answer_lower.split())
+        context_words = set(context_lower.split())
+
+        # Remove common words
+        common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should', 'this', 'that', 'these', 'those'}
+
+        answer_content_words = answer_words - common_words
+        context_content_words = context_words - common_words
+
+        # If more than 70% of content words in answer are not in context, likely hallucination
+        if len(answer_content_words) > 0:
+            overlap = len(answer_content_words.intersection(context_content_words))
+            overlap_ratio = overlap / len(answer_content_words)
+
+            if overlap_ratio < 0.3:  # Less than 30% overlap
+                return True
+
+        return False
 
 # Initialize the RAG system
 print("Initializing Document RAG System...")
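The word-overlap heuristic in is_likely_hallucination can be illustrated in isolation. A small sketch (not part of the commit) with made-up answer/context pairs and a trimmed stop-word list:

STOPWORDS = {'the', 'a', 'an', 'and', 'or', 'in', 'on', 'at', 'to', 'of', 'is', 'was'}

def overlap_ratio(answer: str, context: str) -> float:
    # Share of non-stop-word answer words that also appear in the context.
    answer_words = set(answer.lower().split()) - STOPWORDS
    context_words = set(context.lower().split()) - STOPWORDS
    if not answer_words:
        return 1.0  # nothing to check
    return len(answer_words & context_words) / len(answer_words)

context = "Jane Doe studied physics at MIT and later worked as a data engineer."
grounded = "Jane Doe studied physics at MIT"
ungrounded = "She is a famous violinist who performs in Vienna"

print(overlap_ratio(grounded, context))    # 1.0, well above the 0.3 threshold, so kept
print(overlap_ratio(ungrounded, context))  # 0.0, below 0.3, so flagged as likely hallucination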