Commit 7814b36 (parent 25c6eb9): updated

chatbot/chatbot.py · CHANGED · +115 −125

This commit replaces the previous generic Hugging Face causal-LM loader and its hard-coded keyword fallback answers with a retrieve-then-generate pipeline: google/flan-t5-small answers questions from context pulled out of an in-memory ChromaDB vector store, with layered fallbacks when generation comes back empty.
--- chatbot/chatbot.py (before; "…" marks text cut off or lost in the diff view)

# codingo/chatbot/chatbot.py
"""…

import os
import shutil
from typing import List
import …

os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

…
_chatbot_embedder = None
_chatbot_collection = None

_current_dir = os.path.dirname(os.path.abspath(__file__))
_knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
_chroma_db_dir = "/tmp/chroma_db"

def …
    …
    global _hf_model, _hf_tokenizer
    if _hf_model is not None and _hf_tokenizer is not None:
        return

    print("…
    print(f"Loading model: {model_name}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    tokenizer = …
    model = …
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True
    )
    model = model.to(device)
    model.eval()

    …
    _hf_model = model
    _hf_tokenizer = tokenizer
    print("Model initialization complete")

def _init_vector_store() -> None:
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    print("Initializing vector store...")
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    shutil.rmtree(_chroma_db_dir, ignore_errors=True)
    os.makedirs(_chroma_db_dir, exist_ok=True)

    try:
        with open(_knowledge_base_path, encoding="utf-8") as f:
            raw_text = f.read()
        print(f"Loaded knowledge base…
    except FileNotFoundError:
        print("Knowledge base…
        raw_text = (
            "Codingo is an AI-powered recruitment platform designed to "
            "streamline job applications, candidate screening, and hiring."
        )

    …
    docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
    print(f"…

    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode(docs, show_progress_bar=False…

    …
        is_persistent=False,  # In-memory for HF Spaces
    ))

    try:
        client.delete_collection("chatbot")
    … (old lines 97–102 are unchanged and not shown in the diff)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
    print("Vector store…

def get_chatbot_response(query: str) -> str:
    try:
        print(f"\n=== Processing query: {query} ===")

        if not query or not query.strip():
            return "…

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        _init_vector_store()
        _init_hf_model()

        …
        tokenizer = _hf_tokenizer

        # Get relevant documents
        query_embedding = embedder.encode([query])[0]
        results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
        retrieved_docs = results.get("documents", [[]])[0] if results else []

        print(f"Retrieved {len(retrieved_docs)} documents")

        # …
        …
        if "Q:" in doc and "A:" in doc:
            lines = doc.split('\n')
            for i, line in enumerate(lines):
                if line.strip().startswith('Q:'):
                    question = line[2:].strip().lower()
                    # Check for keyword overlap
                    query_words = set(query_lower.split())
                    question_words = set(question.split())
                    overlap = len(query_words & question_words)
                    if overlap >= 2 or any(word in question for word in query_words if len(word) > 4):
                        # Found matching question
                        for j in range(i+1, len(lines)):
                            if lines[j].strip().startswith('A:'):
                                answer = lines[j][2:].strip()
                                print(f"Found FAQ match: {answer}")
                                return answer
                            elif lines[j].strip().startswith('Q:'):
                                break

        # …

        # Tokenize
        inputs = …

        # Generate
        with torch.no_grad():
            …
                inputs,
                …
                num_beams=…
                temperature=0.…
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_p=0.9,
            )

        # Decode
        …

        # …
        response …
        …
            return "Codingo works by using AI to match candidates with suitable job postings. Candidates create profiles, upload resumes, and our AI analyzes their skills to recommend the best job matches."
        elif "improve" in query_lower:
            return "To improve your match score on Codingo, update your profile with accurate skills, add relevant keywords from job descriptions, and include links to your portfolio projects."
        elif "what" in query_lower:
            if "codingo" in query_lower:
                return "Codingo is an AI-powered recruitment platform that streamlines job applications and hiring. We help candidates find suitable jobs and employers find the right talent through intelligent matching."
            elif "special" in query_lower or "different" in query_lower:
                return "What makes Codingo special is our AI that understands both technical skills and language, real-time CV feedback, bias-aware algorithms, and specialized focus on tech professionals."
        elif "can" in query_lower or "does" in query_lower:
            if "chatbot" in query_lower:
                return "I can help you with questions about the Codingo platform, including how to use it, improve your profile, understand our features, and get tips for job applications."
            elif "free" in query_lower or "cost" in query_lower:
                return "Profile creation and job applications are free on Codingo. Premium features may be offered for advanced analytics and additional services."

        return response

    except Exception as e:
        print(f"Error…
        traceback.print_exc()
        return "I…
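The removed FAQ matcher above survives only in fragments. For reference, here is a self-contained distillation of the keyword-overlap heuristic those fragments spell out; the match_faq wrapper and the call site are illustrative names (the old code appears to have run this logic inline over each retrieved document):

# Distillation of the removed keyword-overlap FAQ heuristic.
# Assumption: the old code iterated over retrieved_docs; the wrapper
# function and its call site below are illustrative, not from the diff.
def match_faq(query: str, doc: str):
    """Return the A: answer whose Q: line overlaps the query, else None."""
    if "Q:" not in doc or "A:" not in doc:
        return None
    query_words = set(query.lower().split())
    lines = doc.split("\n")
    for i, line in enumerate(lines):
        if not line.strip().startswith("Q:"):
            continue
        question = line[2:].strip().lower()
        overlap = len(query_words & set(question.split()))
        # Match on two shared words, or one shared "long" (>4 char) word.
        if overlap >= 2 or any(w in question for w in query_words if len(w) > 4):
            for j in range(i + 1, len(lines)):
                if lines[j].strip().startswith("A:"):
                    return lines[j][2:].strip()
                if lines[j].strip().startswith("Q:"):
                    break  # next question reached without an answer
    return None

# Illustrative call site:
# for doc in retrieved_docs:
#     answer = match_faq(query, doc)
#     if answer:
#         return answer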
+++ chatbot/chatbot.py (after)

# codingo/chatbot/chatbot.py
"""Interactive chatbot using Flan-T5 for dynamic responses"""

import os
import shutil
from typing import List
import torch

os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")

_model = None
_tokenizer = None
_chatbot_embedder = None
_chatbot_collection = None

_current_dir = os.path.dirname(os.path.abspath(__file__))
_knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
_chroma_db_dir = "/tmp/chroma_db"

# Using Flan-T5 - it's small, fast, and great for Q&A
MODEL_NAME = "google/flan-t5-small"

def _init_model():
    global _model, _tokenizer
    if _model is not None and _tokenizer is not None:
        return

    print("Loading Flan-T5 model...")
    from transformers import T5ForConditionalGeneration, T5Tokenizer

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
    model = T5ForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True
    )
    model = model.to(device)
    model.eval()

    _model = model
    _tokenizer = tokenizer
    print("Model loaded successfully!")

def _init_vector_store():
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    print("Initializing vector store...")
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Clean and create directory
    shutil.rmtree(_chroma_db_dir, ignore_errors=True)
    os.makedirs(_chroma_db_dir, exist_ok=True)

    # Load knowledge base
    try:
        with open(_knowledge_base_path, encoding="utf-8") as f:
            raw_text = f.read()
        print(f"Loaded knowledge base: {len(raw_text)} characters")
    except FileNotFoundError:
        print("Knowledge base not found!")
        raw_text = "Codingo is an AI recruitment platform."

    # Split into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
    docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
    print(f"Created {len(docs)} document chunks")

    # Create embeddings
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode(docs, show_progress_bar=False)

    # Create ChromaDB collection
    client = chromadb.Client(Settings(anonymized_telemetry=False, is_persistent=False))

    try:
        client.delete_collection("chatbot")
    … (lines 87–92 are unchanged and not shown in the diff)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
    print("Vector store ready!")

def get_chatbot_response(query: str) -> str:
    try:
        if not query or not query.strip():
            return "Hi! I'm LUNA AI. Ask me anything about Codingo!"

        print(f"\nProcessing: '{query}'")

        # Clear GPU cache
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Initialize
        _init_vector_store()
        _init_model()

        # Search for relevant context
        query_embedding = _chatbot_embedder.encode([query])[0]
        results = _chatbot_collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=3
        )

        retrieved_docs = results.get("documents", [[]])[0] if results else []
        print(f"Found {len(retrieved_docs)} relevant chunks")

        # Combine the most relevant information
        context = " ".join(retrieved_docs[:2]) if retrieved_docs else "Codingo is an AI recruitment platform."

        # Create a prompt for Flan-T5
        prompt = f"""Answer the question based on the context about Codingo.

Context: {context}

Question: {query}

Answer:"""

        # Tokenize
        inputs = _tokenizer(
            prompt,
            max_length=512,
            truncation=True,
            return_tensors="pt"
        ).to(_model.device)

        # Generate response
        with torch.no_grad():
            outputs = _model.generate(
                **inputs,
                max_new_tokens=150,
                num_beams=4,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.2
            )

        # Decode response
        response = _tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Generated: '{response}'")

        # Make sure we have a good response
        if not response or len(response) < 5:
            # Fallback: try a simpler prompt
            simple_prompt = f"Question about Codingo: {query}\nAnswer:"
            inputs = _tokenizer(simple_prompt, max_length=256, truncation=True, return_tensors="pt").to(_model.device)

            with torch.no_grad():
                outputs = _model.generate(**inputs, max_new_tokens=100, temperature=0.8)

            response = _tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Clean up the response
        response = response.strip()

        # If still too short, provide a helpful response
        if len(response) < 10:
            if "hello" in query.lower() or "hi" in query.lower():
                return "Hello! I'm LUNA AI, your Codingo assistant. I can help you with questions about our AI recruitment platform, job matching, CV tips, and more!"
            else:
                return f"I can help you with that! Based on what I know about Codingo: {retrieved_docs[0][:200] if retrieved_docs else 'Codingo is an AI-powered recruitment platform that helps match candidates with jobs.'}"

        return response

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return "I'm having a technical issue. Please try asking your question again!"

# Test function
if __name__ == "__main__":
    # Test the chatbot
    test_queries = [
        "What is Codingo?",
        "How does it work?",
        "What makes Codingo special?",
        "How can I improve my profile?",
        "Is it free?"
    ]

    print("Testing chatbot...")
    for q in test_queries:
        response = get_chatbot_response(q)
        print(f"\nQ: {q}")
        print(f"A: {response}")
        print("-" * 50)
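Two notes on the new generation call: in transformers, num_beams=4 combined with do_sample=True selects beam-search multinomial sampling, and temperature/top_p only take effect because do_sample is set. Also, since _init_vector_store() and _init_model() run lazily inside get_chatbot_response, the first request pays the full model-download and embedding cost.

The diff does not show how the Space serves this module, so as a minimal sketch, assuming a Flask front end: the /chat route, payload shape, and port below are illustrative, not part of this commit.

# Hypothetical wiring (assumption: a Flask app serves the chatbot).
# Only get_chatbot_response comes from this commit; the rest is illustrative.
from flask import Flask, jsonify, request

from chatbot.chatbot import get_chatbot_response

app = Flask(__name__)

@app.post("/chat")
def chat():
    payload = request.get_json(silent=True) or {}
    reply = get_chatbot_response(payload.get("message", ""))
    return jsonify({"reply": reply})

if __name__ == "__main__":
    # 7860 is the port Hugging Face Spaces conventionally serves on.
    app.run(host="0.0.0.0", port=7860)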