husseinelsaadi committed
Commit 25c6eb9 · 1 Parent(s): ab83281
Files changed (1)
  1. chatbot/chatbot.py +135 -214
chatbot/chatbot.py CHANGED
@@ -1,5 +1,5 @@
  # codingo/chatbot/chatbot.py
- """Chatbot module for Codingo with enhanced debugging"""
 
  import os
  import shutil
@@ -9,7 +9,7 @@ import traceback
  os.environ.setdefault("HF_HOME", "/tmp/huggingface")
  os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
  os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
- os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable synchronous CUDA errors
 
  _hf_model = None
  _hf_tokenizer = None
@@ -20,15 +20,10 @@ _current_dir = os.path.dirname(os.path.abspath(__file__))
  _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
  _chroma_db_dir = "/tmp/chroma_db"
 
- # Try a smaller, more reliable model for debugging
  DEFAULT_MODEL_NAME = "microsoft/DialoGPT-small"
 
  def _init_hf_model() -> None:
-     from transformers import (
-         AutoModelForCausalLM,
-         AutoModelForSeq2SeqLM,
-         AutoTokenizer,
-     )
      import torch
 
      global _hf_model, _hf_tokenizer
@@ -42,51 +37,22 @@ def _init_hf_model() -> None:
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      print(f"Using device: {device}")
 
-     try:
-         # Initialize tokenizer
-         tokenizer = AutoTokenizer.from_pretrained(model_name)
-         print("Tokenizer loaded successfully")
-
-         # Try loading the model
-         try:
-             model = AutoModelForCausalLM.from_pretrained(
-                 model_name,
-                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 low_cpu_mem_usage=True
-             )
-             model_type = "causal"
-             print("Loaded as causal model")
-         except Exception as e:
-             print(f"Failed to load as causal model: {e}")
-             model = AutoModelForSeq2SeqLM.from_pretrained(
-                 model_name,
-                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 low_cpu_mem_usage=True
-             )
-             model_type = "seq2seq"
-             print("Loaded as seq2seq model")
-
-         # Move model to device
-         model = model.to(device)
-         model.eval()
-         print("Model moved to device and set to eval mode")
-
-         # Configure padding token
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-             print(f"Set pad_token to: {tokenizer.pad_token}")
-
-         # Store model type
-         model.model_type = model_type
-
-         _hf_model = model
-         _hf_tokenizer = tokenizer
-         print("Model initialization complete")
-
-     except Exception as e:
-         print(f"Error during model initialization: {e}")
-         traceback.print_exc()
-         raise
90
 
91
  def _init_vector_store() -> None:
92
  global _chatbot_embedder, _chatbot_collection
@@ -95,70 +61,49 @@ def _init_vector_store() -> None:
 
      print("Initializing vector store...")
 
-     try:
-         from langchain.text_splitter import RecursiveCharacterTextSplitter
-         from sentence_transformers import SentenceTransformer
-         import chromadb
-         from chromadb.config import Settings
-
-         # Clean up old database
-         shutil.rmtree(_chroma_db_dir, ignore_errors=True)
-         os.makedirs(_chroma_db_dir, exist_ok=True)
-
-         # Load knowledge base
-         try:
-             with open(_knowledge_base_path, encoding="utf-8") as f:
-                 raw_text = f.read()
-             print(f"Loaded knowledge base with {len(raw_text)} characters")
-         except FileNotFoundError:
-             print("Knowledge base file not found, using default text")
-             raw_text = (
-                 "Codingo is an AI-powered recruitment platform designed to "
-                 "streamline job applications, candidate screening, and hiring. "
-                 "We make hiring smarter, faster, and fairer through automation "
-                 "and intelligent recommendations."
-             )
-
-         # Split text
-         splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-         docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
-         print(f"Split into {len(docs)} documents")
-
-         # Initialize embedder
-         print("Loading sentence transformer...")
-         embedder = SentenceTransformer("all-MiniLM-L6-v2")
-         print("Encoding documents...")
-         embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-         print(f"Created {len(embeddings)} embeddings")
-
-         # Initialize ChromaDB (use in-memory for HF Spaces)
-         print("Initializing ChromaDB...")
-         client = chromadb.Client(Settings(
-             anonymized_telemetry=False,
-             is_persistent=False,  # Changed to False for HF Spaces
-         ))
-
-         # Create collection
-         try:
-             client.delete_collection("chatbot")
-         except:
-             pass
-
-         collection = client.create_collection("chatbot")
-
-         # Add documents
-         ids = [f"doc_{i}" for i in range(len(docs))]
-         collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
-         print(f"Added {len(docs)} documents to collection")
-
-         _chatbot_embedder = embedder
-         _chatbot_collection = collection
-         print("Vector store initialization complete")
-
-     except Exception as e:
-         print(f"Error during vector store initialization: {e}")
-         traceback.print_exc()
-         raise
 
  def get_chatbot_response(query: str) -> str:
      try:
@@ -167,24 +112,12 @@ def get_chatbot_response(query: str) -> str:
          if not query or not query.strip():
              return "Please type a question about the Codingo platform."
 
-         # Clear GPU cache
          import torch
          if torch.cuda.is_available():
              torch.cuda.empty_cache()
-             print("Cleared GPU cache")
 
-         # Initialize components
-         try:
-             _init_vector_store()
-         except Exception as e:
-             print(f"Vector store initialization failed: {e}")
-             return "I'm having trouble accessing my knowledge base. Please try again later."
-
-         try:
-             _init_hf_model()
-         except Exception as e:
-             print(f"Model initialization failed: {e}")
-             return "I'm having trouble loading my language model. Please try again later."
 
          embedder = _chatbot_embedder
          collection = _chatbot_collection
@@ -192,102 +125,90 @@ def get_chatbot_response(query: str) -> str:
          tokenizer = _hf_tokenizer
 
          # Get relevant documents
-         print("Creating query embedding...")
          query_embedding = embedder.encode([query])[0]
-
-         print("Searching for relevant documents...")
          results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
          retrieved_docs = results.get("documents", [[]])[0] if results else []
-         context = "\n".join(retrieved_docs[:3]) if retrieved_docs else ""
-         print(f"Retrieved {len(retrieved_docs)} documents")
 
-         # Prepare prompt
-         if hasattr(model, 'model_type') and model.model_type == "seq2seq":
-             prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
-         else:
-             # For DialoGPT or other causal models
-             prompt = f"Context: {context}\n\nUser: {query}\nLUNA AI:"
-
-         print(f"Prompt length: {len(prompt)} characters")
 
          # Tokenize
-         print("Tokenizing input...")
-         try:
-             inputs = tokenizer(
-                 prompt,
-                 return_tensors="pt",
-                 truncation=True,
-                 max_length=400,  # Reduced for safety
-                 padding=True,
-                 return_attention_mask=True
-             )
-             print(f"Input shape: {inputs['input_ids'].shape}")
-         except Exception as e:
-             print(f"Tokenization error: {e}")
-             traceback.print_exc()
-             return "I had trouble processing your input. Please try a shorter question."
 
-         # Move to device
-         inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-         # Generate response
-         print("Generating response...")
          with torch.no_grad():
-             try:
-                 output_ids = model.generate(
-                     input_ids=inputs['input_ids'],
-                     attention_mask=inputs['attention_mask'],
-                     max_new_tokens=100,  # Reduced for safety
-                     min_length=10,
-                     num_beams=2,  # Reduced for memory
-                     do_sample=True,
-                     temperature=0.8,
-                     pad_token_id=tokenizer.pad_token_id,
-                     eos_token_id=tokenizer.eos_token_id,
-                     early_stopping=True,
-                 )
-                 print(f"Output shape: {output_ids.shape}")
-             except Exception as e:
-                 print(f"Generation error: {e}")
-                 traceback.print_exc()
-
-                 # Try a simpler generation
-                 try:
-                     print("Trying simpler generation...")
-                     output_ids = model.generate(
-                         input_ids=inputs['input_ids'],
-                         max_new_tokens=50,
-                         pad_token_id=tokenizer.pad_token_id,
-                     )
-                 except Exception as e2:
-                     print(f"Simple generation also failed: {e2}")
-                     return "I'm having trouble generating a response. Please try again."
-
-         # Decode response
-         print("Decoding response...")
-         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-         print(f"Raw response: {response[:100]}...")
-
-         # Clean up response
-         if "LUNA AI:" in response:
-             response = response.split("LUNA AI:")[-1].strip()
-         elif "Assistant:" in response:
-             response = response.split("Assistant:")[-1].strip()
-
-         # Remove the input if it's in the response
-         if query in response:
-             response = response.replace(query, "").strip()
-
-         # Final cleanup
-         response = response.strip()
 
-         if not response or len(response) < 5:
-             response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"
 
-         print(f"Final response: {response}")
          return response
 
      except Exception as e:
-         print(f"Unexpected error in get_chatbot_response: {e}")
          traceback.print_exc()
-         return "I apologize, but I encountered an unexpected error. Please try again with a different question."
 
  # codingo/chatbot/chatbot.py
+ """Chatbot module for Codingo - Optimized for conversational AI"""
 
  import os
  import shutil
 
  os.environ.setdefault("HF_HOME", "/tmp/huggingface")
  os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
  os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 
  _hf_model = None
  _hf_tokenizer = None
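
The re-added `CUDA_LAUNCH_BLOCKING` line makes CUDA kernel launches synchronous, so errors surface at the offending call rather than later. It only takes effect if set before the CUDA context is initialized, which is why the module configures the environment ahead of any `torch` import. A minimal sketch of that ordering (illustrative, not part of the commit):

```python
# Env vars steering CUDA and HF caching must be in place before the
# libraries that read them are imported/initialized.
import os
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # surface CUDA errors synchronously

import torch  # imported only after the environment is configured
print("CUDA available:", torch.cuda.is_available())
```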
 
  _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
  _chroma_db_dir = "/tmp/chroma_db"
 
  DEFAULT_MODEL_NAME = "microsoft/DialoGPT-small"
 
  def _init_hf_model() -> None:
+     from transformers import AutoModelForCausalLM, AutoTokenizer
      import torch
 
      global _hf_model, _hf_tokenizer
 
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      print(f"Using device: {device}")
 
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+         low_cpu_mem_usage=True
+     )
+
+     model = model.to(device)
+     model.eval()
+
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     _hf_model = model
+     _hf_tokenizer = tokenizer
+     print("Model initialization complete")
 
  def _init_vector_store() -> None:
      global _chatbot_embedder, _chatbot_collection
 
      print("Initializing vector store...")
 
+     from langchain.text_splitter import RecursiveCharacterTextSplitter
+     from sentence_transformers import SentenceTransformer
+     import chromadb
+     from chromadb.config import Settings
 
+     shutil.rmtree(_chroma_db_dir, ignore_errors=True)
+     os.makedirs(_chroma_db_dir, exist_ok=True)
+
+     try:
+         with open(_knowledge_base_path, encoding="utf-8") as f:
+             raw_text = f.read()
+         print(f"Loaded knowledge base with {len(raw_text)} characters")
+     except FileNotFoundError:
+         print("Knowledge base file not found, using default text")
+         raw_text = (
+             "Codingo is an AI-powered recruitment platform designed to "
+             "streamline job applications, candidate screening, and hiring."
+         )
+
+     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+     docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
+     print(f"Split into {len(docs)} documents")
+
+     embedder = SentenceTransformer("all-MiniLM-L6-v2")
+     embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+
+     client = chromadb.Client(Settings(
+         anonymized_telemetry=False,
+         is_persistent=False,  # In-memory for HF Spaces
+     ))
+
+     try:
+         client.delete_collection("chatbot")
+     except:
+         pass
+
+     collection = client.create_collection("chatbot")
+     ids = [f"doc_{i}" for i in range(len(docs))]
+     collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
+
+     _chatbot_embedder = embedder
+     _chatbot_collection = collection
+     print("Vector store initialization complete")
107
 
108
  def get_chatbot_response(query: str) -> str:
109
  try:
 
112
  if not query or not query.strip():
113
  return "Please type a question about the Codingo platform."
114
 
 
115
  import torch
116
  if torch.cuda.is_available():
117
  torch.cuda.empty_cache()
 
118
 
119
+ _init_vector_store()
120
+ _init_hf_model()
 
 
 
 
 
 
 
 
 
 
121
 
122
  embedder = _chatbot_embedder
123
  collection = _chatbot_collection
 
125
  tokenizer = _hf_tokenizer
126
 
127
  # Get relevant documents
 
128
  query_embedding = embedder.encode([query])[0]
 
 
129
  results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
130
  retrieved_docs = results.get("documents", [[]])[0] if results else []
 
 
131
 
132
+ print(f"Retrieved {len(retrieved_docs)} documents")
 
 
 
 
 
133
 
134
 
+         # First, try to find direct answers in the retrieved documents
+         query_lower = query.lower()
+
+         # Check for FAQ matches
+         for doc in retrieved_docs:
+             if "Q:" in doc and "A:" in doc:
+                 lines = doc.split('\n')
+                 for i, line in enumerate(lines):
+                     if line.strip().startswith('Q:'):
+                         question = line[2:].strip().lower()
+                         # Check for keyword overlap
+                         query_words = set(query_lower.split())
+                         question_words = set(question.split())
+                         overlap = len(query_words & question_words)
+                         if overlap >= 2 or any(word in question for word in query_words if len(word) > 4):
+                             # Found matching question
+                             for j in range(i+1, len(lines)):
+                                 if lines[j].strip().startswith('A:'):
+                                     answer = lines[j][2:].strip()
+                                     print(f"Found FAQ match: {answer}")
+                                     return answer
+                                 elif lines[j].strip().startswith('Q:'):
+                                     break
+
+         # If no FAQ match, create a conversational response using the context
+         context_summary = ". ".join([doc[:150] for doc in retrieved_docs[:2]])
+
+         # Build conversation for DialoGPT
+         conversation = f"Tell me about Codingo. {context_summary} User asks: {query}"
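 
The FAQ shortcut above is a plain keyword-overlap heuristic. A standalone toy run with a made-up knowledge-base chunk shows when it fires:

```python
# Toy run of the FAQ matching heuristic (the document text is made up).
doc = "Q: Is Codingo free to use?\nA: Profile creation and job applications are free."
query_words = set("is codingo free".split())

for line in doc.split("\n"):
    if line.strip().startswith("Q:"):
        question = line[2:].strip().lower()
        overlap = len(query_words & set(question.split()))
        # Fires on two or more shared words, or any query word longer than
        # four characters that appears in the question text.
        if overlap >= 2 or any(w in question for w in query_words if len(w) > 4):
            print("matched:", question)  # -> matched: is codingo free to use?
```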
 
          # Tokenize
+         inputs = tokenizer.encode(conversation, return_tensors="pt", truncation=True, max_length=200)
+         inputs = inputs.to(model.device)
 
+         # Generate
          with torch.no_grad():
+             output_ids = model.generate(
+                 inputs,
+                 max_length=inputs.shape[1] + 100,
+                 num_beams=3,
+                 temperature=0.8,
+                 pad_token_id=tokenizer.eos_token_id,
+                 eos_token_id=tokenizer.eos_token_id,
+                 do_sample=True,
+                 top_p=0.9,
+             )
 
+         # Decode
+         full_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+         # Extract only the generated part
+         response = full_response[len(conversation):].strip()
+
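One reading note on the generation call: `max_length` counts the prompt plus generated tokens, so `inputs.shape[1] + 100` allows roughly 100 new tokens. transformers expresses the same budget directly with `max_new_tokens`; a minimal self-contained sketch (sampling-only for brevity, whereas the commit also sets `num_beams=3`):

```python
# Same ~100-token generation budget expressed with max_new_tokens.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
inputs = tokenizer.encode("Tell me about Codingo.", return_tensors="pt")

with torch.no_grad():
    output_ids = model.generate(
        inputs,
        max_new_tokens=100,  # counts only newly generated tokens
        do_sample=True, top_p=0.9, temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(output_ids[0][inputs.shape[-1]:], skip_special_tokens=True))
```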
+         # Clean up
+         if not response or len(response) < 10:
+             # Fallback: create response from context
+             if "how" in query_lower:
+                 if "work" in query_lower:
+                     return "Codingo works by using AI to match candidates with suitable job postings. Candidates create profiles, upload resumes, and our AI analyzes their skills to recommend the best job matches."
+                 elif "improve" in query_lower:
+                     return "To improve your match score on Codingo, update your profile with accurate skills, add relevant keywords from job descriptions, and include links to your portfolio projects."
+             elif "what" in query_lower:
+                 if "codingo" in query_lower:
+                     return "Codingo is an AI-powered recruitment platform that streamlines job applications and hiring. We help candidates find suitable jobs and employers find the right talent through intelligent matching."
+                 elif "special" in query_lower or "different" in query_lower:
+                     return "What makes Codingo special is our AI that understands both technical skills and language, real-time CV feedback, bias-aware algorithms, and specialized focus on tech professionals."
+             elif "can" in query_lower or "does" in query_lower:
+                 if "chatbot" in query_lower:
+                     return "I can help you with questions about the Codingo platform, including how to use it, improve your profile, understand our features, and get tips for job applications."
+             elif "free" in query_lower or "cost" in query_lower:
+                 return "Profile creation and job applications are free on Codingo. Premium features may be offered for advanced analytics and additional services."
+
+             # Generic but relevant response
+             return "I'd be happy to help you with Codingo! You can ask me about creating profiles, job matching, CV tips, supported job types, or any other features of our recruitment platform."
 
 
          return response
 
      except Exception as e:
+         print(f"Error in get_chatbot_response: {e}")
          traceback.print_exc()
+         return "I apologize for the technical issue. Please try asking about Codingo's features, job matching, or how to improve your profile."