Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Community

husseinelsaadi committed on 17 days ago

Commit

0c4a8eb

1 Parent(s): 72f831c

chatbot updated

Browse files

Files changed (1) hide show

chatbot/chatbot.py +154 -61

chatbot/chatbot.py CHANGED Viewed

@@ -36,15 +36,37 @@ def _init_hf_model() -> None:
     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     try:
         model = AutoModelForCausalLM.from_pretrained(model_name)
     except Exception:
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
     model = model.to(device)
     if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
     _hf_model = model
     _hf_tokenizer = tokenizer
@@ -58,8 +80,10 @@ def _init_vector_store() -> None:
     import chromadb
     from chromadb.config import Settings
-    shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
     os.makedirs(_chroma_db_dir, exist_ok=True)
     try:
         with open(_knowledge_base_path, encoding="utf-8") as f:
             raw_text = f.read()
@@ -73,74 +97,143 @@ def _init_vector_store() -> None:
     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
     docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
     embedder = SentenceTransformer("all-MiniLM-L6-v2")
     embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
     client = chromadb.Client(Settings(
         persist_directory=_chroma_db_dir,
         anonymized_telemetry=False,
         is_persistent=True,
     ))
-    collection = client.get_or_create_collection("chatbot")
     try:
-        existing = collection.get(limit=1)
-        if not existing.get("documents"):
-            raise ValueError("Empty Chroma DB")
-    except Exception:
-        ids = [f"doc_{i}" for i in range(len(docs))]
-        collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
     _chatbot_embedder = embedder
     _chatbot_collection = collection
 def get_chatbot_response(query: str) -> str:
-    if not query or not query.strip():
-        return "Please type a question about the Codingo platform."
-    _init_vector_store()
-    _init_hf_model()
-    embedder = _chatbot_embedder
-    collection = _chatbot_collection
-    model = _hf_model
-    tokenizer = _hf_tokenizer
-    import torch
-    query_embedding = embedder.encode([query])[0]
-    results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
-    retrieved_docs = results.get("documents", [[]])[0] if results else []
-    context = "\n".join(retrieved_docs[:3])
-    system_instruction = (
-        "You are LUNA AI, a helpful assistant for the Codingo recruitment "
-        "platform. Use the provided context to answer questions about "
-        "Codingo. If the question is not related to Codingo, politely "
-        "redirect the conversation. Keep responses concise and friendly."
-    )
-    prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
-    inputs = tokenizer.encode(
-        prompt, return_tensors="pt", truncation=True, max_length=512, padding=True
-    ).to(model.device)
-    with torch.no_grad():
-        output_ids = model.generate(
-            inputs,
-            max_new_tokens=150,
-            num_beams=3,
-            do_sample=True,
-            temperature=0.7,
-            pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id,
-            early_stopping=True,
         )
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    if "LUNA AI:" in response:
-        response = response.split("LUNA AI:")[-1].strip()
-    elif prompt in response:
-        response = response.replace(prompt, "").strip()
-    return (
-        response
-        if response
-        else "I'm here to help you with questions about the Codingo platform. What would you like to know?"
-    )

     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Initialize tokenizer with proper configuration
     tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Try loading the model with proper error handling
     try:
         model = AutoModelForCausalLM.from_pretrained(model_name)
+        model_type = "causal"
     except Exception:
+        try:
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+            model_type = "seq2seq"
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            raise
+    # Move model to device
     model = model.to(device)
+    model.eval()  # Set to evaluation mode
+    # Ensure proper padding token configuration
     if tokenizer.pad_token is None:
+        if tokenizer.eos_token is not None:
+            tokenizer.pad_token = tokenizer.eos_token
+        else:
+            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+            model.resize_token_embeddings(len(tokenizer))
+    # Store model type for later use
+    model.model_type = model_type
     _hf_model = model
     _hf_tokenizer = tokenizer
     import chromadb
     from chromadb.config import Settings
+    # Clean up old database
+    shutil.rmtree(_chroma_db_dir, ignore_errors=True)
     os.makedirs(_chroma_db_dir, exist_ok=True)
     try:
         with open(_knowledge_base_path, encoding="utf-8") as f:
             raw_text = f.read()
     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
     docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
+    # Initialize embedder
     embedder = SentenceTransformer("all-MiniLM-L6-v2")
     embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+    # Initialize ChromaDB
     client = chromadb.Client(Settings(
         persist_directory=_chroma_db_dir,
         anonymized_telemetry=False,
         is_persistent=True,
     ))
+    # Create or recreate collection
     try:
+        client.delete_collection("chatbot")
+    except:
+        pass
+    collection = client.create_collection("chatbot")
+    # Add documents
+    ids = [f"doc_{i}" for i in range(len(docs))]
+    collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
     _chatbot_embedder = embedder
     _chatbot_collection = collection
 def get_chatbot_response(query: str) -> str:
+    try:
+        if not query or not query.strip():
+            return "Please type a question about the Codingo platform."
+        # Clear GPU cache before processing
+        import torch
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        _init_vector_store()
+        _init_hf_model()
+        embedder = _chatbot_embedder
+        collection = _chatbot_collection
+        model = _hf_model
+        tokenizer = _hf_tokenizer
+        import torch
+        # Get relevant documents
+        query_embedding = embedder.encode([query])[0]
+        results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
+        retrieved_docs = results.get("documents", [[]])[0] if results else []
+        context = "\n".join(retrieved_docs[:3])
+        # Prepare the prompt based on model type
+        if hasattr(model, 'model_type') and model.model_type == "seq2seq":
+            # For seq2seq models like BlenderBot
+            prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
+        else:
+            # For causal models
+            system_instruction = (
+                "You are LUNA AI, a helpful assistant for the Codingo recruitment "
+                "platform. Use the provided context to answer questions about "
+                "Codingo. If the question is not related to Codingo, politely "
+                "redirect the conversation. Keep responses concise and friendly."
+            )
+            prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
+        # Tokenize with proper handling
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True,
+            return_attention_mask=True
         )
+        # Move all tensors to the same device
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        # Generate response with error handling
+        with torch.no_grad():
+            try:
+                # Use different generation parameters based on model type
+                if hasattr(model, 'model_type') and model.model_type == "seq2seq":
+                    output_ids = model.generate(
+                        input_ids=inputs['input_ids'],
+                        attention_mask=inputs['attention_mask'],
+                        max_new_tokens=150,
+                        min_length=10,
+                        num_beams=3,
+                        do_sample=True,
+                        temperature=0.7,
+                        top_p=0.9,
+                        pad_token_id=tokenizer.pad_token_id,
+                        eos_token_id=tokenizer.eos_token_id,
+                        early_stopping=True,
+                    )
+                else:
+                    output_ids = model.generate(
+                        input_ids=inputs['input_ids'],
+                        attention_mask=inputs['attention_mask'],
+                        max_new_tokens=150,
+                        num_beams=3,
+                        do_sample=True,
+                        temperature=0.7,
+                        pad_token_id=tokenizer.pad_token_id,
+                        eos_token_id=tokenizer.eos_token_id,
+                    )
+            except Exception as e:
+                print(f"Generation error: {e}")
+                # Fallback to a simple response
+                return "I'm here to help you with questions about the Codingo platform. Could you please rephrase your question?"
+        # Decode the response
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        # Clean up the response
+        if "Assistant:" in response:
+            response = response.split("Assistant:")[-1].strip()
+        elif "LUNA AI:" in response:
+            response = response.split("LUNA AI:")[-1].strip()
+        elif prompt in response:
+            response = response.replace(prompt, "").strip()
+        # Remove the input prompt if it's still in the response
+        if query in response:
+            response = response.split(query)[-1].strip()
+        return (
+            response
+            if response and len(response) > 5
+            else "I'm here to help you with questions about the Codingo platform. What would you like to know?"
+        )
+    except Exception as e:
+        print(f"Chatbot error: {e}")
+        import traceback
+        traceback.print_exc()
+        return "I apologize, but I'm having trouble processing your request. Please try again with a different question about Codingo."