# codingo/chatbot/chatbot.py """Interactive chatbot using Flan-T5 for dynamic responses""" import os import shutil from typing import List import torch os.environ.setdefault("HF_HOME", "/tmp/huggingface") os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers") os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub") _model = None _tokenizer = None _chatbot_embedder = None _chatbot_collection = None _current_dir = os.path.dirname(os.path.abspath(__file__)) _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt") _chroma_db_dir = "/tmp/chroma_db" # Using Flan-T5 - it's small, fast, and great for Q&A MODEL_NAME = "google/flan-t5-small" def _init_model(): global _model, _tokenizer if _model is not None and _tokenizer is not None: return print("Loading Flan-T5 model...") from transformers import T5ForConditionalGeneration, T5Tokenizer device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"Using device: {device}") tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME) model = T5ForConditionalGeneration.from_pretrained( MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, low_cpu_mem_usage=True ) model = model.to(device) model.eval() _model = model _tokenizer = tokenizer print("Model loaded successfully!") def _init_vector_store(): global _chatbot_embedder, _chatbot_collection if _chatbot_embedder is not None and _chatbot_collection is not None: return print("Initializing vector store...") from langchain.text_splitter import RecursiveCharacterTextSplitter from sentence_transformers import SentenceTransformer import chromadb from chromadb.config import Settings # Clean and create directory shutil.rmtree(_chroma_db_dir, ignore_errors=True) os.makedirs(_chroma_db_dir, exist_ok=True) # Load knowledge base try: with open(_knowledge_base_path, encoding="utf-8") as f: raw_text = f.read() print(f"Loaded knowledge base: {len(raw_text)} characters") except FileNotFoundError: print("Knowledge base not found!") raw_text = "Codingo is an AI recruitment platform." # Split into chunks splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50) docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()] print(f"Created {len(docs)} document chunks") # Create embeddings embedder = SentenceTransformer("all-MiniLM-L6-v2") embeddings = embedder.encode(docs, show_progress_bar=False) # Create ChromaDB collection client = chromadb.Client(Settings(anonymized_telemetry=False, is_persistent=False)) try: client.delete_collection("chatbot") except: pass collection = client.create_collection("chatbot") ids = [f"doc_{i}" for i in range(len(docs))] collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids) _chatbot_embedder = embedder _chatbot_collection = collection print("Vector store ready!") def get_chatbot_response(query: str) -> str: try: if not query or not query.strip(): return "Hi! I'm LUNA AI. Ask me anything about Codingo!" print(f"\nProcessing: '{query}'") # Clear GPU cache if torch.cuda.is_available(): torch.cuda.empty_cache() # Initialize _init_vector_store() _init_model() # Search for relevant context query_embedding = _chatbot_embedder.encode([query])[0] results = _chatbot_collection.query( query_embeddings=[query_embedding.tolist()], n_results=3 ) retrieved_docs = results.get("documents", [[]])[0] if results else [] print(f"Found {len(retrieved_docs)} relevant chunks") # Combine the most relevant information context = " ".join(retrieved_docs[:2]) if retrieved_docs else "Codingo is an AI recruitment platform." # Create a prompt for Flan-T5 prompt = f"""Answer the question based on the context about Codingo. Context: {context} Question: {query} Answer:""" # Tokenize inputs = _tokenizer( prompt, max_length=512, truncation=True, return_tensors="pt" ).to(_model.device) # Generate response with torch.no_grad(): outputs = _model.generate( **inputs, max_new_tokens=150, num_beams=4, temperature=0.7, do_sample=True, top_p=0.9, repetition_penalty=1.2 ) # Decode response response = _tokenizer.decode(outputs[0], skip_special_tokens=True) print(f"Generated: '{response}'") # Make sure we have a good response if not response or len(response) < 5: # Fallback: try a simpler prompt simple_prompt = f"Question about Codingo: {query}\nAnswer:" inputs = _tokenizer(simple_prompt, max_length=256, truncation=True, return_tensors="pt").to(_model.device) with torch.no_grad(): outputs = _model.generate(**inputs, max_new_tokens=100, temperature=0.8) response = _tokenizer.decode(outputs[0], skip_special_tokens=True) # Clean up the response response = response.strip() # If still too short, provide a helpful response if len(response) < 10: if "hello" in query.lower() or "hi" in query.lower(): return "Hello! I'm LUNA AI, your Codingo assistant. I can help you with questions about our AI recruitment platform, job matching, CV tips, and more!" else: return f"I can help you with that! Based on what I know about Codingo: {retrieved_docs[0][:200] if retrieved_docs else 'Codingo is an AI-powered recruitment platform that helps match candidates with jobs.'}" return response except Exception as e: print(f"Error: {e}") import traceback traceback.print_exc() return "I'm having a technical issue. Please try asking your question again!" # Test function if __name__ == "__main__": # Test the chatbot test_queries = [ "What is Codingo?", "How does it work?", "What makes Codingo special?", "How can I improve my profile?", "Is it free?" ] print("Testing chatbot...") for q in test_queries: response = get_chatbot_response(q) print(f"\nQ: {q}") print(f"A: {response}") print("-" * 50)