import gradio as gr
from huggingface_hub import InferenceClient  # Hugging Face inference API client

# NEW LIBRARIES
from sentence_transformers import SentenceTransformer
import torch
import numpy as np

## START NEW CODE
# Load and process the knowledge base text file
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split the text into chunks (for example, by paragraphs)
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load an embedding model (this one is light and fast)
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Precompute embeddings for all chunks (as a tensor for fast similarity search)
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)

def get_relevant_context(query, top_k=3):
    """
    Compute the embedding for the query, compare it against all chunk
    embeddings, and return the top_k most similar chunks concatenated
    into a single context string.
    """
    # Compute and normalize the query embedding
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Compute cosine similarity between the query and each chunk
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Get the indices of the top_k most similar chunks
    top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string
    context = "\n\n".join([chunks[i] for i in top_k_indices])
    return context
## END OF NEW CODE
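# Quick sanity check for the retriever (hypothetical query; uncomment to try
# once knowledge.txt is in place). Because similarity is computed on
# embeddings, the returned paragraphs should match the *meaning* of the
# query, not just shared keywords:
# print(get_relevant_context("scholarships for women in tech", top_k=2))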
client = InferenceClient("google/gemma-2-2b-it")

def respond(message, history):
    messages = [{
        "role": "system",
        "content": (
            "You are PrisMate, an encouraging AI mentor and girlboss energy assistant "
            "for high school students and aspiring women/minorities in tech. Your mission "
            "is to share hidden tech history, resources, and communities that combat "
            "cultural erasure while building inclusive pathways into technology careers. "
            "You know the contributions of underrepresented pioneers, specific organizations "
            "and scholarships, mentorship programs, and practical career guidance. Be "
            "genuinely personable and helpful—keep responses short, concise, and clear "
            "while being warm, encouraging, and culturally aware with that empowering "
            "feminine energy. Only discuss topics relevant to tech careers, education, "
            "and supporting underrepresented groups in technology. If asked about "
            "unrelated topics (like food, entertainment, etc.), politely redirect by "
            "saying something like: I'm here to support you on your tech journey! Let's "
            "talk about how I can help you succeed in technology. Provide actionable "
            "advice with concrete next steps, highlight overlooked historical figures, "
            "connect students to relevant communities, and help them see their "
            "backgrounds as strengths. Explain concepts at a high school level and "
            "always end with something they can do right away."
        ),
    }]

    # NEW CODE
    # Retrieve context relevant to the current user message
    context = get_relevant_context(message, top_k=3)

    # add all previous messages to the messages list
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # add the current user's message, prefixed with the retrieved context
    # so the model can ground its answer in the knowledge base
    messages.append({
        "role": "user",
        "content": f"Context from the knowledge base:\n{context}\n\nQuestion: {message}",
    })

    # make the chat completion API call, sending the messages and other
    # parameters to the model; stream=True makes tokens arrive one at a time
    response = ""
    for chunk in client.chat_completion(
        messages, max_tokens=450, temperature=0.1, stream=True
    ):
        token = chunk.choices[0].delta.content  # capture the most recent token
        if token:                               # the final chunk's content can be None
            response += token                   # add it to the running response
            yield response                      # yield the partial response so far

custom_theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="pink",
    neutral_hue="pink",
    spacing_size="lg",
    radius_size="lg",
    text_size="lg",
)

css = """
body {
    background-color: #C13B6F !important;  /* A more vibrant, dark pink */
}
.gradio-block {
    background-color: #D0CFCF !important;  /* Timberwolf for the chatbot background */
}
"""

with gr.Blocks(theme=custom_theme) as chatbot:  # the css argument was removed from here
    gr.HTML(f"<style>{css}</style>")  # inject the CSS for the background colors instead
    gr.Image(
        value="banner.jpg",
        show_label=False,
        show_share_button=False,
        show_download_button=False,
    )
    # Define the chatbot interface outside the respond function
    gr.ChatInterface(
        respond,
        examples=[
            "Teach me about minorities in tech",
            "Help me find statistics about Women in tech",
            "What are some communities/groups I can join for tech inclusiveness!",
        ],
        # description="This is a minority inclusivity bot"
    )

# Launch the chatbot interface
chatbot.launch()
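## OPTIONAL: cache the chunk embeddings on disk so they are not recomputed
## on every restart. A minimal sketch, assuming you are happy to store a
## .npy file next to the script (the "chunk_embeddings.npy" filename is an
## arbitrary choice, not part of the tutorial above):
#
# import os
# if os.path.exists("chunk_embeddings.npy"):
#     chunk_embeddings = torch.from_numpy(np.load("chunk_embeddings.npy"))
# else:
#     chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
#     np.save("chunk_embeddings.npy", chunk_embeddings.cpu().numpy())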