import gradio as gr
from huggingface_hub import InferenceClient  # client for Hugging Face hosted models
# NEW LIBRARIES
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
## START NEW CODE
# Load and process the knowledge base text file
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split the text into chunks (here, blank-line-separated paragraphs)
chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]

# Load an embedding model (this one is light and fast)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for all chunks (as a tensor for fast similarity search)
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
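# At this point chunk_embeddings has shape (len(chunks), 384), since
# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.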
def get_relevant_context(query, top_k=3):
    """
    Compute the embedding for the query, compare it against all chunk embeddings,
    and return the top_k most similar chunks concatenated into a context string.
    """
    # Compute and normalize the query embedding
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Compute cosine similarity between the query and each chunk
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Get the indices of the top_k most similar chunks (capped at the number of
    # chunks so topk never requests more results than exist)
    k = min(top_k, len(chunks))
    top_k_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # Concatenate the top chunks into a single context string
    context = "\n\n".join([chunks[i] for i in top_k_indices])
    return context
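# A minimal smoke test for the retriever, kept commented out so it does not run
# on Space startup; the query below is hypothetical and assumes knowledge.txt
# holds paragraphs about tech history:
#
# print(get_relevant_context("Who were some hidden figures in computing?", top_k=2))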
## END OF NEW CODE

client = InferenceClient("google/gemma-2-2b-it")
def respond(message, history):
    # System prompt defining PrisMate's persona, scope, and guardrails
    system_prompt = (
        "You are PrisMate, an encouraging AI mentor and girlboss energy assistant for high school "
        "students and aspiring women/minorities in tech. Your mission is to share hidden tech history, "
        "resources, and communities that combat cultural erasure while building inclusive pathways "
        "into technology careers. You know the contributions of underrepresented pioneers, specific "
        "organizations and scholarships, mentorship programs, and practical career guidance. Be "
        "genuinely personable and helpful: keep responses short, concise, and clear while being warm, "
        "encouraging, and culturally aware with that empowering feminine energy. Only discuss topics "
        "relevant to tech careers, education, and supporting underrepresented groups in technology. If "
        "asked about unrelated topics (like food, entertainment, etc.), politely redirect by saying "
        "something like: I'm here to support you on your tech journey! Let's talk about how I can help "
        "you succeed in technology. Provide actionable advice with concrete next steps, highlight "
        "overlooked historical figures, connect students to relevant communities, and help them see "
        "their backgrounds as strengths. Explain concepts at high school level and always end with "
        "something they can do right away."
    )
    messages = [{"role": "system", "content": system_prompt}]

    # NEW CODE
    # Retrieve context relevant to the current user message
    context = get_relevant_context(message, top_k=3)

    # Add all previous messages to the messages list
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message with the retrieved context prepended, so
    # the model can ground its answer in the knowledge base
    messages.append({
        "role": "user",
        "content": f"Context from the knowledge base:\n{context}\n\nQuestion: {message}",
    })

    # Make the chat completion API call, sending the messages and other
    # parameters to the model; stream=True makes tokens arrive one at a time
    response = ""
    # Iterate through each streamed chunk (named `chunk` so it does not shadow
    # the function's `message` argument)
    for chunk in client.chat_completion(
        messages,
        max_tokens=450,
        temperature=0.1,
        stream=True,
    ):
        token = chunk.choices[0].delta.content or ""  # most recent token (None on some final chunks)
        response += token  # add it to the running response
        yield response  # yield the partial response so the UI streams
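# respond() is a generator, so it can also be streamed outside Gradio for local
# testing; the query below is hypothetical and the call requires Inference API
# access, so it is kept commented out:
#
# for partial in respond("What scholarships exist for women in tech?", []):
#     print(partial)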
custom_theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="pink",
    neutral_hue="pink",
    spacing_size="lg",
    radius_size="lg",
    text_size="lg",
)

css = """
body {
    background-color: #C13B6F !important; /* A more vibrant, dark pink */
}
.gradio-block {
    background-color: #D0CFCF !important; /* Timberwolf for the chatbot background */
}
"""
with gr.Blocks(theme=custom_theme) as chatbot:  # CSS is injected below rather than passed as an argument
    gr.HTML(f"<style>{css}</style>")  # Inject CSS for the background colors
    gr.Image(
        value="banner.jpg",
        show_label=False,
        show_share_button=False,
        show_download_button=False,
    )
    # Define the chatbot interface, wired to the respond function above
    gr.ChatInterface(
        respond,
        examples=[
            "Teach me about minorities in tech",
            "Help me find statistics about women in tech",
            "What are some communities/groups I can join for tech inclusiveness?",
        ],
        # description="This is a minority inclusivity bot"
    )

# Launch the chatbot interface
chatbot.launch()