Spaces:
Sleeping
Sleeping
# MiniLM Semantic FAQ Search — CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.
import re | |
from pathlib import Path | |
import gradio as gr | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer, util | |
# ------- paths & model -------------------------------------------------
# The CSV is resolved relative to this file, not the current working
# directory, so the app finds it no matter where it is launched from.
BASE_DIR = Path(__file__).parent
CSV_FILE = BASE_DIR / "faqs.csv"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# ------- load FAQ data -------------------------------------------------
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

faq_df = pd.read_csv(CSV_FILE)

# Fail early with a message that names the missing column(s) instead of a
# bare KeyError from the column lookups below.
missing_cols = [col for col in ("question", "answer") if col not in faq_df.columns]
if missing_cols:
    raise KeyError(
        f"{CSV_FILE} must contain 'question' and 'answer' columns; "
        f"missing: {', '.join(missing_cols)}"
    )

# Empty cells are read as NaN (a float); drop those rows and coerce the rest
# to str so the encoder and the regex tagger only ever receive text.
faq_df = faq_df.dropna(subset=["question", "answer"]).reset_index(drop=True)
questions = faq_df["question"].astype(str).tolist()
answers = faq_df["answer"].astype(str).tolist()

# ------- embed questions ----------------------------------------------
# All FAQ questions are embedded once at import time; each search then only
# has to encode the incoming query.
model = SentenceTransformer(MODEL_NAME)
question_embs = model.encode(
    questions, convert_to_tensor=True, normalize_embeddings=True
)
# ------- tiny emoji tagger --------------------------------------------
# Case-insensitive keyword pattern -> emoji. Insertion order is the match
# priority: the first pattern that matches decides the emoji.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "🧴",
    r"\b(hair\s?spray|spray)\b": "💨",
    r"\b(vegan|botanical|organic)\b": "🌱",
    r"\b(heat|thermal|hot)\b": "🔥",
    r"\b(balayage|color|colour|dye)\b": "💇‍♀️",
    r"\b(scissors|cut|trim)\b": "✂️",
}


def emoji_for(text: str) -> str:
    """Return the emoji of the first rule in ``EMOJI_RULES`` matching *text*.

    Args:
        text: Free text to tag. Matching is case-insensitive and restricted
            to whole words by the ``\\b`` anchors in the patterns.

    Returns:
        The emoji mapped to the first matching pattern, or "❓" when no
        pattern matches or *text* is not a string (e.g. a NaN cell).
    """
    fallback = "❓"
    # Non-string values cannot be searched; treat them as "no match" instead
    # of letting re.search raise TypeError.
    if not isinstance(text, str):
        return fallback
    for pattern, emo in EMOJI_RULES.items():
        if re.search(pattern, text, flags=re.I):
            return emo
    return fallback
# ------- search function ----------------------------------------------
# Column layout shared by the empty and the populated result frames.
_RESULT_COLUMNS = ["Emoji", "Question", "Answer", "Score"]


def search_faq(query: str, top_k: int):
    """Return the FAQ entries most similar to *query*.

    Args:
        query: Free-text question. ``None``, empty and whitespace-only
            queries produce an empty result.
        top_k: Maximum number of rows to return. It is converted to ``int``
            and capped at the number of loaded FAQs.

    Returns:
        A ``pandas.DataFrame`` with columns Emoji, Question, Answer and
        Score (cosine similarity rounded to three decimals), ordered from
        most to least similar.
    """
    if not query or not query.strip():
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    # UI sliders may deliver floats, and topk raises when k exceeds the
    # number of candidates, so normalise and clamp before ranking.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    # cos_sim yields a 1 x N matrix for a single query; take its only row.
    sims = util.cos_sim(q_emb, question_embs)[0]
    best = sims.topk(k=k)

    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(score, 3)]
        for score, i in zip(best.values.tolist(), best.indices.tolist())
    ]
    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
# ------- Gradio UI -----------------------------------------------------
# Layout: a row holding the query box and the result-count slider, then the
# search button, then a read-only table with the ranked matches.
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
    gr.Markdown("# 🔍 Semantic FAQ Search")

    with gr.Row():
        q_in = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. Which spray protects hair from heat?",
        )
        k_in = gr.Slider(1, 5, value=3, step=1, label="Results")

    search_btn = gr.Button("Search", variant="primary")

    # Headers and datatypes mirror the columns of the frame that
    # search_faq returns.
    table_out = gr.Dataframe(
        headers=["Emoji", "Question", "Answer", "Score"],
        datatype=["str", "str", "str", "number"],
        wrap=True,
        interactive=False,
    )

    # Clicking the button runs the search with (query, result count) and
    # renders the returned frame in the table.
    search_btn.click(search_faq, [q_in, k_in], table_out)
if __name__ == "__main__":
    # Listen on all network interfaces rather than only on localhost.
    demo.launch(server_name="0.0.0.0")