# MiniLM Semantic FAQ Search – CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.

import re
from pathlib import Path

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# ------- paths & model -------------------------------------------------
BASE_DIR = Path(__file__).parent
CSV_FILE = BASE_DIR / "faqs.csv"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Column layout shared by the search result frame and the Gradio table.
RESULT_COLUMNS = ["Emoji", "Question", "Answer", "Score"]

# ------- load FAQ data -------------------------------------------------
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

faq_df = pd.read_csv(CSV_FILE)
# Empty CSV cells load as NaN (a float); coerce to str so the regex tagger
# and the encoder always receive text. Row order/indices are preserved so
# questions[i] and answers[i] stay aligned.
questions = faq_df["question"].fillna("").astype(str).tolist()
answers = faq_df["answer"].fillna("").astype(str).tolist()

# ------- embed questions ----------------------------------------------
model = SentenceTransformer(MODEL_NAME)
question_embs = model.encode(
    questions, convert_to_tensor=True, normalize_embeddings=True
)

# ------- tiny emoji tagger --------------------------------------------
# Maps a case-insensitive regex to the emoji shown for matching answers.
# Order matters: the first matching rule wins.
# NOTE: the emoji are literal UTF-8 characters; keep this file saved as UTF-8.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "🧴",
    r"\b(hair\s?spray|spray)\b": "💨",
    r"\b(vegan|botanical|organic)\b": "🌱",
    r"\b(heat|thermal|hot)\b": "🔥",
    r"\b(balayage|color|colour|dye)\b": "💇‍♀️",
    r"\b(scissors|cut|trim)\b": "✂️",
}

# Fallback emoji when no rule matches.
DEFAULT_EMOJI = "❓"

# Compile once at import time instead of on every lookup.
_COMPILED_EMOJI_RULES = [
    (re.compile(pattern, flags=re.I), emo)
    for pattern, emo in EMOJI_RULES.items()
]


def emoji_for(text: str) -> str:
    """Return the emoji of the first rule in EMOJI_RULES that matches *text*.

    Matching is case-insensitive. Falls back to DEFAULT_EMOJI when no rule
    matches.
    """
    for compiled, emo in _COMPILED_EMOJI_RULES:
        if compiled.search(text):
            return emo
    return DEFAULT_EMOJI


# ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int) -> pd.DataFrame:
    """Return the *top_k* FAQ entries most similar to *query*.

    Args:
        query: Free-text question typed by the user.
        top_k: Maximum number of rows to return. Coerced to ``int`` and
            clamped to the number of loaded FAQ entries.

    Returns:
        A DataFrame with columns ``Emoji``, ``Question``, ``Answer`` and
        ``Score`` (cosine similarity rounded to 3 decimals), ordered from
        most to least similar. Empty when the query is blank or there is
        nothing to search.
    """
    if not query or not query.strip():
        return pd.DataFrame(columns=RESULT_COLUMNS)

    # The slider may deliver a float, and topk() raises if k exceeds the
    # number of candidates, so normalise and clamp before calling it.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    q_emb = model.encode(
        query, convert_to_tensor=True, normalize_embeddings=True
    )
    sims = util.cos_sim(q_emb, question_embs)[0]
    top = sims.topk(k=k)

    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(float(score), 3)]
        for score, i in zip(top.values.tolist(), top.indices.tolist())
    ]
    return pd.DataFrame(rows, columns=RESULT_COLUMNS)


# ------- Gradio UI -----------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
    gr.Markdown("# 🔍 Semantic FAQ Search")
    with gr.Row():
        q_in = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. Which spray protects hair from heat?",
        )
        k_in = gr.Slider(1, 5, value=3, step=1, label="Results")
        search_btn = gr.Button("Search", variant="primary")
    table_out = gr.Dataframe(
        headers=RESULT_COLUMNS,
        datatype=["str", "str", "str", "number"],
        wrap=True,
        interactive=False,
    )
    search_btn.click(search_faq, [q_in, k_in], table_out)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside a container.
    demo.launch(server_name="0.0.0.0")