Spaces:
Sleeping
Sleeping
File size: 3,032 Bytes
beff51c 6310b52 beff51c 6310b52 beff51c 71b51dc beff51c 6310b52 beff51c 71b51dc beff51c 6310b52 beff51c 6310b52 beff51c 6310b52 beff51c 6310b52 beff51c 6310b52 beff51c 6310b52 beff51c 6310b52 71b51dc beff51c 71b51dc 6310b52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# MiniLM Semantic FAQ Search — CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.
import re
from pathlib import Path
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# ------- paths & model -------------------------------------------------
# The CSV is resolved relative to this file, not the working directory.
BASE_DIR = Path(__file__).parent
CSV_FILE = BASE_DIR / "faqs.csv"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# ------- load FAQ data -------------------------------------------------
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

faq_df = pd.read_csv(CSV_FILE)

# Fail early with a readable message instead of a bare KeyError on lookup.
_missing_cols = {"question", "answer"} - set(faq_df.columns)
if _missing_cols:
    raise KeyError(
        f"{CSV_FILE} is missing required column(s): {sorted(_missing_cols)}"
    )

# Blank CSV cells are loaded as NaN (a float, not text). Drop incomplete
# rows and coerce the rest to str so every question/answer is a real string.
faq_df = faq_df.dropna(subset=["question", "answer"]).reset_index(drop=True)
questions = faq_df["question"].astype(str).tolist()
answers = faq_df["answer"].astype(str).tolist()

# ------- embed questions ----------------------------------------------
# Questions are embedded once at import time; queries are compared against
# these normalised vectors on every search.
model = SentenceTransformer(MODEL_NAME)
question_embs = model.encode(
    questions, convert_to_tensor=True, normalize_embeddings=True
)
# ------- tiny emoji tagger --------------------------------------------
# Maps a case-insensitive regex to the emoji shown next to a result.
# Rules are tried in insertion order and the first match wins.
#
# Emoji are written as Unicode escapes so the file stays ASCII-only and
# cannot be corrupted by a wrong encoding on read/write.
# NOTE(review): the previous literals were mis-decoded UTF-8; the values
# below are the presumably intended emoji -- confirm against the design.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "\U0001F9F4",  # lotion bottle
    r"\b(hair\s?spray|spray)\b": "\U0001F4A8",  # dash / puff of air
    r"\b(vegan|botanical|organic)\b": "\U0001F331",  # seedling
    r"\b(heat|thermal|hot)\b": "\U0001F525",  # fire
    r"\b(balayage|color|colour|dye)\b": "\U0001F487\u200D\u2640\uFE0F",  # woman getting haircut
    r"\b(scissors|cut|trim)\b": "\u2702\uFE0F",  # scissors
}

# Shown when no rule matches (red question mark).
_FALLBACK_EMOJI = "\u2753"


def emoji_for(text: str) -> str:
    """Return the emoji of the first rule in ``EMOJI_RULES`` matching *text*.

    Matching is case-insensitive. Anything that is not a string (for
    example a NaN read from an empty CSV cell) gets the fallback emoji
    instead of raising ``TypeError`` inside ``re.search``.
    """
    if not isinstance(text, str):
        return _FALLBACK_EMOJI
    for pattern, emo in EMOJI_RULES.items():
        if re.search(pattern, text, flags=re.IGNORECASE):
            return emo
    return _FALLBACK_EMOJI
# ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int) -> pd.DataFrame:
    """Return the FAQ entries most similar to *query*.

    Args:
        query: Free-text question typed by the user.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``Emoji``, ``Question``, ``Answer`` and
        ``Score`` (cosine similarity rounded to 3 decimals), best match
        first. The frame is empty when the query is blank or there is
        nothing to search.
    """
    columns = ["Emoji", "Question", "Answer", "Score"]

    # A missing or whitespace-only query has nothing to embed.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    # topk() raises if k exceeds the number of candidates, and needs an int
    # in case the UI delivers the slider value as a float.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, question_embs)[0]
    best = sims.topk(k=k)
    hits = zip(best.indices.cpu().tolist(), best.values.cpu().tolist())
    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(score, 3)]
        for i, score in hits
    ]
    return pd.DataFrame(rows, columns=columns)
# ------- Gradio UI -----------------------------------------------------
# Layout: title, then a row holding the query box and the result-count
# slider, then the search button and the results table.
# NOTE(review): nesting was reconstructed from a flattened copy of this
# file -- confirm which widgets belong inside the Row.
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
    # NOTE(review): title emoji restored from a mis-decoded literal;
    # presumably the magnifying glass -- confirm.
    gr.Markdown("# \U0001F50D Semantic FAQ Search")

    with gr.Row():
        q_in = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. Which spray protects hair from heat?",
        )
        k_in = gr.Slider(1, 5, value=3, step=1, label="Results")

    search_btn = gr.Button("Search", variant="primary")
    table_out = gr.Dataframe(
        headers=["Emoji", "Question", "Answer", "Score"],
        datatype=["str", "str", "str", "number"],
        wrap=True,
        interactive=False,
    )

    # Clicking the button runs the search and fills the table.
    search_btn.click(search_faq, [q_in, k_in], table_out)

if __name__ == "__main__":
    # 0.0.0.0 listens on all interfaces rather than only on localhost.
    demo.launch(server_name="0.0.0.0")
|