Spaces:

ghostai1
/

sentence-transformers

Sleeping

File size: 3,032 Bytes

beff51c
 
6310b52
 
 
beff51c
6310b52
beff51c
71b51dc
 
beff51c
 
 
6310b52
 
beff51c
 
 
 
 
 
 
71b51dc
 
 
beff51c
6310b52
 
 
 
 
beff51c
6310b52
beff51c
 
 
 
 
 
6310b52
beff51c
 
 
6310b52
 
 
beff51c
6310b52
 
beff51c
 
 
 
 
 
 
 
 
 
6310b52
 
 
beff51c
6310b52
 
71b51dc
beff51c
 
 
 
 
 
 
 
 
 
 
 
 
 
71b51dc
 
6310b52

# MiniLM Semantic FAQ Search – CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.

import re
from pathlib import Path

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# ------- paths & model -------------------------------------------------
# Paths are resolved relative to this file rather than the working directory.
BASE_DIR   = Path(__file__).parent
CSV_FILE   = BASE_DIR / "faqs.csv"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# ------- load FAQ data -------------------------------------------------
# Fail fast with an actionable message when the data file is absent.
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

faq_df = pd.read_csv(CSV_FILE)

# Report every missing required column at once instead of failing on the
# first bare column lookup.
_missing_columns = [
    col for col in ("question", "answer") if col not in faq_df.columns
]
if _missing_columns:
    raise KeyError(
        f"{CSV_FILE} is missing required column(s): "
        f"{', '.join(_missing_columns)}"
    )

# Blank cells are read by pandas as NaN (a float). Such values cannot be
# embedded or regex-matched, so incomplete rows are dropped here.
faq_df = faq_df.dropna(subset=["question", "answer"]).reset_index(drop=True)
if faq_df.empty:
    raise ValueError(
        f"{CSV_FILE} contains no complete question/answer rows."
    )

# Cast to str so purely numeric cells do not leak non-string values into
# the lists used for encoding and display.
questions = faq_df["question"].astype(str).tolist()
answers   = faq_df["answer"].astype(str).tolist()

# ------- embed questions ----------------------------------------------
# The question embeddings are computed once at import time and reused for
# every search; they are L2-normalized (normalize_embeddings=True).
model = SentenceTransformer(MODEL_NAME)
question_embs = model.encode(
    questions, convert_to_tensor=True, normalize_embeddings=True
)

# ------- tiny emoji tagger --------------------------------------------
# Keyword pattern -> emoji. Rules are tried in insertion order, so earlier
# entries take priority when a text matches several of them.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b"        : "🧴",
    r"\b(hair\s?spray|spray)\b"              : "💨",
    r"\b(vegan|botanical|organic)\b"         : "🌱",
    r"\b(heat|thermal|hot)\b"                : "🔥",
    r"\b(balayage|color|colour|dye)\b"       : "💇‍♀️",
    r"\b(scissors|cut|trim)\b"               : "✂️",
}

# Compiled once at import time (case-insensitive) so lookups do not go
# through the raw pattern strings on every call.
_COMPILED_EMOJI_RULES = [
    (re.compile(pattern, flags=re.I), emo)
    for pattern, emo in EMOJI_RULES.items()
]


def emoji_for(text: str) -> str:
    """Return the emoji of the first rule whose keywords appear in *text*.

    Matching is case-insensitive and on whole words. The fallback "❓" is
    returned when no rule matches, and also when *text* is not a string
    (for example a NaN produced by pandas for a blank CSV cell), instead of
    letting ``re`` raise ``TypeError``.
    """
    if not isinstance(text, str):
        return "❓"
    for compiled, emo in _COMPILED_EMOJI_RULES:
        if compiled.search(text):
            return emo
    return "❓"

# ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int):
    """Return the FAQ entries most similar to *query* as a DataFrame.

    Args:
        query: Free-text question typed by the user.
        top_k: Maximum number of results wanted. It is cast to ``int`` and
            clamped to the range ``1..len(questions)``, because ``topk``
            raises when asked for more items than exist.

    Returns:
        A DataFrame with columns ``Emoji``, ``Question``, ``Answer`` and
        ``Score`` (cosine similarity rounded to 3 decimals), ordered from
        most to least similar. It is empty when *query* is ``None`` or
        blank, or when there are no FAQ entries.
    """
    columns = ["Emoji", "Question", "Answer", "Score"]

    # Nothing to search for: a missing or whitespace-only query.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)
    # Nothing to search in.
    if not questions:
        return pd.DataFrame(columns=columns)

    k = max(1, min(int(top_k), len(questions)))

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    sims  = util.cos_sim(q_emb, question_embs)[0]
    best  = sims.topk(k=k)

    # topk already returns the scores alongside the indices, so there is no
    # need to index back into the similarity tensor per row.
    scores  = best.values.cpu().tolist()
    indices = best.indices.cpu().tolist()

    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(float(score), 3)]
        for i, score in zip(indices, scores)
    ]
    return pd.DataFrame(rows, columns=columns)

# ------- Gradio UI -----------------------------------------------------
# Page layout: a heading, a row holding the query box and result-count
# slider, a search button, and a read-only results table.
with gr.Blocks(title="Semantic FAQ Search", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Semantic FAQ Search")

    with gr.Row():
        q_in = gr.Textbox(
            lines=2,
            label="Ask a question",
            placeholder="e.g. Which spray protects hair from heat?",
        )
        k_in = gr.Slider(
            minimum=1,
            maximum=5,
            step=1,
            value=3,
            label="Results",
        )

    search_btn = gr.Button("Search", variant="primary")

    # Output only: users cannot edit the cells, and long text wraps.
    table_out = gr.Dataframe(
        headers=["Emoji", "Question", "Answer", "Score"],
        datatype=["str", "str", "str", "number"],
        interactive=False,
        wrap=True,
    )

    # Clicking the button runs the search on the current query and count.
    search_btn.click(
        fn=search_faq,
        inputs=[q_in, k_in],
        outputs=table_out,
    )

# Start the server only when run as a script; bind to all interfaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")