# ghostai1's picture
# Update app.py
# beff51c verified
# MiniLM Semantic FAQ Search – CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.
import re
from pathlib import Path
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# ------- paths & model -------------------------------------------------
# Directory that contains this script; the FAQ CSV is looked up beside it.
BASE_DIR = Path(__file__).parent
CSV_FILE = BASE_DIR.joinpath("faqs.csv")
# Identifier of the sentence-embedding model handed to SentenceTransformer.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# ------- load FAQ data -------------------------------------------------
# Fail fast with a readable message when the CSV is absent.
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

# Rows lacking a question or an answer are dropped: pandas loads empty cells
# as NaN (a float), which would later break both the sentence encoder and the
# regex-based emoji tagger. A missing column still raises KeyError here.
faq_df = (
    pd.read_csv(CSV_FILE)
    .dropna(subset=["question", "answer"])
    .reset_index(drop=True)
)

# Coerce to str so purely numeric cells (e.g. an answer of "42") stay text.
# The two lists are index-aligned: answers[i] belongs to questions[i].
questions = faq_df["question"].astype(str).tolist()
answers = faq_df["answer"].astype(str).tolist()
# ------- embed questions ----------------------------------------------
# Load the encoder once at import time; it is reused for every query.
model = SentenceTransformer(MODEL_NAME)

# Embed every FAQ question up front so a search only has to encode the query.
# Embeddings are requested as a tensor and L2-normalised.
question_embs = model.encode(
    questions,
    normalize_embeddings=True,
    convert_to_tensor=True,
)
# ------- tiny emoji tagger --------------------------------------------
# Regex pattern -> emoji. Insertion order matters: emoji_for returns the emoji
# of the FIRST pattern that matches. The emoji are written as escape sequences
# so the source stays ASCII-only and cannot be corrupted by an encoding
# round-trip (several of these literals had been mangled into mojibake).
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "\U0001F9F4",  # lotion bottle
    r"\b(hair\s?spray|spray)\b": "\U0001F4A8",  # dashing away (puff of air)
    r"\b(vegan|botanical|organic)\b": "\U0001F331",  # seedling
    r"\b(heat|thermal|hot)\b": "\U0001F525",  # fire
    # person getting haircut + ZWJ + female sign + variation selector-16
    r"\b(balayage|color|colour|dye)\b": "\U0001F487\u200D\u2640\uFE0F",
    r"\b(scissors|cut|trim)\b": "\u2702\uFE0F",  # scissors (emoji style)
}


def emoji_for(text: str) -> str:
    """Return the emoji of the first rule in EMOJI_RULES matching *text*.

    Matching is case-insensitive and uses ``re.search``, so a keyword may
    appear anywhere in the text as a whole word.

    Args:
        text: Text to classify.

    Returns:
        The emoji of the first matching rule, or a question-mark emoji when
        no rule matches.
    """
    for pattern, emo in EMOJI_RULES.items():
        if re.search(pattern, text, flags=re.I):
            return emo
    return "\u2753"  # question mark ornament: nothing matched
# ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int) -> pd.DataFrame:
    """Return the FAQ entries most similar to *query*.

    Args:
        query: Free-text question typed by the user. ``None``, empty and
            whitespace-only values yield an empty result.
        top_k: Maximum number of rows to return. Coerced to ``int`` and capped
            at the number of FAQ entries.

    Returns:
        A DataFrame with columns ``Emoji``, ``Question``, ``Answer`` and
        ``Score`` (cosine similarity rounded to three decimals), in the order
        produced by ``topk`` (highest similarity first).
    """
    columns = ["Emoji", "Question", "Answer", "Score"]

    # Nothing to search for: hand back an empty table with the right headers.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    # topk() needs an int and raises when k exceeds the number of candidates;
    # the UI slider may deliver a float and the CSV may hold fewer rows than
    # the slider maximum.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    # cos_sim returns a (1, n_questions) matrix for a single query; take row 0.
    sims = util.cos_sim(q_emb, question_embs)[0]

    best = sims.topk(k=k)
    scores = best.values.cpu().tolist()
    indices = best.indices.cpu().tolist()

    # The emoji is chosen from the answer text, not from the question.
    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(score, 3)]
        for score, i in zip(scores, indices)
    ]
    return pd.DataFrame(rows, columns=columns)
# ------- Gradio UI -----------------------------------------------------
# NOTE(review): the original indentation was lost, so which widgets sit inside
# the Row is reconstructed here (inputs and button side by side) - confirm
# against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
    # Heading emoji (magnifying glass) is written as an escape sequence; the
    # literal had been corrupted into mojibake.
    gr.Markdown("# \U0001F50D Semantic FAQ Search")

    with gr.Row():
        q_in = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. Which spray protects hair from heat?",
        )
        k_in = gr.Slider(1, 5, value=3, step=1, label="Results")
        search_btn = gr.Button("Search", variant="primary")

    # Read-only results table; headers mirror the columns of search_faq().
    table_out = gr.Dataframe(
        headers=["Emoji", "Question", "Answer", "Score"],
        datatype=["str", "str", "str", "number"],
        wrap=True,
        interactive=False,
    )

    # Clicking the button runs the search with (query, top_k) and fills the table.
    search_btn.click(search_faq, [q_in, k_in], table_out)
if __name__ == "__main__":
    # Only start the server when run as a script; bind to all network
    # interfaces (0.0.0.0) instead of loopback only.
    demo.launch(server_name="0.0.0.0")