ghostai1 committed on
Commit
beff51c
·
verified ·
1 Parent(s): 6310b52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -33
app.py CHANGED
@@ -1,65 +1,83 @@
1
- # app.py ─ HF Space • MiniLM semantic FAQ search (CPU-only)
 
2
 
3
  import re
4
  from pathlib import Path
5
- import pandas as pd
6
  import gradio as gr
 
7
  from sentence_transformers import SentenceTransformer, util
8
 
9
- # ─────────── config ───────────
10
- CSV_PATH = Path("faqs.csv")
 
11
  MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
12
 
13
- # ─────────── load data/model ───────────
14
- faq_df = pd.read_csv(CSV_PATH)
 
 
 
 
 
15
  questions = faq_df["question"].tolist()
16
  answers = faq_df["answer"].tolist()
17
 
 
18
  model = SentenceTransformer(MODEL_NAME)
19
  question_embs = model.encode(
20
  questions, convert_to_tensor=True, normalize_embeddings=True
21
  )
22
 
23
- # ─────────── tiny emoji tagger ───────────
24
  EMOJI_RULES = {
25
- r"\b(shampoo|conditioner|mask)\b" : "🧴",
26
- r"\b(hair\s?spray|spray)\b" : "💨",
27
- r"\b(vegan|botanical|organic)\b" : "🌱",
28
- r"\b(heat|thermal)\b" : "🔥",
29
- r"\b(balayage|color|colour|dye)\b" : "💇‍♀️",
30
- r"\b(scissors|cut|trim)\b" : "✂️",
31
  }
32
- def tag_emoji(text: str) -> str:
33
- for pat, emo in EMOJI_RULES.items():
34
- if re.search(pat, text, flags=re.I):
35
  return emo
36
  return "❓"
37
 
38
- # ─────────── search fn ───────────
39
  def search_faq(query: str, top_k: int):
40
  if not query.strip():
41
- return pd.DataFrame(columns=["Emoji", "Question", "Answer", "Score"])
42
- q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
43
- scores = util.cos_sim(q_emb, question_embs)[0]
44
- idx_list = scores.topk(k=top_k).indices.cpu().tolist()
45
- rows = [
46
- [tag_emoji(answers[i]), questions[i], answers[i], round(float(scores[i]), 3)]
47
- for i in idx_list
 
 
 
48
  ]
49
  return pd.DataFrame(rows, columns=["Emoji", "Question", "Answer", "Score"])
50
 
51
- # ─────────── gradio ui ───────────
52
  with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
53
  gr.Markdown("# 🔍 Semantic FAQ Search")
54
  with gr.Row():
55
- inp = gr.Textbox(label="Ask a question", lines=2,
56
- placeholder="e.g. Which spray protects hair from heat?")
57
- k = gr.Slider(1, 5, value=3, step=1, label="Number of results")
58
- btn = gr.Button("Search", variant="primary")
59
- table = gr.Dataframe(headers=["Emoji", "Question", "Answer", "Score"],
60
- datatype=["str", "str", "str", "number"],
61
- wrap=True, interactive=False)
62
- btn.click(search_faq, [inp, k], table)
 
 
 
 
 
 
63
 
64
  if __name__ == "__main__":
65
  demo.launch(server_name="0.0.0.0")
 
1
+ # MiniLM Semantic FAQ Search – CPU-only HF Space
2
+ # Works out-of-the-box with faqs.csv in the same folder.
3
 
4
  import re
5
  from pathlib import Path
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
  from sentence_transformers import SentenceTransformer, util
10
 
11
+ # ------- paths & model -------------------------------------------------
12
+ BASE_DIR = Path(__file__).parent
13
+ CSV_FILE = BASE_DIR / "faqs.csv"
14
  MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
15
 
16
+ # ------- load FAQ data -------------------------------------------------
17
+ if not CSV_FILE.exists():
18
+ raise FileNotFoundError(
19
+ f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
20
+ )
21
+
22
+ faq_df = pd.read_csv(CSV_FILE)
23
  questions = faq_df["question"].tolist()
24
  answers = faq_df["answer"].tolist()
25
 
26
+ # ------- embed questions ----------------------------------------------
27
  model = SentenceTransformer(MODEL_NAME)
28
  question_embs = model.encode(
29
  questions, convert_to_tensor=True, normalize_embeddings=True
30
  )
31
 
32
+ # ------- tiny emoji tagger --------------------------------------------
# Keyword pattern -> emoji. Dict insertion order is the match priority:
# emoji_for() returns the emoji of the first pattern found in the text.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "🧴",
    r"\b(hair\s?spray|spray)\b": "💨",
    r"\b(vegan|botanical|organic)\b": "🌱",
    r"\b(heat|thermal|hot)\b": "🔥",
    r"\b(balayage|color|colour|dye)\b": "💇‍♀️",
    r"\b(scissors|cut|trim)\b": "✂️",
}


def emoji_for(text: str) -> str:
    """Return the emoji tag for *text*.

    The patterns in ``EMOJI_RULES`` are tried in insertion order with a
    case-insensitive whole-word search; the emoji of the first pattern that
    matches is returned.

    Args:
        text: Text to classify (the caller passes an FAQ answer).

    Returns:
        The matching emoji, or "❓" when no pattern matches or *text* is not
        a string.
    """
    # Non-string values (e.g. the float NaN pandas produces for an empty CSV
    # cell) would make re.search raise TypeError; treat them as "no match".
    if not isinstance(text, str):
        return "❓"
    for pattern, emo in EMOJI_RULES.items():
        if re.search(pattern, text, flags=re.I):
            return emo
    return "❓"
46
 
47
+ # ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int):
    """Return the FAQ entries most similar to *query* as a DataFrame.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the module-level ``question_embs``.

    Args:
        query: Free-text question from the user. ``None``, empty, or
            whitespace-only input yields an empty result table.
        top_k: Maximum number of rows to return. It is coerced to ``int`` and
            capped at the number of FAQ entries.

    Returns:
        A ``pandas.DataFrame`` with columns ``Emoji``, ``Question``,
        ``Answer`` and ``Score`` (cosine similarity rounded to 3 decimals),
        in the order returned by ``topk``.
    """
    columns = ["Emoji", "Question", "Answer", "Score"]
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    # topk() raises if k exceeds the number of candidates and requires an
    # integer k; the UI slider value may arrive as a float.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, question_embs)[0]
    top = sims.topk(k=k)

    # Use the values topk already returns instead of re-indexing the tensor.
    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(float(score), 3)]
        for score, i in zip(top.values.cpu().tolist(), top.indices.cpu().tolist())
    ]
    return pd.DataFrame(rows, columns=columns)
62
 
63
+ # ------- Gradio UI -----------------------------------------------------
64
  with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo:
65
  gr.Markdown("# 🔍 Semantic FAQ Search")
66
  with gr.Row():
67
+ q_in = gr.Textbox(
68
+ label="Ask a question",
69
+ lines=2,
70
+ placeholder="e.g. Which spray protects hair from heat?"
71
+ )
72
+ k_in = gr.Slider(1, 5, value=3, step=1, label="Results")
73
+ search_btn = gr.Button("Search", variant="primary")
74
+ table_out = gr.Dataframe(
75
+ headers=["Emoji", "Question", "Answer", "Score"],
76
+ datatype=["str", "str", "str", "number"],
77
+ wrap=True,
78
+ interactive=False
79
+ )
80
+ search_btn.click(search_faq, [q_in, k_in], table_out)
81
 
82
  if __name__ == "__main__":
83
  demo.launch(server_name="0.0.0.0")