# app.py — product semantic search demo (Hugging Face Space "environment")
# Commit 28b8e02 ("Set up model"), by sibthinon; file size ~4.15 kB.
import os
import time
from datetime import datetime

import gradio as gr
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
from sentence_transformers import SentenceTransformer

# Qdrant connection; credentials are read from environment variables.
# NOTE(review): the env var names "Qdrant_url"/"Qdrant_api" are unconventional
# — confirm they match the deployment's configured secrets. os.environ.get
# returns None when unset, which only surfaces as an error at request time.
qdrant_client = QdrantClient(
    url=os.environ.get("Qdrant_url"),
    api_key=os.environ.get("Qdrant_api"),
)
# Pre-loaded embedding models, keyed by the label shown in the UI dropdown
# (these exact keys are reused as the gr.Dropdown choices further below).
models = {
"E5 (intfloat/multilingual-e5-small)": SentenceTransformer('intfloat/multilingual-e5-small'),
"MiniLM (paraphrase-multilingual-MiniLM-L12-v2)": SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'),
"DistilUSE (distiluse-base-multilingual-cased-v1)": SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v1')
}
# Global memory to hold feedback state: the most recent query, its formatted
# result summary, and the model used — read later by log_feedback().
latest_query_result = {"query": "", "result": "", "model": ""}
# 🔍 Search Functions
def search_with_e5(query):
    """Embed *query* with the multilingual E5 model.

    E5-family models expect the "query: " task prefix on search queries.
    """
    e5 = models["E5 (intfloat/multilingual-e5-small)"]
    return e5.encode(f"query: {query}")
def search_with_minilm(query):
    """Embed *query* with the multilingual MiniLM paraphrase model."""
    return models["MiniLM (paraphrase-multilingual-MiniLM-L12-v2)"].encode(query)
def search_with_distiluse(query):
    """Embed *query* with the multilingual DistilUSE model."""
    return models["DistilUSE (distiluse-base-multilingual-cased-v1)"].encode(query)
# 🌟 Main search function
def search_product(query, model_name, limit=10):
    """Embed *query* with the selected model, search Qdrant, format results.

    Args:
        query: free-text search string typed by the user.
        model_name: a label from the ``models`` dict; matched by substring
            ("E5", "MiniLM", "DistilUSE"), in that order of precedence.
        limit: maximum number of points to return. Defaults to 10, which is
            Qdrant's own default, so existing two-argument callers see
            identical results.

    Returns:
        A human-readable result string (timing + matching product names),
        or an error string when *model_name* matches no known model.

    Side effects:
        Updates the module-level ``latest_query_result`` so a later call to
        ``log_feedback`` can attribute feedback to this query.
    """
    start_time = time.time()

    # Dispatch table replaces the if/elif chain; first matching token wins.
    encoders = {
        "E5": search_with_e5,
        "MiniLM": search_with_minilm,
        "DistilUSE": search_with_distiluse,
    }
    encoder = next((fn for token, fn in encoders.items() if token in model_name), None)
    if encoder is None:
        return "❌ ไม่พบโมเดล"
    query_embed = encoder(query)

    # NOTE(review): the collection name is hard-coded to "product_E5" while
    # the embedding model is user-selectable; DistilUSE produces 512-d
    # vectors which will not match an E5-sized collection — confirm intended.
    result = qdrant_client.query_points(
        collection_name="product_E5",
        query=query_embed.tolist(),
        limit=limit,
        with_payload=True,
        query_filter=Filter(
            must=[FieldCondition(key="type", match=MatchValue(value="product"))]
        )
    ).points
    elapsed = time.time() - start_time

    # Build the display text and a compact one-line summary for the log.
    # (" | ".join also fixes the stray trailing separator the old
    # `+= line + " | "` / .strip() combination left behind.)
    output = f"⏱ Time: {elapsed:.2f}s\n\n📦 ผลลัพธ์:\n"
    summary_lines = []
    for point in result:
        line = f"- {point.payload.get('name', '')} (score: {point.score:.4f})"
        output += line + "\n"
        summary_lines.append(line)

    # Remember this query so the feedback buttons can attribute it.
    latest_query_result["query"] = query
    latest_query_result["result"] = " | ".join(summary_lines)
    latest_query_result["model"] = model_name
    return output
# 📝 Logging feedback
def log_feedback(feedback):
    """Append a feedback row for the most recent search to feedback_log.csv.

    Args:
        feedback: feedback label ("match" or "not_match" from the UI buttons).

    Returns:
        A status string for display in the UI.
    """
    log_path = "feedback_log.csv"
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "model": latest_query_result["model"],
        "query": latest_query_result["query"],
        "result": latest_query_result["result"],
        "feedback": feedback,
    }
    # Write the header only on the first append. os.path.exists replaces
    # pd.io.common.file_exists, which is pandas-internal (non-public) API.
    pd.DataFrame([log_entry]).to_csv(
        log_path, mode="a", header=not os.path.exists(log_path), index=False
    )
    return f"✅ Feedback saved: {feedback}"
# 🎨 Gradio UI: model picker + query box, a result display, and two feedback
# buttons that record whether the returned products matched the query.
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Product Semantic Search (Vector Search + Qdrant)")
    with gr.Row():
        model_selector = gr.Dropdown(
            choices=list(models.keys()),
            label="เลือกโมเดล",
            value="E5 (intfloat/multilingual-e5-small)"
        )
        query_input = gr.Textbox(label="พิมพ์คำค้นหา")
    result_output = gr.Textbox(label="📋 ผลลัพธ์")
    with gr.Row():
        match_btn = gr.Button("✅ ตรง")
        not_match_btn = gr.Button("❌ ไม่ตรง")
    feedback_status = gr.Textbox(label="📬 สถานะ Feedback")

    # Events. search_product is passed directly — the old
    # `submit_fn = lambda q, m: search_product(q, m)` wrapper added nothing.
    # The feedback buttons take no inputs; log_feedback reads the
    # module-level latest_query_result instead.
    query_input.submit(search_product, inputs=[query_input, model_selector], outputs=result_output)
    match_btn.click(lambda: log_feedback("match"), outputs=feedback_status)
    not_match_btn.click(lambda: log_feedback("not_match"), outputs=feedback_status)

# Run the app; share=True also exposes a public gradio.live link.
demo.launch(share=True)