# NOTE(review): "Spaces: / Sleeping / Sleeping" was Hugging Face Spaces page
# chrome captured when this file was copied from the web UI — not program text.
# Third-party dependencies: Gradio (UI), requests (HTTP), BeautifulSoup
# (HTML parsing), and a transformers pipeline for task extraction.
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# 🔍 Load the transformer model once at import time so every request reuses it.
task_extractor = pipeline("text2text-generation", model="google/flan-t5-small")
# π Optional alias correction | |
# 🔍 Optional alias correction: maps informal task phrasings to the official
# Hugging Face pipeline tags used in the model-hub URL.
TASK_ALIASES = {
    "classification": "text-classification",
    "financial classification": "text-classification",
    "news classification": "text-classification",
    "qa": "question-answering",
    "summarisation": "summarization",
    "token": "token-classification",
    "token classification": "token-classification",
    "object detection": "object-detection",
}


def normalize_task(task):
    """Return the canonical Hugging Face task tag for *task*.

    The alias lookup is case-insensitive; a task with no alias entry is
    returned unchanged (original casing preserved).
    """
    return TASK_ALIASES.get(task.lower(), task)
# | |
# π Extract task from user input | |
# 🔍 Extract task from user input
def extract_task(user_input):
    """Map a free-text request to an official Hugging Face task tag.

    Prompts the FLAN-T5 pipeline with a few-shot instruction, lower-cases
    and strips the generated answer, and passes it through normalize_task()
    to correct common informal phrasings.
    """
    prompt = (
        "You are a helpful AI assistant. Your job is to identify the correct Hugging Face model task "
        "based on the user's request. Choose one from the following official tasks:\n"
        "text-classification, token-classification, translation, summarization, object-detection, "
        "image-classification, question-answering, zero-shot-classification, conversational.\n\n"
        "Here are some examples:\n"
        "- 'Translate French to English' → translation\n"
        "- 'Summarize this legal document' → summarization\n"
        "- 'Detect humans in images' → object-detection\n"
        "- 'Classify news articles about finance' → text-classification\n"
        "- 'Extract people and organizations from legal documents' → token-classification\n"
        "- 'Build a chatbot for tourists' → conversational\n\n"
        f"User request: '{user_input}'\n"
        "Return only the task name from the list above."
    )
    # Small output budget: only a single task name is expected back.
    result = task_extractor(prompt, max_new_tokens=10)
    task = result[0]["generated_text"].strip().lower()
    return normalize_task(task)
# π Scrape models from Hugging Face | |
# 🔍 Scrape models from Hugging Face
def get_models_for_task(task):
    """Scrape the Hugging Face model hub for models tagged with *task*.

    Returns a list of at most 10 dicts with keys "Model Name", "Task",
    and "Architecture". Raises Exception on a non-200 HTTP response.
    """
    url = f"https://huggingface.co/models?pipeline_tag={task}"
    headers = {"User-Agent": "Mozilla/5.0"}  # avoid bot detection
    # A timeout keeps a stalled request from hanging the UI indefinitely.
    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch models: HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    model_cards = soup.find_all("article")

    models_info = []
    for card in model_cards[:10]:  # Limit to top 10
        name_tag = card.find("a", href=True)
        # Hub markup varies between span- and div-based tag elements.
        tags = card.find_all("span", class_="tag") or card.find_all("div", class_="tag")
        models_info.append({
            "Model Name": name_tag.text.strip() if name_tag else "unknown",
            "Task": task,
            "Architecture": tags[0].text.strip() if tags else "unknown",
        })
    return models_info
# π Gradio UI | |
# 🔍 Gradio callback: task extraction + hub scraping wired together.
def model_search_interface(user_input):
    """Gradio handler returning (status message, table rows) for the UI.

    Any failure — model inference, network, or parsing — is caught and
    surfaced in the status textbox instead of crashing the interface.
    """
    try:
        task = extract_task(user_input)
        models = get_models_for_task(task)
        if not models:
            return f"No models found for task '{task}'.", []
        table_data = [[m["Model Name"], m["Task"], m["Architecture"]] for m in models]
        return f"Task identified: {task}", table_data
    except Exception as e:
        return f"❌ Error: {str(e)}", []
# π¨ Launch UI | |
# 🎨 Launch UI
with gr.Blocks() as demo:
    gr.Markdown("### 🔍 HuggingFace Model Search by Task")
    with gr.Row():
        user_input = gr.Textbox(label="Describe the ML task you're interested in:")
        output_msg = gr.Textbox(label="Status", interactive=False)
    model_table = gr.Dataframe(headers=["Model Name", "Task", "Architecture"], label="Top Models")
    btn = gr.Button("🔍 Search Models")
    btn.click(fn=model_search_interface, inputs=user_input, outputs=[output_msg, model_table])

demo.launch()