# Source: Hugging Face Space rdave88/models_and_tasks (status: Running; file size: 3,158 bytes)

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# 🔁 Load transformer model once
# Built at import time so every request reuses the same pipeline object.
task_extractor = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
)

# 🔁 Optional alias correction
# Maps loose task spellings (keys are lowercase) to the canonical hyphenated name.
TASK_ALIASES = {
    "classification": "text-classification",
    "financial classification": "text-classification",
    "news classification": "text-classification",
    "qa": "question-answering",
    "summarisation": "summarization",
    "token": "token-classification",
    "token classification": "token-classification",
    "object detection": "object-detection",
}


def normalize_task(task):
    """Return the canonical task name for a loosely formatted ``task`` string.

    The input is trimmed of surrounding whitespace, quotes and periods,
    lowercased, and has internal whitespace runs collapsed before it is
    looked up in ``TASK_ALIASES``.

    Args:
        task: Task name as typed by a user or generated by the model.

    Returns:
        The alias target when the cleaned name is a key of ``TASK_ALIASES``;
        otherwise the cleaned name with spaces replaced by hyphens.
    """
    # Generated text may arrive quoted or with a trailing period.
    cleaned = task.strip().strip("\"'.").strip().lower()
    cleaned = " ".join(cleaned.split())
    if cleaned in TASK_ALIASES:
        return TASK_ALIASES[cleaned]
    # Every canonical name in this file is hyphen-separated, so do the same
    # for unmatched multi-word names ("question answering").
    return cleaned.replace(" ", "-")

# 🔍 Extract task from user input
def extract_task(user_input):
    """Ask the text2text model which task ``user_input`` describes.

    The model is prompted to pick one name from a fixed candidate list; its
    first generation is trimmed, lowercased and passed through
    ``normalize_task``.

    Args:
        user_input: Free-text description of what the user wants to do.

    Returns:
        The normalized task name derived from the model's output.
    """
    candidate_tasks = (
        "text-classification",
        "token-classification",
        "translation",
        "summarization",
        "question-answering",
        "object-detection",
    )
    instruction = (
        "Given a user query, extract the most likely machine learning task "
        "from the following list: " + ", ".join(candidate_tasks) + ". "
        + f"Query: {user_input}. Only return the task name."
    )
    generations = task_extractor(instruction, max_new_tokens=10)
    first_generation = generations[0]
    raw_label = first_generation["generated_text"]
    return normalize_task(raw_label.strip().lower())

# 🔍 Scrape models from Hugging Face
def get_models_for_task(task, limit=10, timeout=10):
    """Scrape the Hugging Face model listing page filtered by ``task``.

    Args:
        task: Value sent as the ``pipeline_tag`` query parameter.
        limit: Maximum number of model cards to return (default 10).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``"Model Name"``, ``"Task"`` and
        ``"Architecture"``, one per ``<article>`` element found on the page.
        The list is empty when the page contains no ``<article>`` elements.

    Raises:
        RuntimeError: If the response status code is not 200.
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(
        "https://huggingface.co/models",
        # Passing the tag via ``params`` URL-encodes it instead of splicing
        # raw model output into the query string.
        params={"pipeline_tag": task},
        headers={"User-Agent": "Mozilla/5.0"},  # avoid bot detection
        # Without a timeout a stalled connection would block the UI callback.
        timeout=timeout,
    )

    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch models: HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    model_cards = soup.find_all("article")

    models_info = []
    for card in model_cards[:limit]:
        name_tag = card.find("a", href=True)
        # Prefer <span class="tag">; fall back to <div class="tag"> when the
        # card has no such spans.
        tags = card.find_all("span", class_="tag") or card.find_all("div", class_="tag")

        name = name_tag.text.strip() if name_tag else "unknown"
        # NOTE(review): the first "tag" element is reported as the
        # architecture; confirm that matches the current page markup.
        arch = tags[0].text.strip() if tags else "unknown"

        models_info.append({
            "Model Name": name,
            "Task": task,
            "Architecture": arch,
        })

    return models_info

# 🎛 Gradio UI
def model_search_interface(user_input):
    """Gradio callback: identify the task in ``user_input`` and list models.

    Args:
        user_input: Free-text task description from the UI textbox.

    Returns:
        A ``(status_message, table_rows)`` tuple. ``table_rows`` is a list of
        ``[model name, task, architecture]`` lists, and is empty when the
        input is blank, no models are found, or an error occurs.
    """
    # Blank input carries nothing to classify; skip the model and HTTP calls.
    if not user_input or not user_input.strip():
        return "Please describe the ML task you're interested in.", []

    try:
        task = extract_task(user_input)
        models = get_models_for_task(task)
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the callback raise.
        return f"❌ Error: {str(e)}", []

    if not models:
        return f"No models found for task '{task}'.", []
    table_data = [[m["Model Name"], m["Task"], m["Architecture"]] for m in models]
    return f"Task identified: {task}", table_data


# 🎨 Launch UI
# Declare the UI inside a Blocks context; `demo` is the app object launched below.
with gr.Blocks() as demo:
    gr.Markdown("### 🔍 HuggingFace Model Search by Task")

    # The query box and the read-only status box are declared in the same Row.
    with gr.Row():
        user_input = gr.Textbox(label="Describe the ML task you're interested in:")
        output_msg = gr.Textbox(label="Status", interactive=False)

    # Headers line up with the three values in each row built by model_search_interface.
    model_table = gr.Dataframe(headers=["Model Name", "Task", "Architecture"], label="Top Models")

    btn = gr.Button("🔍 Search Models")
    # model_search_interface returns (status message, table rows); the outputs
    # list is given in that same order.
    btn.click(fn=model_search_interface, inputs=user_input, outputs=[output_msg, model_table])

# Called unconditionally at module level (there is no __main__ guard).
demo.launch()