File size: 3,816 Bytes
7245f41
9f59898
 
 
7245f41
0c6d118
9f59898
7245f41
0c6d118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abdf2b3
0c6d118
9f59898
0c6d118
abdf2b3
 
 
 
 
 
 
 
 
 
 
 
 
0c6d118
9f59898
 
0c6d118
7245f41
abdf2b3
0c6d118
9f59898
 
2cfd9a7
 
 
 
 
 
0c6d118
2cfd9a7
7245f41
0c6d118
2cfd9a7
 
 
 
 
 
7245f41
0c6d118
2cfd9a7
0c6d118
2cfd9a7
0c6d118
2cfd9a7
0c6d118
7245f41
0c6d118
 
 
 
 
 
 
14ca101
 
9f59898
0c6d118
9f59898
14ca101
0c6d118
9f59898
0c6d118
 
9f59898
0c6d118
 
 
 
 
 
 
9f59898
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# πŸ” Load transformer model once
task_extractor = pipeline("text2text-generation", model="google/flan-t5-small")

# πŸ” Optional alias correction
TASK_ALIASES = {
    "classification": "text-classification",
    "financial classification": "text-classification",
    "news classification": "text-classification",
    "qa": "question-answering",
    "summarisation": "summarization",
    "token": "token-classification",
    "token classification": "token-classification",
    "object detection": "object-detection",
}

def normalize_task(task):
    return TASK_ALIASES.get(task.lower(), task)

# πŸ” Extract task from user input
def extract_task(user_input):
    """Infer the official Hugging Face task tag for a free-text request.

    Feeds the FLAN-T5 model a few-shot instruction prompt, lowercases the
    generated text, and normalizes it through normalize_task.
    """
    instructions = (
        "You are a helpful AI assistant. Your job is to identify the correct Hugging Face model task "
        "based on the user's request. Choose one from the following official tasks:\n"
        "text-classification, token-classification, translation, summarization, object-detection, "
        "image-classification, question-answering, zero-shot-classification, conversational.\n\n"
        "Here are some examples:\n"
        "- 'Translate French to English' β†’ translation\n"
        "- 'Summarize this legal document' β†’ summarization\n"
        "- 'Detect humans in images' β†’ object-detection\n"
        "- 'Classify news articles about finance' β†’ text-classification\n"
        "- 'Extract people and organizations from legal documents' β†’ token-classification\n"
        "- 'Build a chatbot for tourists' β†’ conversational\n\n"
    )
    prompt = (
        instructions
        + f"User request: '{user_input}'\n"
        + "Return only the task name from the list above."
    )
    # Small generation budget: the expected answer is a single short tag.
    generated = task_extractor(prompt, max_new_tokens=10)[0]["generated_text"]
    return normalize_task(generated.strip().lower())


# πŸ” Scrape models from Hugging Face
def get_models_for_task(task):
    url = f"https://huggingface.co/models?pipeline_tag={task}"
    headers = {"User-Agent": "Mozilla/5.0"}  # avoid bot detection
    response = requests.get(url, headers=headers)
    
    if response.status_code != 200:
        raise Exception(f"Failed to fetch models: HTTP {response.status_code}")
    
    soup = BeautifulSoup(response.text, "html.parser")
    model_cards = soup.find_all("article")

    models_info = []
    for card in model_cards[:10]:  # Limit to top 10
        name_tag = card.find("a", href=True)
        tags = card.find_all("span", class_="tag") or card.find_all("div", class_="tag")

        name = name_tag.text.strip() if name_tag else "unknown"
        arch = tags[0].text.strip() if tags else "unknown"

        models_info.append({
            "Model Name": name,
            "Task": task,
            "Architecture": arch,
        })

    return models_info

# πŸŽ› Gradio UI callback
def model_search_interface(user_input):
    """Resolve the user's request to a task and fetch matching models.

    Returns a (status message, table rows) pair. Any failure is reported
    in the status message with an empty table, so the UI never crashes.
    """
    try:
        task = extract_task(user_input)
        models = get_models_for_task(task)
        if models:
            columns = ("Model Name", "Task", "Architecture")
            rows = [[record[col] for col in columns] for record in models]
            return f"Task identified: {task}", rows
        return f"No models found for task '{task}'.", []
    except Exception as e:
        return f"❌ Error: {str(e)}", []


# 🎨 Build and launch the Gradio UI: a query box and status box side by
# side, a results table below, and a button wired to the search callback.
with gr.Blocks() as demo:
    gr.Markdown("### πŸ” HuggingFace Model Search by Task")

    with gr.Row():
        query_box = gr.Textbox(label="Describe the ML task you're interested in:")
        status_box = gr.Textbox(label="Status", interactive=False)

    results_table = gr.Dataframe(headers=["Model Name", "Task", "Architecture"], label="Top Models")

    search_btn = gr.Button("πŸ” Search Models")
    search_btn.click(
        fn=model_search_interface,
        inputs=query_box,
        outputs=[status_box, results_table],
    )

demo.launch()