import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Load the transformer once at module level so every request reuses it
# instead of paying the model-load cost per call.
task_extractor = pipeline("text2text-generation", model="google/flan-t5-small")

# Map informal / shorthand task names to official Hugging Face pipeline tags.
TASK_ALIASES = {
    "classification": "text-classification",
    "financial classification": "text-classification",
    "news classification": "text-classification",
    "qa": "question-answering",
    "summarisation": "summarization",
    "token": "token-classification",
    "token classification": "token-classification",
    "object detection": "object-detection",
}

# Network timeout (seconds) for the Hugging Face scrape; without one a
# stalled connection would hang the UI indefinitely.
REQUEST_TIMEOUT = 10


def normalize_task(task: str) -> str:
    """Return the official Hugging Face pipeline tag for *task*.

    Unknown names pass through unchanged; lookup is case-insensitive.
    """
    return TASK_ALIASES.get(task.lower(), task)


def extract_task(user_input: str) -> str:
    """Infer the Hugging Face task name from a free-text user request.

    Prompts the flan-t5 model with a few-shot instruction, then normalizes
    the generated answer through TASK_ALIASES.
    """
    prompt = (
        "You are a helpful AI assistant. Your job is to identify the correct Hugging Face model task "
        "based on the user's request. Choose one from the following official tasks:\n"
        "text-classification, token-classification, translation, summarization, object-detection, "
        "image-classification, question-answering, zero-shot-classification, conversational.\n\n"
        "Here are some examples:\n"
        "- 'Translate French to English' → translation\n"
        "- 'Summarize this legal document' → summarization\n"
        "- 'Detect humans in images' → object-detection\n"
        "- 'Classify news articles about finance' → text-classification\n"
        "- 'Extract people and organizations from legal documents' → token-classification\n"
        "- 'Build a chatbot for tourists' → conversational\n\n"
        f"User request: '{user_input}'\n"
        "Return only the task name from the list above."
    )
    result = task_extractor(prompt, max_new_tokens=10)
    task = result[0]["generated_text"].strip().lower()
    return normalize_task(task)


def get_models_for_task(task: str) -> list[dict]:
    """Scrape the Hugging Face hub listing page for models tagged with *task*.

    Returns up to 10 dicts with keys "Model Name", "Task", "Architecture".
    Raises RuntimeError on a non-200 HTTP response.

    NOTE(review): this scrapes server-rendered HTML; the card markup
    (``article`` / ``span.tag``) may change without notice. The JSON API
    at https://huggingface.co/api/models?pipeline_tag=<task> would be a
    more stable data source.
    """
    url = f"https://huggingface.co/models?pipeline_tag={task}"
    headers = {"User-Agent": "Mozilla/5.0"}  # avoid bot detection
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # RuntimeError (an Exception subclass) keeps the existing
        # `except Exception` boundary in model_search_interface working.
        raise RuntimeError(f"Failed to fetch models: HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    model_cards = soup.find_all("article")

    models_info = []
    for card in model_cards[:10]:  # Limit to top 10
        name_tag = card.find("a", href=True)
        tags = card.find_all("span", class_="tag") or card.find_all("div", class_="tag")
        name = name_tag.text.strip() if name_tag else "unknown"
        arch = tags[0].text.strip() if tags else "unknown"
        models_info.append({
            "Model Name": name,
            "Task": task,
            "Architecture": arch,
        })
    return models_info


def model_search_interface(user_input: str):
    """Gradio callback: resolve the task, fetch models, format results.

    Returns a (status_message, table_rows) pair; on any failure the error
    is reported in the status message and the table is left empty.
    """
    try:
        task = extract_task(user_input)
        models = get_models_for_task(task)
        if not models:
            return f"No models found for task '{task}'.", []
        table_data = [[m["Model Name"], m["Task"], m["Architecture"]] for m in models]
        return f"Task identified: {task}", table_data
    except Exception as e:
        # Top-level UI boundary: surface the error to the user rather than crash.
        return f"❌ Error: {str(e)}", []


# 🎨 Build the UI at import time; launch only when run as a script.
with gr.Blocks() as demo:
    gr.Markdown("### 🔍 HuggingFace Model Search by Task")
    with gr.Row():
        user_input = gr.Textbox(label="Describe the ML task you're interested in:")
    output_msg = gr.Textbox(label="Status", interactive=False)
    model_table = gr.Dataframe(
        headers=["Model Name", "Task", "Architecture"], label="Top Models"
    )
    btn = gr.Button("🔍 Search Models")
    btn.click(
        fn=model_search_interface,
        inputs=user_input,
        outputs=[output_msg, model_table],
    )

if __name__ == "__main__":
    demo.launch()