# NOTE(review): "Spaces: / Sleeping / Sleeping" was Hugging Face Spaces page
# chrome captured when this file was copied from the web UI — not program text.
# Third-party dependencies: Gradio (UI), requests (HTTP), BeautifulSoup
# (HTML parsing), and a transformers pipeline for task extraction.
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# 🔍 Load the transformer model once at import time so every request reuses it.
task_extractor = pipeline("text2text-generation", model="google/flan-t5-small")
# π Optional alias correction | |
# 🔍 Optional alias correction: maps informal task phrasings to the official
# Hugging Face pipeline tags used in the model-hub URL.
TASK_ALIASES = {
    "classification": "text-classification",
    "financial classification": "text-classification",
    "news classification": "text-classification",
    "qa": "question-answering",
    "summarisation": "summarization",
    "token": "token-classification",
    "token classification": "token-classification",
    "object detection": "object-detection",
}


def normalize_task(task):
    """Return the canonical Hugging Face task tag for *task*.

    The alias lookup is case-insensitive; a task with no alias entry is
    returned unchanged (original casing preserved).
    """
    return TASK_ALIASES.get(task.lower(), task)
# | |
# π Extract task from user input | |
# 🔍 Extract task from user input
def extract_task(user_input):
    """Map a free-text request to an official Hugging Face task tag.

    Prompts the FLAN-T5 pipeline with a few-shot instruction, lower-cases
    and strips the generated answer, and passes it through normalize_task()
    to correct common informal phrasings.
    """
    prompt = (
        "You are a helpful AI assistant. Your job is to identify the correct Hugging Face model task "
        "based on the user's request. Choose one from the following official tasks:\n"
        "text-classification, token-classification, translation, summarization, object-detection, "
        "image-classification, question-answering, zero-shot-classification, conversational.\n\n"
        "Here are some examples:\n"
        "- 'Translate French to English' → translation\n"
        "- 'Summarize this legal document' → summarization\n"
        "- 'Detect humans in images' → object-detection\n"
        "- 'Classify news articles about finance' → text-classification\n"
        "- 'Extract people and organizations from legal documents' → token-classification\n"
        "- 'Build a chatbot for tourists' → conversational\n\n"
        f"User request: '{user_input}'\n"
        "Return only the task name from the list above."
    )
    # Small output budget: only a single task name is expected back.
    result = task_extractor(prompt, max_new_tokens=10)
    task = result[0]["generated_text"].strip().lower()
    return normalize_task(task)
# π Scrape models from Hugging Face | |
# 🔍 Scrape models from Hugging Face
def get_models_for_task(task):
    """Scrape the Hugging Face model hub for models tagged with *task*.

    Returns a list of at most 10 dicts with keys "Model Name", "Task",
    and "Architecture". Raises Exception on a non-200 HTTP response.
    """
    url = f"https://huggingface.co/models?pipeline_tag={task}"
    headers = {"User-Agent": "Mozilla/5.0"}  # avoid bot detection
    # A timeout keeps a stalled request from hanging the UI indefinitely.
    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch models: HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    model_cards = soup.find_all("article")

    models_info = []
    for card in model_cards[:10]:  # Limit to top 10
        name_tag = card.find("a", href=True)
        # Hub markup varies between span- and div-based tag elements.
        tags = card.find_all("span", class_="tag") or card.find_all("div", class_="tag")
        models_info.append({
            "Model Name": name_tag.text.strip() if name_tag else "unknown",
            "Task": task,
            "Architecture": tags[0].text.strip() if tags else "unknown",
        })
    return models_info
# π Gradio UI | |
# 🔍 Gradio callback: task extraction + hub scraping wired together.
def model_search_interface(user_input):
    """Gradio handler returning (status message, table rows) for the UI.

    Any failure — model inference, network, or parsing — is caught and
    surfaced in the status textbox instead of crashing the interface.
    """
    try:
        task = extract_task(user_input)
        models = get_models_for_task(task)
        if not models:
            return f"No models found for task '{task}'.", []
        table_data = [[m["Model Name"], m["Task"], m["Architecture"]] for m in models]
        return f"Task identified: {task}", table_data
    except Exception as e:
        return f"❌ Error: {str(e)}", []
# π¨ Launch UI | |
# 🎨 Launch UI
with gr.Blocks() as demo:
    gr.Markdown("### 🔍 HuggingFace Model Search by Task")
    with gr.Row():
        user_input = gr.Textbox(label="Describe the ML task you're interested in:")
        output_msg = gr.Textbox(label="Status", interactive=False)
    model_table = gr.Dataframe(headers=["Model Name", "Task", "Architecture"], label="Top Models")
    btn = gr.Button("🔍 Search Models")
    btn.click(fn=model_search_interface, inputs=user_input, outputs=[output_msg, model_table])

demo.launch()