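"""Gradio app exposing Kaggle dataset/kernel search, file listing, and file
download, both as a tabbed web UI and as MCP tools (via mcp_server=True)."""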
import gradio as gr
import os, json, pathlib, tempfile, datetime
from typing import Dict, List

from dotenv import load_dotenv

load_dotenv()  # pull KAGGLE_USERNAME / KAGGLE_KEY from a local .env, if present


def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
    """
    Return brief metadata for up to `max_results` public datasets
    whose title or description matches `query`.
    """
    results = api.dataset_list(search=query, max_size=None)
    out = []
    for ds in results[:max_results]:
        out.append({
            "title": ds.title,
            "slug": ds.ref,
            "size_mb": round(ds.total_bytes / 1e6, 2),
            "downloads": ds.download_count,
            "votes": ds.vote_count,
        })
    return out
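
# Illustrative call (shape only; real values depend on Kaggle's live index):
#   search_datasets("fashion mnist", max_results=1)
#   -> [{"title": ..., "slug": "zalando-research/fashionmnist",
#        "size_mb": ..., "downloads": ..., "votes": ...}]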


def list_files(dataset_slug: str) -> List[Dict]:
    """Return the name and size (MB) of each file in the given dataset."""
    files = api.dataset_list_files(dataset_slug).files
    return [{"name": f.name, "size_mb": round(f.total_bytes / 1e6, 2)} for f in files]


def download_file(dataset_slug: str, file_name: str) -> str:
    """Download one file from a dataset and return its local path."""
    tmp_dir = tempfile.mkdtemp()
    api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
    # Kaggle may deliver the file as-is or wrapped in a .zip archive, so fall
    # back to the zipped name if the plain one is missing.
    zip_path = pathlib.Path(tmp_dir) / file_name
    if not zip_path.exists():
        zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
    return str(zip_path)
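
# Illustrative call (the temp directory name is random; path shown is hypothetical):
#   download_file("zalando-research/fashionmnist", "fashion-mnist_test.csv")
#   -> "/tmp/tmpab12cd34/fashion-mnist_test.csv" (or the same name with ".zip"
#      appended when Kaggle serves the file compressed)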


def search_kernels(query: str, max_results: int = 20) -> List[Dict]:
    """Return brief metadata for up to `max_results` public kernels matching `query`."""
    kernels = api.kernels_list(
        search=query,
        page_size=min(max_results, 20),
        sort_by="voteCount",
    )
    out = []
    for k in kernels[:max_results]:
        last_run_raw = getattr(k, "lastRunTime", None) or getattr(k, "updated", None)
        try:
            # Timestamps arrive as ISO-8601 strings, often with a trailing "Z";
            # strip it case-insensitively before parsing.
            last_run = (
                datetime.datetime.fromisoformat(last_run_raw.rstrip("Zz"))
                .strftime("%Y-%m-%d %H:%M") if last_run_raw else None
            )
        except Exception:
            last_run = last_run_raw
        out.append(
            {
                "title": k.title,
                "ref": k.ref,
                "language": getattr(k, "language", None),
                "kernel_type": getattr(k, "kernelType", None),
                "votes": k.total_votes,
                "last_run": last_run,
            }
        )
    return out
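
# Illustrative call (shape only; real values depend on Kaggle's live index):
#   search_kernels("computer vision", max_results=1)
#   -> [{"title": ..., "ref": ..., "language": ..., "kernel_type": ...,
#        "votes": ..., "last_run": "YYYY-MM-DD HH:MM"}]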


search_iface = gr.Interface(
    fn=search_datasets,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. fashion mnist"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="Datasets"),
    title="Search Kaggle Datasets",
    description="Returns a JSON array of dataset metadata.",
)

list_files_iface = gr.Interface(
    fn=list_files,
    inputs=gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
    outputs=gr.JSON(label="Files"),
    title="List Dataset Files",
    description="Given a dataset slug, returns its file list.",
)

download_file_iface = gr.Interface(
    fn=download_file,
    inputs=[
        gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
        gr.Textbox(label="File name", placeholder="fashion-mnist_test.csv"),
    ],
    outputs=gr.File(label="Download file"),
    title="Download a File",
    description="Downloads one file from the dataset and returns it.",
)

search_kernels_iface = gr.Interface(
    fn=search_kernels,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. computer vision"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="Kernels"),
    title="Search Kaggle Kernels",
    description="Find notebook or script kernels by keyword.",
)

demo = gr.TabbedInterface(
    [search_iface, list_files_iface, download_file_iface, search_kernels_iface],
    tab_names=["Search Datasets", "Files", "Download File", "Search Kernels"],
)
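
# With mcp_server=True in launch() below, Gradio also exposes each wrapped
# function as an MCP tool; the docstrings and type hints above double as the
# tool descriptions and parameter schemas shown to MCP clients.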


def _bootstrap_kaggle_credentials():
    """Write ~/.kaggle/kaggle.json from env vars so the Kaggle client can authenticate."""
    user = os.getenv("KAGGLE_USERNAME")
    key = os.getenv("KAGGLE_KEY")
    if not (user and key):
        raise RuntimeError(
            "Kaggle credentials not found. "
            "Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env"
        )
    cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json"
    if not cred_path.exists():
        cred_path.parent.mkdir(exist_ok=True)
        cred_path.write_text(json.dumps({"username": user, "key": key}))
        cred_path.chmod(0o600)


_bootstrap_kaggle_credentials()
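
# Deferred import: the kaggle package tries to authenticate (reading
# ~/.kaggle/kaggle.json) as soon as it is imported, so the credentials file
# must be in place before this line runs.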
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)