import gradio as gr
import os, json, pathlib, tempfile, datetime, shutil
from typing import List, Dict

from dotenv import load_dotenv

load_dotenv()


def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
    """
    Return brief metadata for up to `max_results` public datasets whose
    title or description matches `query`.
    """
    results = api.dataset_list(search=query)
    out = []
    for ds in results[:max_results]:
        out.append({
            "title": ds.title,
            "slug": ds.ref,
            "size_mb": round(ds.total_bytes / 1e6, 2),
            "downloads": ds.download_count,
            "votes": ds.vote_count,
        })
    return out


def list_files(dataset_slug: str) -> List[Dict]:
    """Return the name and size of every file in a dataset."""
    files = api.dataset_list_files(dataset_slug).files
    return [{"name": f.name, "size_mb": round(f.total_bytes / 1e6, 2)} for f in files]


def download_dataset_file(dataset_slug: str, file_name: str) -> str:
    """Download a single dataset file and return its local path."""
    tmp_dir = tempfile.mkdtemp()
    api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
    # The API delivers some files as-is and others zipped.
    zip_path = pathlib.Path(tmp_dir) / file_name
    if not zip_path.exists():
        zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
    return str(zip_path)


def search_kernels(query: str, max_results: int = 20) -> List[Dict]:
    """Return brief metadata for up to `max_results` kernels matching `query`."""
    kernels = api.kernels_list(
        search=query,
        page_size=min(max_results, 20),
        sort_by="voteCount",
    )
    out = []
    for k in kernels[:max_results]:
        last_run_raw = getattr(k, "lastRunTime", None) or getattr(k, "updated", None)
        try:
            # Timestamps may carry a trailing "Z", which fromisoformat rejects.
            last_run = (
                datetime.datetime.fromisoformat(last_run_raw.rstrip("Zz"))
                .strftime("%Y-%m-%d %H:%M")
                if last_run_raw
                else None
            )
        except Exception:
            last_run = last_run_raw
        out.append(
            {
                "title": k.title,
                "ref": k.ref,
                "language": getattr(k, "language", None),
                "kernel_type": getattr(k, "kernelType", None),
                "votes": k.total_votes,
                "last_run": last_run,
            }
        )
    return out


def download_kernel_notebook(kernel_ref: str) -> str:
    """Pull a kernel (with its metadata) and return it as a ZIP archive."""
    tmp_dir = tempfile.mkdtemp()
    api.kernels_pull(kernel_ref, path=tmp_dir, metadata=True, quiet=False)
    zip_path = shutil.make_archive(
        base_name=os.path.join(tmp_dir, "kernel"),
        format="zip",
        root_dir=tmp_dir,
    )
    return zip_path


search_iface = gr.Interface(
    fn=search_datasets,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. fashion mnist"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="Datasets"),
    title="Search Kaggle Datasets",
    description="Returns a JSON array of dataset metadata.",
)

download_kernel_iface = gr.Interface(
    fn=download_kernel_notebook,
    inputs=gr.Textbox(
        label="Kernel reference",
        placeholder="e.g. username/notebook-name",
    ),
    outputs=gr.File(label="Download .zip"),
    title="Pull Kaggle Kernel",
    description="Downloads the notebook or script kernel and returns a ZIP archive.",
)

list_files_iface = gr.Interface(
    fn=list_files,
    inputs=gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
    outputs=gr.JSON(label="Files"),
    title="List Dataset Files",
    description="Given a dataset slug, returns its file list.",
)

download_dataset_iface = gr.Interface(
    fn=download_dataset_file,
    inputs=[
        gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
        gr.Textbox(label="File name", placeholder="fashion-mnist_test.csv"),
    ],
    outputs=gr.File(label="Download file"),
    title="Download a File",
    description="Downloads one file from the dataset and returns it.",
)

search_kernels_iface = gr.Interface(
    fn=search_kernels,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. computer vision"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="Kernels"),
    title="Search Kaggle Kernels",
    description="Find notebook or script kernels by keyword.",
)
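
# The tab list below names an "Upload kernel zip" tool that has no matching
# interface in this file. What follows is a minimal sketch of one, assuming
# the uploaded archive contains a kernel-metadata.json (as produced by
# `download_kernel_notebook` above, since kernels_pull was called with
# metadata=True). `upload_kernel_zip` and its interface are illustrative
# names, not confirmed app code.
def upload_kernel_zip(zip_path: str) -> str:
    """Extract an uploaded kernel archive and push it back to Kaggle."""
    import zipfile

    tmp_dir = tempfile.mkdtemp()
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(tmp_dir)
    # kernels_push expects a folder containing kernel-metadata.json.
    result = api.kernels_push(tmp_dir)
    return getattr(result, "ref", None) or str(result)


upload_kernel_iface = gr.Interface(
    fn=upload_kernel_zip,
    inputs=gr.File(label="Kernel .zip", type="filepath", file_types=[".zip"]),
    outputs=gr.Textbox(label="Push result"),
    title="Upload Kaggle Kernel",
    description="Extracts a kernel archive and pushes it to Kaggle.",
)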
demo = gr.TabbedInterface(
    [
        search_iface,
        list_files_iface,
        download_dataset_iface,
        search_kernels_iface,
        download_kernel_iface,
        upload_kernel_iface,
    ],
    tab_names=[
        "Search Datasets",
        "Files",
        "Download Dataset",
        "Search Kernels",
        "Download Kernel",
        "Upload Kernel Zip",
    ],
)


def _bootstrap_kaggle_credentials():
    """Write ~/.kaggle/kaggle.json from env vars if it does not already exist."""
    user = os.getenv("KAGGLE_USERNAME")
    key = os.getenv("KAGGLE_KEY")
    if not (user and key):
        raise RuntimeError(
            "Kaggle credentials not found. "
            "Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env"
        )
    cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json"
    if not cred_path.exists():
        cred_path.parent.mkdir(exist_ok=True)
        cred_path.write_text(json.dumps({"username": user, "key": key}))
        cred_path.chmod(0o600)  # the Kaggle client rejects world-readable keys


_bootstrap_kaggle_credentials()

# Imported only after the credentials file exists, since the kaggle package
# looks for it when authenticating.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)
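
# With mcp_server=True, recent Gradio releases also expose the functions above
# as MCP tools; in current versions the endpoint is served under
# /gradio_api/mcp/ (the exact path may vary by Gradio version -- check the URL
# printed in the startup log).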