Spaces:

Agents-MCP-Hackathon
/

kaggle-mcp-server

Running

File size: 4,504 Bytes

import gradio as gr
import os, json, pathlib, tempfile, datetime
from typing import List, Dict, Optional

from dotenv import load_dotenv
load_dotenv()

def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
    """
    Return brief metadata for up to `max_results` public datasets
    whose title or description matches `query`.
    """
    results = api.dataset_list(search=query, max_size= None)
    out = []
    for ds in results[:max_results]:
        out.append({
            "title": ds.title,
            "slug": ds.ref,
            "size_mb": round(ds.total_bytes/1e6, 2),
            "downloads": ds.download_count,
            "votes": ds.vote_count,
        })
    return out

def list_files(dataset_slug: str) -> List[Dict]:
    files = api.dataset_list_files(dataset_slug).files
    return [{"name": f.name, "size_mb": round(f.total_bytes / 1e6, 2)} for f in files]

def download_file(dataset_slug: str, file_name: str):
    tmp_dir = tempfile.mkdtemp()
    api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
    zip_path = pathlib.Path(tmp_dir) / f"{file_name}"

    if not zip_path.exists():
        zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
    return str(zip_path)

def search_kernels(query: str, max_results: int = 20) -> List[Dict]:
    
    kernels = api.kernels_list(
        search=query,
        page_size=min(max_results, 20),
        sort_by="voteCount",
    )

    out = []
    for k in kernels[:max_results]:
        last_run_raw = getattr(k, "lastRunTime", None) or getattr(k, "updated", None)
        try:
            last_run = (
                datetime.datetime.fromisoformat(last_run_raw.rstrip("z"))
                .strftime("%Y-%m-%d %H:%M") if last_run_raw else None
            )
        except Exception:
            last_run = last_run_raw
        out.append(
            {
                "title": k.title,
                "ref": k.ref,
                "language": getattr(k, "language", None),
                "kernel_type": getattr(k, "kernelType", None),
                "votes": k.total_votes,
                "last_run": last_run,
            }
        )
    return out
    

search_iface = gr.Interface(
    fn=search_datasets,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. fashion mnist"),
        gr.Slider(1, 50, step=1, value=20, label="Max results")
    ],
    outputs=gr.JSON(label="Datasets"),
    title="Search kaggle Datasets",
    description="Resturns a JSON array of dataset metadata."
)

list_files_iface = gr.Interface(
    fn=list_files,
    inputs=gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
    outputs=gr.JSON(label="Files"),
    title="List Dataset Files",
    description="Given a dataset slug, returns its file list."
)

download_file_iface = gr.Interface(
    fn=download_file,
    inputs=[
        gr.Textbox(label="Dataset slug", placeholder="zalando-research/fashionmnist"),
        gr.Textbox(label="File name", placeholder="fashion-mnist_test.csv")
    ],
    outputs=gr.File(label="Download file"),
    title="Download a File",
    description="Downloads one file from the dataset and returns it."
)

search_kernels_iface = gr.Interface(
    fn=search_kernels,
    inputs=[
        gr.Textbox(label="search term", placeholder="e.g. computer vision"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="kernels"),
    title="Search kaggle kernels",
    description="Find notebook or script kernels by keyword."
)

demo = gr.TabbedInterface(
    [search_iface, list_files_iface, download_file_iface, search_kernels_iface],
    tab_names=["Search Datasets", "Files", "Download File", "Search Kernels"],
)

def _bootstrap_kaggle_credentials():
    user = os.getenv("KAGGLE_USERNAME")
    key = os.getenv("KAGGLE_KEY")
    if not (user and key):
        raise RuntimeError(
            "Kaggle credentials not found."
            "Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env"
        )
    cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json"
    if not cred_path.exists():
        cred_path.parent.mkdir(exist_ok=True)
        cred_path.write_text(json.dumps({"username": user, "key": key}))
        cred_path.chmod(0o600)

_bootstrap_kaggle_credentials()  

from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)