File size: 3,112 Bytes
b99c98e 569b533 25ba008 b99c98e 25ba008 b99c98e 25ba008 569b533 25ba008 569b533 25ba008 569b533 25ba008 569b533 25ba008 569b533 25ba008 569b533 a678f86 25ba008 e9d2479 25ba008 e9d2479 25ba008 569b533 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import gradio as gr
import os, json, pathlib, tempfile
from typing import List, Dict
from dotenv import load_dotenv
load_dotenv()
def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
"""
Return brief metadata for up to `max_results` public datasets
whose title or description matches `query`.
"""
results = api.dataset_list(search=query, max_size= None)
out = []
for ds in results[:max_results]:
out.append({
"title": ds.title,
"slug": ds.ref,
"size_mb": round(ds.total_bytes/1e6, 2),
"downloads": ds.download_count,
"votes": ds.vote_count,
})
return out
def list_files(dataset_slug: str) -> List[Dict]:
files = api.dataset_list_files(dataset_slug).files
return [{"name": f.name, "size_mb": round(f.total_bytes / 1e6, 2)} for f in files]
def download_file(dataset_slug: str, file_name: str):
tmp_dir = tempfile.mkdtemp()
api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
zip_path = pathlib.Path(tmp_dir) / f"{file_name}"
if not zip_path.exists():
zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
return str(zip_path)
search_iface = gr.Interface(
fn=search_datasets,
inputs=[
gr.Textbox(label="Search term", placeholder="e.g. titanic"),
gr.Slider(1, 50, step=1, value=20, label="Max results")
],
outputs=gr.JSON(label="Datasets"),
title="Search kaggle Datasets",
description="Resturns a JSON array of dataset metadata."
)
list_files_iface = gr.Interface(
fn=list_files,
inputs=gr.Textbox(label="Dataset slug", placeholder="zynicide/wine-reviews"),
outputs=gr.JSON(label="Files"),
title="List Dataset Files",
description="Given a dataset slug, returns its file list."
)
download_file_iface = gr.Interface(
fn=download_file,
inputs=[
gr.Textbox(label="Dataset slug", placeholder="zynicide/wine-reviews"),
gr.Textbox(label="File name", placeholder="winemag-data_first150k.csv")
],
outputs=gr.File(label="Download file"),
title="Download a File",
description="Downloads one file from the dataset and returns it."
)
demo = gr.TabbedInterface(
[search_iface, list_files_iface, download_file_iface],
tab_names=["Search", "Files", "Download"]
)
def _bootstrap_kaggle_credentials():
user = os.getenv("KAGGLE_USERNAME")
key = os.getenv("KAGGLE_KEY")
if not (user and key):
raise RuntimeError(
"Kaggle credentials not found."
"Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env"
)
cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json"
if not cred_path.exists():
cred_path.parent.mkdir(exist_ok=True)
cred_path.write_text(json.dumps({"username": user, "key": key}))
cred_path.chmod(0o600)
_bootstrap_kaggle_credentials()
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True) |