Roobick committed on
Commit
25ba008
·
1 Parent(s): e9c115c

Implement Kaggle API integration with dataset search, file listing, and download functionality in app.py

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +55 -4
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .mcp-venv/
app.py CHANGED
@@ -1,7 +1,58 @@
1
  import gradio as gr
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch(mcp_server=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os, json, pathlib
3
+ import pandas as pd
4
+ import tempfile, pathlib
5
+ from typing import List, Dict
6
+ from kaggle.api.kaggle_api_extended import KaggleApi
7
 
8
+ from dotenv import load_dotenv
9
+ load_dotenv()
10
 
11
def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
    """Search Kaggle datasets for `query` and summarise each hit.

    The query is forwarded to ``api.dataset_list`` with no upper size limit
    and a requested page size of `max_results`. One dict is returned per
    dataset, carrying the keys ``title``, ``slug``, ``size_mb``,
    ``downloads`` and ``votes``.
    """
    # NOTE(review): confirm that the installed kaggle client accepts the
    # `page_size` keyword and exposes `size` as a number (it is divided by
    # 1e6 below) -- TODO verify against the pinned kaggle version.
    matches = api.dataset_list(search=query, max_size=None, page_size=max_results)
    return [
        {
            "title": entry.title,
            "slug": entry.ref,
            "size_mb": round(entry.size/1e6, 2),
            "downloads": entry.downloadCount,
            "votes": entry.totalVotes,
        }
        for entry in matches
    ]
27
+
28
def list_files(dataset_slug: str) -> List[Dict]:
    """List the files of the dataset identified by `dataset_slug`.

    Calls ``api.dataset_list_files`` and returns one dict per entry of its
    ``files`` attribute, with the file ``name`` and ``size_mb``
    (``totalBytes`` divided by 1e6, rounded to two decimals).
    """
    listing = api.dataset_list_files(dataset_slug)
    summary = []
    for item in listing.files:
        size_mb = round(item.totalBytes / 1e6, 2)
        summary.append({"name": item.name, "size_mb": size_mb})
    return summary
31
+
32
def downlaod_file(dataset_slug: str, file_name: str) -> str:
    """Download one file of a Kaggle dataset into a fresh temporary directory.

    Args:
        dataset_slug: Dataset reference passed to ``api.dataset_download_file``.
        file_name: Name of the file inside the dataset.

    Returns:
        The path (as a string) of the downloaded file: either
        ``<tmp>/<file_name>`` or, if that does not exist,
        ``<tmp>/<file_name>.zip``. The caller owns the temporary directory
        and is responsible for deleting it.

    Raises:
        FileNotFoundError: If neither candidate path exists after the
            download call returns (previously a path to a nonexistent file
            was returned silently).
    """
    import shutil  # local import: only needed for cleanup on failure

    tmp_dir = pathlib.Path(tempfile.mkdtemp())
    try:
        api.dataset_download_file(
            dataset_slug, file_name, path=str(tmp_dir), quiet=False
        )
        # The file may be stored as-is or as "<name>.zip"; try both names.
        for candidate in (tmp_dir / f"{file_name}", tmp_dir / f"{file_name}.zip"):
            if candidate.exists():
                return str(candidate)
        raise FileNotFoundError(
            f"Download of {file_name!r} from {dataset_slug!r} produced no file "
            f"in {tmp_dir}"
        )
    except Exception:
        # Do not leave an orphaned temp directory behind when nothing usable
        # was downloaded.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise


# Correctly spelled alias; the misspelled name is kept so existing callers
# continue to work.
download_file = downlaod_file
40
+
41
def _bootstrap_kaggle_credentials():
    """Ensure ``~/.kaggle/kaggle.json`` exists, creating it from env vars.

    Reads ``KAGGLE_USERNAME`` and ``KAGGLE_KEY`` from the environment. If the
    credentials file is absent, it is written as JSON with ``username`` and
    ``key`` fields and restricted to mode ``0o600``. An existing file is left
    untouched.

    Raises:
        RuntimeError: If either environment variable is missing or empty.
    """
    user = os.getenv("KAGGLE_USERNAME")
    key = os.getenv("KAGGLE_KEY")
    if not (user and key):
        raise RuntimeError(
            "Kaggle credentials not found. "
            "Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env"
        )

    cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json"
    if cred_path.exists():
        return

    cred_path.parent.mkdir(parents=True, exist_ok=True)
    # Create the file with owner-only permissions *before* the secret is
    # written, so it is never readable by other users, not even briefly.
    cred_path.touch(mode=0o600)
    cred_path.write_text(json.dumps({"username": user, "key": key}))
    cred_path.chmod(0o600)
54
+
55
# Runs at import time: make sure ~/.kaggle/kaggle.json exists, or raise
# RuntimeError if the credential env vars are missing, before the client is
# created.
_bootstrap_kaggle_credentials()

# Module-level Kaggle client referenced as `api` by the functions in this
# file; authenticate() is called once here, at import time.
api = KaggleApi()
api.authenticate()