Roobick committed on
Commit
569b533
·
1 Parent(s): 25ba008

Add Gradio interfaces for dataset search, file listing, and file download in app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -10
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
- import os, json, pathlib
3
  import pandas as pd
4
- import tempfile, pathlib
5
  from typing import List, Dict
6
  from kaggle.api.kaggle_api_extended import KaggleApi
7
 
@@ -13,23 +12,23 @@ def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
13
  Return brief metadata for up to `max_results` public datasets
14
  whose title or description matches `query`.
15
  """
16
- results = api.dataset_list(search=query, max_size=None, page_size=max_results)
17
  out = []
18
- for ds in results:
19
  out.append({
20
  "title": ds.title,
21
  "slug": ds.ref,
22
- "size_mb": round(ds.size/1e6, 2),
23
- "downloads": ds.downloadCount,
24
- "votes": ds.totalVotes,
25
  })
26
  return out
27
 
28
  def list_files(dataset_slug: str) -> List[Dict]:
29
  files = api.dataset_list_files(dataset_slug).files
30
- return [{"name": f.name, "size_mb": round(f.totalBytes / 1e6, 2)} for f in files]
31
 
32
- def downlaod_file(dataset_slug: str, file_name: str):
33
  tmp_dir = tempfile.mkdtemp()
34
  api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
35
  zip_path = pathlib.Path(tmp_dir) / f"{file_name}"
@@ -38,6 +37,41 @@ def downlaod_file(dataset_slug: str, file_name: str):
38
  zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
39
  return str(zip_path)
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def _bootstrap_kaggle_credentials():
42
  user = os.getenv("KAGGLE_USERNAME")
43
  key = os.getenv("KAGGLE_KEY")
@@ -55,4 +89,7 @@ def _bootstrap_kaggle_credentials():
55
  _bootstrap_kaggle_credentials()
56
 
57
  api = KaggleApi()
58
- api.authenticate()
 
 
 
 
1
  import gradio as gr
2
+ import os, json, pathlib, tempfile
3
  import pandas as pd
 
4
  from typing import List, Dict
5
  from kaggle.api.kaggle_api_extended import KaggleApi
6
 
 
12
  Return brief metadata for up to `max_results` public datasets
13
  whose title or description matches `query`.
14
  """
15
+ results = api.dataset_list(search=query, max_size= None)
16
  out = []
17
+ for ds in results[:max_results]:
18
  out.append({
19
  "title": ds.title,
20
  "slug": ds.ref,
21
+ "size_mb": round(ds.total_bytes/1e6, 2),
22
+ "downloads": ds.download_count,
23
+ "votes": ds.vote_count,
24
  })
25
  return out
26
 
27
  def list_files(dataset_slug: str) -> List[Dict]:
28
  files = api.dataset_list_files(dataset_slug).files
29
+ return [{"name": f.name, "size_mb": round(f.total_bytes / 1e6, 2)} for f in files]
30
 
31
+ def download_file(dataset_slug: str, file_name: str):
32
  tmp_dir = tempfile.mkdtemp()
33
  api.dataset_download_file(dataset_slug, file_name, path=tmp_dir, quiet=False)
34
  zip_path = pathlib.Path(tmp_dir) / f"{file_name}"
 
37
  zip_path = pathlib.Path(tmp_dir) / f"{file_name}.zip"
38
  return str(zip_path)
39
 
40
+ search_iface = gr.Interface(
41
+ fn=search_datasets,
42
+ inputs=[
43
+ gr.Textbox(label="Search term", placeholder="e.g. titanic"),
44
+ gr.Slider(1, 50, step=1, value=20, label="Max results")
45
+ ],
46
+ outputs=gr.JSON(label="Datasets"),
47
+ title="Search Kaggle Datasets",
48
+ description="Returns a JSON array of dataset metadata."
49
+ )
50
+
51
+ list_files_iface = gr.Interface(
52
+ fn=list_files,
53
+ inputs=gr.Textbox(label="Dataset slug", placeholder="zynicide/wine-reviews"),
54
+ outputs=gr.JSON(label="Files"),
55
+ title="List Dataset Files",
56
+ description="Given a dataset slug, returns its file list."
57
+ )
58
+
59
+ download_file_iface = gr.Interface(
60
+ fn=download_file,
61
+ inputs=[
62
+ gr.Textbox(label="Dataset slug", placeholder="zynicide/wine-reviews"),
63
+ gr.Textbox(label="File name", placeholder="winemag-data_first150k.csv")
64
+ ],
65
+ outputs=gr.File(label="Download file"),
66
+ title="Download a File",
67
+ description="Downloads one file from the dataset and returns it."
68
+ )
69
+
70
+ demo = gr.TabbedInterface(
71
+ [search_iface, list_files_iface, download_file_iface],
72
+ tab_names=["Search", "Files", "Download"]
73
+ )
74
+
75
  def _bootstrap_kaggle_credentials():
76
  user = os.getenv("KAGGLE_USERNAME")
77
  key = os.getenv("KAGGLE_KEY")
 
89
  _bootstrap_kaggle_credentials()
90
 
91
  api = KaggleApi()
92
+ api.authenticate()
93
+
94
+ if __name__ == "__main__":
95
+ demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)