davanstrien HF Staff committed on
Commit
7e36f16
·
1 Parent(s): c4fe3e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -5
app.py CHANGED
@@ -9,23 +9,35 @@ from datetime import date
9
  from datasets import load_dataset
10
  import plotly.express as px
11
  import os
 
 
12
 
13
  HF_TOKEN = os.getenv("HF_TOKEN")
14
  assert HF_TOKEN
15
 
16
 
17
- def yield_models():
 
18
  for model in iter(list_models(full=True)):
 
 
19
  yield "model", model
20
 
21
 
22
- def yield_spaces():
23
  for space in iter(list_spaces(full=True)):
 
 
24
  yield "space", space
25
 
26
 
27
- def yield_notebooks():
28
- for repo_type, repo in concat([yield_models(), yield_spaces()]):
 
 
 
 
 
29
  files = (f.rfilename for f in repo.siblings)
30
  if jupyter_notebook := [f for f in files if Path(f).suffix == ".ipynb"]:
31
  yield {
@@ -36,8 +48,43 @@ def yield_notebooks():
36
  }
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def update_stats():
40
- df = pl.LazyFrame(yield_notebooks())
41
 
42
  df = (
43
  df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
@@ -95,6 +142,13 @@ with gr.Blocks() as demo:
95
  .sort("date")
96
  .to_pandas()
97
  )
 
 
 
 
 
 
 
98
  gr.Markdown("## Notebooks on the Hub raw data")
99
  gr.DataFrame(pandas_df)
100
 
 
9
  from datasets import load_dataset
10
  import plotly.express as px
11
  import os
12
+ from typing import Optional, Set
13
+ from functools import lru_cache
14
 
15
  HF_TOKEN = os.getenv("HF_TOKEN")
16
  assert HF_TOKEN
17
 
18
 
19
def yield_models(exclude_users: Optional[Set[str]] = None):
    """Yield ``("model", model)`` pairs for every model returned by ``list_models``.

    Args:
        exclude_users: Authors whose models are skipped. ``None`` or an
            empty collection disables the filter.
    """
    for hub_model in list_models(full=True):
        # Only consult the exclusion collection when one was supplied.
        skip = bool(exclude_users) and hub_model.author in exclude_users
        if not skip:
            yield "model", hub_model
25
 
26
 
27
def yield_spaces(exclude_users: Optional[Set[str]] = None):
    """Yield ``("space", space)`` pairs for every Space returned by ``list_spaces``.

    Args:
        exclude_users: Authors whose Spaces are skipped. ``None`` or an
            empty collection disables the filter.
    """
    for hub_space in list_spaces(full=True):
        # Only consult the exclusion collection when one was supplied.
        skip = bool(exclude_users) and hub_space.author in exclude_users
        if not skip:
            yield "space", hub_space
32
 
33
 
34
+ def yield_notebooks_counts(exclude_users: Optional[Set[str]] = None):
35
+ for repo_type, repo in concat(
36
+ [
37
+ yield_models(exclude_users=exclude_users),
38
+ yield_spaces(exclude_users=exclude_users),
39
+ ]
40
+ ):
41
  files = (f.rfilename for f in repo.siblings)
42
  if jupyter_notebook := [f for f in files if Path(f).suffix == ".ipynb"]:
43
  yield {
 
48
  }
49
 
50
 
51
def yield_notebooks(exclude_users: Optional[Set[str]] = None):
    """Yield one record per model or Space repo that contains a notebook.

    Models and Spaces are streamed back to back; a repo is reported only if
    at least one of its sibling files has the ``.ipynb`` suffix.

    Args:
        exclude_users: Authors to skip, forwarded to ``yield_models`` and
            ``yield_spaces``.

    Yields:
        dict with keys ``repo_type``, ``repo_id``, ``repo_notebook_count``,
        ``jupyter_notebooks`` (list of notebook file names) and ``likes``.
    """
    repos = concat(
        [
            yield_models(exclude_users=exclude_users),
            yield_spaces(exclude_users=exclude_users),
        ]
    )
    for repo_type, repo in repos:
        notebooks = [
            sibling.rfilename
            for sibling in repo.siblings
            if Path(sibling.rfilename).suffix == ".ipynb"
        ]
        if not notebooks:
            # Repos without any notebook are not reported.
            continue
        yield {
            "repo_type": repo_type,
            "repo_id": repo.id,
            "repo_notebook_count": len(notebooks),
            "jupyter_notebooks": notebooks,
            "likes": repo.likes,
        }
67
+
68
+
69
@lru_cache
def _get_top_liked_repos_with_notebooks(
    exclude_users: Optional[tuple[str, ...]] = None,
):
    """Return the notebook-bearing repos sorted by likes, most liked first.

    The result is memoized with ``lru_cache``, so ``exclude_users`` must be
    hashable: pass a tuple (of any length) rather than a set.

    Args:
        exclude_users: Authors to skip, forwarded to ``yield_notebooks``.

    Returns:
        The collected polars frame built from ``yield_notebooks`` records,
        ordered by the ``likes`` column descending.
    """
    notebooks = pl.LazyFrame(yield_notebooks(exclude_users=exclude_users))
    return notebooks.sort("likes", descending=True).collect()
74
+
75
+
76
def get_top_liked_repos_with_notebooks(exclude_users: Optional[Set[str]] = None):
    """Return notebook-bearing repos sorted by likes, using the cached helper.

    Args:
        exclude_users: Authors to skip. ``None`` or an empty collection
            means no author is excluded.

    Returns:
        Whatever ``_get_top_liked_repos_with_notebooks`` returns for the
        normalized exclusion tuple.
    """
    # The cached helper needs a hashable argument. Sort before building the
    # tuple so that equal sets always map to the same cache key; plain
    # ``tuple(a_set)`` depends on set iteration order and could miss the cache.
    cache_key = tuple(sorted(exclude_users)) if exclude_users else None
    return _get_top_liked_repos_with_notebooks(cache_key)
79
+
80
+
81
def get_top_k_notebooks_by_repo_type(type: str = "space", k: int = 50):
    """Return the first ``k`` most-liked notebook repos of one repo type.

    Repos authored by ``gradio`` are always excluded.

    Args:
        type: Value matched against the ``repo_type`` column.
        k: Number of rows to keep from the top of the ranking.

    Returns:
        The selected rows converted with ``to_pandas()``.
    """
    ranked = get_top_liked_repos_with_notebooks({"gradio"})
    matching = ranked.filter(pl.col("repo_type") == type)
    return matching.head(k).to_pandas()
84
+
85
+
86
  def update_stats():
87
+ df = pl.LazyFrame(yield_notebooks_counts())
88
 
89
  df = (
90
  df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
 
142
  .sort("date")
143
  .to_pandas()
144
  )
145
+
146
+ gr.Markdown("Top Repos by likes with notebooks")
147
+ # k = gr.Slider(10, 100, 10,step=5, label="k",interactive=True)
148
+ # repo_type = gr.Dropdown(["space", "model"], value="space", label="repo_type")
149
+ gr.DataFrame(get_top_k_notebooks_by_repo_type("space", 10)[['repo_id','likes']])
150
+ gr.DataFrame(get_top_k_notebooks_by_repo_type("model", 10)[['repo_id','likes']])
151
+ # repo_type.update(get_top_k_notebooks_by_repo_type, [repo_type, k],[df])
152
  gr.Markdown("## Notebooks on the Hub raw data")
153
  gr.DataFrame(pandas_df)
154