davanstrien HF Staff committed on
Commit
3852cad
·
1 Parent(s): d12dd0d
Files changed (1) hide show
  1. app.py +37 -26
app.py CHANGED
@@ -88,18 +88,28 @@ def get_top_k_notebooks_by_repo_type(type: str = "space", k: int = 50):
88
 
89
 
90
  def raw_current_notebook_dataframe():
91
- df = pl.DataFrame(yield_notebooks_counts(exclude_users="gradio"))
 
 
92
  return df.to_pandas()
93
 
94
 
95
  def update_stats():
96
- df = pl.LazyFrame(yield_notebooks_counts(exclude_users="gradio"))
97
-
 
98
  df = (
99
  df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
100
  .unnest("repo_id")
101
  .rename({"field_0": "user", "field_1": "repo_id"})
102
  )
 
 
 
 
 
 
 
103
  by_user_count = (
104
  df.groupby("user")
105
  .agg(pl.col("repo_notebook_count").sum())
@@ -135,31 +145,32 @@ def update_stats():
135
  final_df = final_df.sort("date")
136
  pandas_df = final_df.to_pandas()
137
  # final_df.to_pandas().set_index("date", drop=True).sort_index()
138
- return pandas_df, final_df
139
 
140
 
141
  with gr.Blocks() as demo:
142
- gr.Markdown("# Notebooks on the Hub (updated daily)")
143
- pandas_df, final_df = update_stats()
144
- gr.Markdown("## Notebooks on the Hub over time")
145
- gr.Plot(px.line(pandas_df, x="date", y="repo_notebook_count", color="repo_type"))
146
- gr.Markdown("## Notebooks on the Hub (total by date)")
147
- gr.DataFrame(
148
- final_df.select(pl.col(["date", "repo_notebook_count"]))
149
- .groupby("date")
150
- .sum()
151
- .sort("date")
152
- .to_pandas()
153
- )
154
-
155
- gr.Markdown("Top Repos by likes with notebooks")
156
- # k = gr.Slider(10, 100, 10,step=5, label="k",interactive=True)
157
- # repo_type = gr.Dropdown(["space", "model"], value="space", label="repo_type")
158
- gr.DataFrame(get_top_k_notebooks_by_repo_type("space", 10)[["repo_id", "likes"]])
159
- gr.DataFrame(get_top_k_notebooks_by_repo_type("model", 10)[["repo_id", "likes"]])
160
- # repo_type.update(get_top_k_notebooks_by_repo_type, [repo_type, k],[df])
161
- gr.Markdown("## Notebooks on the Hub raw data")
162
- gr.DataFrame(pandas_df)
163
- # gr.DataFrame(raw_current_notebook_dataframe())
 
164
 
165
  demo.launch(debug=True)
 
88
 
89
 
90
  def raw_current_notebook_dataframe():
91
+ df = pl.DataFrame(
92
+ yield_notebooks_counts(exclude_users={"gradio", "gradio-pr-deploys"})
93
+ )
94
  return df.to_pandas()
95
 
96
 
97
  def update_stats():
98
+ df = pl.LazyFrame(
99
+ yield_notebooks_counts(exclude_users={"gradio", "gradio-pr-deploys"})
100
+ )
101
  df = (
102
  df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
103
  .unnest("repo_id")
104
  .rename({"field_0": "user", "field_1": "repo_id"})
105
  )
106
+ previous_raw_df = pl.DataFrame(
107
+ load_dataset("davanstrien/notebooks_on_the_hub_raw", split="train").data.table
108
+ )
109
+ final_raw_df = pl.concat([previous_raw_df, df.collect()]).unique()
110
+ Dataset(final_raw_df.to_arrow()).push_to_hub(
111
+ "davanstrien/notebooks_on_the_hub_raw", token=HF_TOKEN
112
+ )
113
  by_user_count = (
114
  df.groupby("user")
115
  .agg(pl.col("repo_notebook_count").sum())
 
145
  final_df = final_df.sort("date")
146
  pandas_df = final_df.to_pandas()
147
  # final_df.to_pandas().set_index("date", drop=True).sort_index()
148
+ return pandas_df, final_df, final_raw_df
149
 
150
 
151
  with gr.Blocks() as demo:
152
+ with gr.Tab("Notebooks on the Hub stats"):
153
+ gr.Markdown("# Notebooks on the Hub (updated daily)")
154
+ pandas_df, final_df, final_raw_df = update_stats()
155
+ gr.Markdown("## Notebooks on the Hub over time")
156
+ gr.Plot(px.line(pandas_df, x="date", y="repo_notebook_count", color="repo_type"))
157
+ gr.Markdown("## Notebooks on the Hub (total by date)")
158
+ gr.DataFrame(
159
+ final_df.select(pl.col(["date", "repo_notebook_count"]))
160
+ .groupby("date")
161
+ .sum()
162
+ .sort("date")
163
+ .to_pandas()
164
+ )
165
+
166
+ gr.Markdown("## Top Repos by likes with notebooks")
167
+ gr.Markdown("#### Top 10 Spaces")
168
+ gr.DataFrame(get_top_k_notebooks_by_repo_type("space", 10)[["repo_id", "likes"]])
169
+ gr.Markdown("#### Top 10 Models")
170
+ gr.DataFrame(get_top_k_notebooks_by_repo_type("model", 10)[["repo_id", "likes"]])
171
+ # repo_type.update(get_top_k_notebooks_by_repo_type, [repo_type, k],[df])
172
+ with gr.Tab("raw data"):
173
+ gr.Markdown("## Notebooks on the Hub raw data")
174
+ gr.DataFrame(final_raw_df.to_pandas())
175
 
176
  demo.launch(debug=True)