Spaces:
Runtime error
Runtime error
Commit
·
3852cad
1
Parent(s):
d12dd0d
update
Browse files
app.py
CHANGED
@@ -88,18 +88,28 @@ def get_top_k_notebooks_by_repo_type(type: str = "space", k: int = 50):
|
|
88 |
|
89 |
|
90 |
def raw_current_notebook_dataframe():
|
91 |
-
df = pl.DataFrame(
|
|
|
|
|
92 |
return df.to_pandas()
|
93 |
|
94 |
|
95 |
def update_stats():
|
96 |
-
df = pl.LazyFrame(
|
97 |
-
|
|
|
98 |
df = (
|
99 |
df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
|
100 |
.unnest("repo_id")
|
101 |
.rename({"field_0": "user", "field_1": "repo_id"})
|
102 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
by_user_count = (
|
104 |
df.groupby("user")
|
105 |
.agg(pl.col("repo_notebook_count").sum())
|
@@ -135,31 +145,32 @@ def update_stats():
|
|
135 |
final_df = final_df.sort("date")
|
136 |
pandas_df = final_df.to_pandas()
|
137 |
# final_df.to_pandas().set_index("date", drop=True).sort_index()
|
138 |
-
return pandas_df, final_df
|
139 |
|
140 |
|
141 |
with gr.Blocks() as demo:
|
142 |
-
gr.
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
gr.
|
163 |
-
|
|
|
164 |
|
165 |
demo.launch(debug=True)
|
|
|
88 |
|
89 |
|
90 |
def raw_current_notebook_dataframe():
|
91 |
+
df = pl.DataFrame(
|
92 |
+
yield_notebooks_counts(exclude_users={"gradio", "gradio-pr-deploys"})
|
93 |
+
)
|
94 |
return df.to_pandas()
|
95 |
|
96 |
|
97 |
def update_stats():
|
98 |
+
df = pl.LazyFrame(
|
99 |
+
yield_notebooks_counts(exclude_users={"gradio", "gradio-pr-deploys"})
|
100 |
+
)
|
101 |
df = (
|
102 |
df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
|
103 |
.unnest("repo_id")
|
104 |
.rename({"field_0": "user", "field_1": "repo_id"})
|
105 |
)
|
106 |
+
previous_raw_df = pl.DataFrame(
|
107 |
+
load_dataset("davanstrien/notebooks_on_the_hub_raw", split="train").data.table
|
108 |
+
)
|
109 |
+
final_raw_df = pl.concat([previous_raw_df, df.collect()]).unique()
|
110 |
+
Dataset(final_raw_df.to_arrow()).push_to_hub(
|
111 |
+
"davanstrien/notebooks_on_the_hub_raw", token=HF_TOKEN
|
112 |
+
)
|
113 |
by_user_count = (
|
114 |
df.groupby("user")
|
115 |
.agg(pl.col("repo_notebook_count").sum())
|
|
|
145 |
final_df = final_df.sort("date")
|
146 |
pandas_df = final_df.to_pandas()
|
147 |
# final_df.to_pandas().set_index("date", drop=True).sort_index()
|
148 |
+
return pandas_df, final_df, final_raw_df
|
149 |
|
150 |
|
151 |
with gr.Blocks() as demo:
|
152 |
+
with gr.Tab("Notebooks on the Hub stats"):
|
153 |
+
gr.Markdown("# Notebooks on the Hub (updated daily)")
|
154 |
+
pandas_df, final_df, final_raw_df = update_stats()
|
155 |
+
gr.Markdown("## Notebooks on the Hub over time")
|
156 |
+
gr.Plot(px.line(pandas_df, x="date", y="repo_notebook_count", color="repo_type"))
|
157 |
+
gr.Markdown("## Notebooks on the Hub (total by date)")
|
158 |
+
gr.DataFrame(
|
159 |
+
final_df.select(pl.col(["date", "repo_notebook_count"]))
|
160 |
+
.groupby("date")
|
161 |
+
.sum()
|
162 |
+
.sort("date")
|
163 |
+
.to_pandas()
|
164 |
+
)
|
165 |
+
|
166 |
+
gr.Markdown("## Top Repos by likes with notebooks")
|
167 |
+
gr.Markdown("#### Top 10 Spaces")
|
168 |
+
gr.DataFrame(get_top_k_notebooks_by_repo_type("space", 10)[["repo_id", "likes"]])
|
169 |
+
gr.Markdown("#### Top 10 Models")
|
170 |
+
gr.DataFrame(get_top_k_notebooks_by_repo_type("model", 10)[["repo_id", "likes"]])
|
171 |
+
# repo_type.update(get_top_k_notebooks_by_repo_type, [repo_type, k],[df])
|
172 |
+
with gr.Tab("raw data"):
|
173 |
+
gr.Markdown("## Notebooks on the Hub raw data")
|
174 |
+
gr.DataFrame(final_raw_df.to_pandas())
|
175 |
|
176 |
demo.launch(debug=True)
|