Spaces:

davanstrien
/

notebooks_on_the_hub

Runtime error

App Files Files Community

davanstrien HF Staff committed on Mar 30, 2023

Commit

7e36f16

1 Parent(s): c4fe3e2

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -5

app.py CHANGED Viewed

@@ -9,23 +9,35 @@ from datetime import date
 from datasets import load_dataset
 import plotly.express as px
 import os
 HF_TOKEN = os.getenv("HF_TOKEN")
 assert HF_TOKEN
-def yield_models():
     for model in iter(list_models(full=True)):
         yield "model", model
-def yield_spaces():
     for space in iter(list_spaces(full=True)):
         yield "space", space
-def yield_notebooks():
-    for repo_type, repo in concat([yield_models(), yield_spaces()]):
         files = (f.rfilename for f in repo.siblings)
         if jupyter_notebook := [f for f in files if Path(f).suffix == ".ipynb"]:
             yield {
@@ -36,8 +48,43 @@ def yield_notebooks():
             }
 def update_stats():
-    df = pl.LazyFrame(yield_notebooks())
     df = (
         df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
@@ -95,6 +142,13 @@ with gr.Blocks() as demo:
         .sort("date")
         .to_pandas()
     )
     gr.Markdown("## Notebooks on the Hub raw data")
     gr.DataFrame(pandas_df)

 from datasets import load_dataset
 import plotly.express as px
 import os
+from typing import Optional, Set
+from functools import lru_cache
 HF_TOKEN = os.getenv("HF_TOKEN")
 assert HF_TOKEN
+def yield_models(exclude_users: Optional[Set[str]] = None):
+    """Yields models from the hub optionally excluding users."""
     for model in iter(list_models(full=True)):
+        if exclude_users and model.author in exclude_users:
+            continue
         yield "model", model
+def yield_spaces(exclude_users: Optional[Set[str]] = None):
     for space in iter(list_spaces(full=True)):
+        if exclude_users and space.author in exclude_users:
+            continue
         yield "space", space
+def yield_notebooks_counts(exclude_users: Optional[Set[str]] = None):
+    for repo_type, repo in concat(
+        [
+            yield_models(exclude_users=exclude_users),
+            yield_spaces(exclude_users=exclude_users),
+        ]
+    ):
         files = (f.rfilename for f in repo.siblings)
         if jupyter_notebook := [f for f in files if Path(f).suffix == ".ipynb"]:
             yield {
             }
+def yield_notebooks(exclude_users: Optional[Set[str]] = None):
+    for repo_type, repo in concat(
+        [
+            yield_models(exclude_users=exclude_users),
+            yield_spaces(exclude_users=exclude_users),
+        ]
+    ):
+        files = (f.rfilename for f in repo.siblings)
+        if jupyter_notebook := [f for f in files if Path(f).suffix == ".ipynb"]:
+            yield {
+                "repo_type": repo_type,
+                "repo_id": repo.id,
+                "repo_notebook_count": len(jupyter_notebook),
+                "jupyter_notebooks": jupyter_notebook,
+                "likes": repo.likes,
+            }
+@lru_cache
+def _get_top_liked_repos_with_notebooks(exclude_users: Optional[tuple[str]] = None):
+    df = pl.LazyFrame(yield_notebooks(exclude_users=exclude_users))
+    df = df.sort("likes", descending=True).collect()
+    return df
+def get_top_liked_repos_with_notebooks(exclude_users: Optional[Set[str]] = None):
+    exclude_users = tuple(exclude_users) if exclude_users else None
+    return _get_top_liked_repos_with_notebooks(exclude_users)
+def get_top_k_notebooks_by_repo_type(type: str = "space", k: int = 50):
+    df = get_top_liked_repos_with_notebooks({"gradio"})
+    return df.filter(pl.col("repo_type") == type).head(k).to_pandas()
 def update_stats():
+    df = pl.LazyFrame(yield_notebooks_counts())
     df = (
         df.with_columns(pl.col("repo_id").str.split_exact("/", 1))
         .sort("date")
         .to_pandas()
     )
+    gr.Markdown("Top Repos by likes with notebooks")
+    # k = gr.Slider(10, 100, 10,step=5, label="k",interactive=True)
+    # repo_type = gr.Dropdown(["space", "model"], value="space", label="repo_type")
+    gr.DataFrame(get_top_k_notebooks_by_repo_type("space", 10)[['repo_id','likes']])
+    gr.DataFrame(get_top_k_notebooks_by_repo_type("model", 10)[['repo_id','likes']])
+    # repo_type.update(get_top_k_notebooks_by_repo_type, [repo_type, k],[df])
     gr.Markdown("## Notebooks on the Hub raw data")
     gr.DataFrame(pandas_df)