davanstrien HF Staff committed on
Commit
c4fe3e2
·
1 Parent(s): 0161690

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +102 -0
  2. notebooks_on_the_hub.ipynb +1966 -0
  3. requirements.in +8 -0
  4. requirements.txt +224 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from huggingface_hub import list_models, list_spaces
4
+ from pathlib import Path
5
+ from toolz import concat
6
+ from datasets import Dataset
7
+ import polars as pl
8
+ from datetime import date
9
+ from datasets import load_dataset
10
+ import plotly.express as px
11
+ import os
12
+
13
# Hub access token, read from the environment; it is passed to the
# ``push_to_hub`` calls that publish the refreshed datasets.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # when Python runs with ``-O``, which would let a missing token through.
    raise RuntimeError(
        "The HF_TOKEN environment variable must be set to a Hugging Face token."
    )
15
+
16
+
17
def yield_models():
    """Yield a ``("model", info)`` pair for every model returned by ``list_models``.

    The listing is requested with ``full=True``; the ``"model"`` tag lets
    consumers tell these entries apart from the ones produced for Spaces.
    """
    listing = list_models(full=True)
    yield from (("model", info) for info in listing)
20
+
21
+
22
def yield_spaces():
    """Yield a ``("space", info)`` pair for every Space returned by ``list_spaces``.

    The listing is requested with ``full=True``; the ``"space"`` tag lets
    consumers tell these entries apart from the ones produced for models.
    """
    listing = list_spaces(full=True)
    yield from (("space", info) for info in listing)
25
+
26
+
27
def yield_notebooks():
    """Yield one record per model/Space repo that contains Jupyter notebooks.

    Walks every model and then every Space, counts the files whose suffix is
    exactly ``.ipynb``, and skips repos with no notebooks.

    Yields:
        dict: ``date`` (today's date), ``repo_type`` (``"model"`` or
        ``"space"``), ``repo_id`` and ``repo_notebook_count``.
    """
    for repo_type, repo in concat([yield_models(), yield_spaces()]):
        # The Hub info objects document ``siblings`` as optional; treat a
        # missing file listing as "no files" instead of failing on ``None``.
        siblings = repo.siblings or []
        notebook_count = sum(
            1 for sibling in siblings if Path(sibling.rfilename).suffix == ".ipynb"
        )
        if notebook_count:
            yield {
                "date": date.today(),
                "repo_type": repo_type,
                "repo_id": repo.id,
                "repo_notebook_count": notebook_count,
            }
37
+
38
+
39
def update_stats():
    """Recount notebooks on the Hub, publish the results, and return the history.

    Steps:
      1. Collect per-repo notebook counts and split ``repo_id`` on ``/`` into
         ``user`` and ``repo_id`` columns.
      2. Push per-user totals to ``davanstrien/notebooks_by_user``.
      3. Add today's per-repo-type totals to the history stored in
         ``davanstrien/notebooks_by_repo_type`` and push the merged history
         back to that dataset.

    Returns:
        tuple: ``(pandas_df, final_df)`` — the date-sorted history as a pandas
        DataFrame and as a polars DataFrame.
    """
    notebooks = (
        pl.LazyFrame(yield_notebooks())
        .with_columns(pl.col("repo_id").str.split_exact("/", 1))
        .unnest("repo_id")
        .rename({"field_0": "user", "field_1": "repo_id"})
    )

    # Per-user totals, largest first.
    by_user_count = (
        notebooks.groupby("user")
        .agg(pl.col("repo_notebook_count").sum())
        .sort("repo_notebook_count", descending=True)
        .collect()
    )
    Dataset(by_user_count.to_arrow()).push_to_hub(
        "davanstrien/notebooks_by_user", token=HF_TOKEN
    )

    # Today's totals per repo type, stamped with the current date.
    todays_counts = (
        notebooks.groupby("repo_type")
        .agg(pl.col("repo_notebook_count").sum())
        .with_columns(pl.lit(date.today()).alias("date"))
        .collect()
    )
    previous_df = pl.DataFrame(
        load_dataset("davanstrien/notebooks_by_repo_type", split="train").data.table
    )

    # Keep exactly one row per (date, repo_type). Today's rows are appended
    # last, so ``keep="last"`` lets a same-day re-run replace the stored count.
    # ``maintain_order=True`` makes "last" well defined regardless of the
    # library's default ordering behaviour.
    final_df = (
        pl.concat([previous_df, todays_counts])
        .unique(subset=["date", "repo_type"], keep="last", maintain_order=True)
        .sort("date")
    )
    Dataset(final_df.to_arrow()).push_to_hub(
        "davanstrien/notebooks_by_repo_type", token=HF_TOKEN
    )

    pandas_df = final_df.to_pandas()
    return pandas_df, final_df
83
+
84
+
85
with gr.Blocks() as demo:
    gr.Markdown("# Notebooks on the Hub (updated daily)")

    # The statistics are computed once, while the UI is being built.
    pandas_df, final_df = update_stats()

    # Prepare what the components below display.
    trend_figure = px.line(
        pandas_df, x="date", y="repo_notebook_count", color="repo_type"
    )
    date_and_count = final_df.select(pl.col(["date", "repo_notebook_count"]))
    totals_by_date = date_and_count.groupby("date").sum().sort("date").to_pandas()

    gr.Markdown("## Notebooks on the Hub over time")
    gr.Plot(trend_figure)

    gr.Markdown("## Notebooks on the Hub (total by date)")
    gr.DataFrame(totals_by_date)

    gr.Markdown("## Notebooks on the Hub raw data")
    gr.DataFrame(pandas_df)


demo.launch(debug=True)
notebooks_on_the_hub.ipynb ADDED
@@ -0,0 +1,1966 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 67,
6
+ "metadata": {
7
+ "id": "iNgLgZ27Dlq6"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "from huggingface_hub import list_models, list_spaces\n",
12
+ "from pathlib import Path\n",
13
+ "from toolz import concat\n",
14
+ "from datasets import Dataset\n",
15
+ "import polars as pl\n",
16
+ "from datetime import date\n",
17
+ "from datetime import date, timedelta\n",
18
+ "from datasets import load_dataset\n",
19
+ "import plotly.express as px\n",
20
+ "import os"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "HF_TOKEN = os.getenv(\"HF_TOKEN\")"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 23,
35
+ "metadata": {
36
+ "id": "mBMDThV4FA3m"
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "def yield_models():\n",
41
+ " for model in iter(list_models(full=True)):\n",
42
+ " yield \"model\", model"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 24,
48
+ "metadata": {
49
+ "id": "hDqiwFD3uTR8"
50
+ },
51
+ "outputs": [],
52
+ "source": [
53
+ "def yield_spaces():\n",
54
+ " for space in iter(list_spaces(full=True)):\n",
55
+ " yield \"space\", space"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 25,
61
+ "metadata": {
62
+ "id": "mZEhftoNFHGN"
63
+ },
64
+ "outputs": [],
65
+ "source": [
66
+ "def yield_notebooks():\n",
67
+ " for repo_type, repo in concat([yield_models(), yield_spaces()]):\n",
68
+ " files = (f.rfilename for f in repo.siblings)\n",
69
+ " if jupyter_notebook := [f for f in files if Path(f).suffix == \".ipynb\"]:\n",
70
+ " yield {\n",
71
+ " \"date\": date.today(),\n",
72
+ " \"repo_type\": repo_type,\n",
73
+ " \"repo_id\": repo.id,\n",
74
+ " \"repo_notebook_count\": len(jupyter_notebook),\n",
75
+ " }"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 26,
81
+ "metadata": {},
82
+ "outputs": [],
83
+ "source": [
84
+ "df = pl.LazyFrame(yield_notebooks())"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 27,
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "df = (\n",
94
+ " df.with_columns(pl.col(\"repo_id\").str.split_exact(\"/\", 1))\n",
95
+ " .unnest(\"repo_id\")\n",
96
+ " .rename({\"field_0\": \"user\", \"field_1\": \"repo_id\"})\n",
97
+ ")"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 28,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "by_user_count = (\n",
107
+ " df.groupby(\"user\")\n",
108
+ " .agg(pl.col(\"repo_notebook_count\").sum())\n",
109
+ " .sort(\"repo_notebook_count\", descending=True)\n",
110
+ " .collect()\n",
111
+ ")"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 29,
117
+ "metadata": {},
118
+ "outputs": [
119
+ {
120
+ "data": {
121
+ "text/html": [
122
+ "<div><style>\n",
123
+ ".dataframe > thead > tr > th,\n",
124
+ ".dataframe > tbody > tr > td {\n",
125
+ " text-align: right;\n",
126
+ "}\n",
127
+ "</style>\n",
128
+ "<small>shape: (1540, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>user</th><th>repo_notebook_count</th></tr><tr><td>str</td><td>i64</td></tr></thead><tbody><tr><td>&quot;gradio-pr-depl…</td><td>1798</td></tr><tr><td>&quot;gradio&quot;</td><td>414</td></tr><tr><td>&quot;sgoodfriend&quot;</td><td>240</td></tr><tr><td>&quot;merve&quot;</td><td>63</td></tr><tr><td>&quot;chrisjay&quot;</td><td>62</td></tr><tr><td>&quot;infinitejoy&quot;</td><td>32</td></tr><tr><td>&quot;fabricius&quot;</td><td>29</td></tr><tr><td>&quot;aammari&quot;</td><td>26</td></tr><tr><td>&quot;flax-community…</td><td>24</td></tr><tr><td>&quot;rajesh1729&quot;</td><td>24</td></tr><tr><td>&quot;gabri14el&quot;</td><td>23</td></tr><tr><td>&quot;srush&quot;</td><td>23</td></tr><tr><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;fredbrito&quot;</td><td>1</td></tr><tr><td>&quot;JimmyLee08&quot;</td><td>1</td></tr><tr><td>&quot;BenjaminFraser…</td><td>1</td></tr><tr><td>&quot;MRamzam&quot;</td><td>1</td></tr><tr><td>&quot;Deepak107&quot;</td><td>1</td></tr><tr><td>&quot;ozyman&quot;</td><td>1</td></tr><tr><td>&quot;ELIA&quot;</td><td>1</td></tr><tr><td>&quot;zaidmukaddam&quot;</td><td>1</td></tr><tr><td>&quot;Jack003&quot;</td><td>1</td></tr><tr><td>&quot;SiddhantOjha&quot;</td><td>1</td></tr><tr><td>&quot;lowrollr&quot;</td><td>1</td></tr><tr><td>&quot;edwardpraveen&quot;</td><td>1</td></tr></tbody></table></div>"
129
+ ],
130
+ "text/plain": [
131
+ "shape: (1540, 2)\n",
132
+ "┌───────────────────┬─────────────────────┐\n",
133
+ "│ user ┆ repo_notebook_count │\n",
134
+ "│ --- ┆ --- │\n",
135
+ "│ str ┆ i64 │\n",
136
+ "╞═══════════════════╪═══════════════��═════╡\n",
137
+ "│ gradio-pr-deploys ┆ 1798 │\n",
138
+ "│ gradio ┆ 414 │\n",
139
+ "│ sgoodfriend ┆ 240 │\n",
140
+ "│ merve ┆ 63 │\n",
141
+ "│ … ┆ … │\n",
142
+ "│ Jack003 ┆ 1 │\n",
143
+ "│ SiddhantOjha ┆ 1 │\n",
144
+ "│ lowrollr ┆ 1 │\n",
145
+ "│ edwardpraveen ┆ 1 │\n",
146
+ "└───────────────────┴─────────────────────┘"
147
+ ]
148
+ },
149
+ "execution_count": 29,
150
+ "metadata": {},
151
+ "output_type": "execute_result"
152
+ }
153
+ ],
154
+ "source": [
155
+ "by_user_count"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 30,
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "data": {
165
+ "text/html": [
166
+ "<div><style>\n",
167
+ ".dataframe > thead > tr > th,\n",
168
+ ".dataframe > tbody > tr > td {\n",
169
+ " text-align: right;\n",
170
+ "}\n",
171
+ "</style>\n",
172
+ "<small>shape: (7, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>describe</th><th>user</th><th>repo_notebook_count</th></tr><tr><td>str</td><td>str</td><td>f64</td></tr></thead><tbody><tr><td>&quot;count&quot;</td><td>&quot;1540&quot;</td><td>1540.0</td></tr><tr><td>&quot;null_count&quot;</td><td>&quot;0&quot;</td><td>0.0</td></tr><tr><td>&quot;mean&quot;</td><td>null</td><td>3.787013</td></tr><tr><td>&quot;std&quot;</td><td>null</td><td>47.455407</td></tr><tr><td>&quot;min&quot;</td><td>&quot;007aneesh&quot;</td><td>1.0</td></tr><tr><td>&quot;max&quot;</td><td>&quot;zinoubm&quot;</td><td>1798.0</td></tr><tr><td>&quot;median&quot;</td><td>null</td><td>1.0</td></tr></tbody></table></div>"
173
+ ],
174
+ "text/plain": [
175
+ "shape: (7, 3)\n",
176
+ "┌────────────┬───────────┬─────────────────────┐\n",
177
+ "│ describe ┆ user ┆ repo_notebook_count │\n",
178
+ "│ --- ┆ --- ┆ --- │\n",
179
+ "│ str ┆ str ┆ f64 │\n",
180
+ "╞════════════╪═══════════╪═════════════════════╡\n",
181
+ "│ count ┆ 1540 ┆ 1540.0 │\n",
182
+ "│ null_count ┆ 0 ┆ 0.0 │\n",
183
+ "│ mean ┆ null ┆ 3.787013 │\n",
184
+ "│ std ┆ null ┆ 47.455407 │\n",
185
+ "│ min ┆ 007aneesh ┆ 1.0 │\n",
186
+ "│ max ┆ zinoubm ┆ 1798.0 │\n",
187
+ "│ median ┆ null ┆ 1.0 │\n",
188
+ "└────────────┴───────────┴─────────────────────┘"
189
+ ]
190
+ },
191
+ "execution_count": 30,
192
+ "metadata": {},
193
+ "output_type": "execute_result"
194
+ }
195
+ ],
196
+ "source": [
197
+ "by_user_count.describe()"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": 31,
203
+ "metadata": {},
204
+ "outputs": [
205
+ {
206
+ "data": {
207
+ "text/html": [
208
+ "<div><style>\n",
209
+ ".dataframe > thead > tr > th,\n",
210
+ ".dataframe > tbody > tr > td {\n",
211
+ " text-align: right;\n",
212
+ "}\n",
213
+ "</style>\n",
214
+ "<small>shape: (1, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>mean notebooks per user</th></tr><tr><td>f64</td></tr></thead><tbody><tr><td>3.787013</td></tr></tbody></table></div>"
215
+ ],
216
+ "text/plain": [
217
+ "shape: (1, 1)\n",
218
+ "┌──────────────────────────┐\n",
219
+ "│ mean notebooks per user │\n",
220
+ "│ --- │\n",
221
+ "│ f64 │\n",
222
+ "╞══════════════════════════╡\n",
223
+ "│ 3.787013 │\n",
224
+ "└──────────────────────────┘"
225
+ ]
226
+ },
227
+ "execution_count": 31,
228
+ "metadata": {},
229
+ "output_type": "execute_result"
230
+ }
231
+ ],
232
+ "source": [
233
+ "by_user_count.mean().select(\n",
234
+ " pl.col(\"repo_notebook_count\").alias(\"mean notebooks per user\")\n",
235
+ ")"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 32,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "data": {
245
+ "text/plain": [
246
+ "Dataset({\n",
247
+ " features: ['user', 'repo_notebook_count'],\n",
248
+ " num_rows: 1540\n",
249
+ "})"
250
+ ]
251
+ },
252
+ "execution_count": 32,
253
+ "metadata": {},
254
+ "output_type": "execute_result"
255
+ }
256
+ ],
257
+ "source": [
258
+ "ds = Dataset(by_user_count.to_arrow())\n",
259
+ "ds"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": 33,
265
+ "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "name": "stderr",
269
+ "output_type": "stream",
270
+ "text": [
271
+ "Creating parquet from Arrow format: 100%|██████████| 2/2 [00:00<00:00, 617.08ba/s]\n",
272
+ "Upload 1 LFS files: 100%|██████████| 1/1 [00:00<00:00, 1.05it/s]\n",
273
+ "Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:02<00:00, 2.39s/it]\n",
274
+ "Deleting unused files from dataset repository: 100%|██████████| 1/1 [00:00<00:00, 2.01it/s]\n",
275
+ "Downloading metadata: 100%|██████████| 406/406 [00:00<00:00, 126kB/s]\n",
276
+ "Updating downloaded metadata with the new split.\n"
277
+ ]
278
+ }
279
+ ],
280
+ "source": [
281
+ "ds.push_to_hub(\"davanstrien/notebooks_by_user\", token=HF_TOKEN)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 34,
287
+ "metadata": {
288
+ "id": "h6AaHRSCV397"
289
+ },
290
+ "outputs": [],
291
+ "source": [
292
+ "grouped = df.groupby(\"repo_type\").agg(pl.col(\"repo_notebook_count\").sum())"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": 35,
298
+ "metadata": {},
299
+ "outputs": [
300
+ {
301
+ "data": {
302
+ "text/html": [
303
+ "<div><style>\n",
304
+ ".dataframe > thead > tr > th,\n",
305
+ ".dataframe > tbody > tr > td {\n",
306
+ " text-align: right;\n",
307
+ "}\n",
308
+ "</style>\n",
309
+ "<small>shape: (2, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>repo_type</th><th>repo_notebook_count</th><th>date</th></tr><tr><td>str</td><td>i64</td><td>date</td></tr></thead><tbody><tr><td>&quot;space&quot;</td><td>4443</td><td>2023-03-30</td></tr><tr><td>&quot;model&quot;</td><td>1389</td><td>2023-03-30</td></tr></tbody></table></div>"
310
+ ],
311
+ "text/plain": [
312
+ "shape: (2, 3)\n",
313
+ "┌───────────┬─────────────────────┬────────────┐\n",
314
+ "│ repo_type ┆ repo_notebook_count ┆ date │\n",
315
+ "│ --- ┆ --- ┆ --- │\n",
316
+ "│ str ┆ i64 ┆ date │\n",
317
+ "╞═══════════╪═════════════════════╪════════════╡\n",
318
+ "│ space ┆ 4443 ┆ 2023-03-30 │\n",
319
+ "│ model ┆ 1389 ┆ 2023-03-30 │\n",
320
+ "└───────────┴─────────────────────┴────────────┘"
321
+ ]
322
+ },
323
+ "execution_count": 35,
324
+ "metadata": {},
325
+ "output_type": "execute_result"
326
+ }
327
+ ],
328
+ "source": [
329
+ "final_df = grouped.with_columns(pl.lit(date.today()).alias(\"date\")).collect()\n",
330
+ "final_df"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": 36,
336
+ "metadata": {},
337
+ "outputs": [
338
+ {
339
+ "name": "stderr",
340
+ "output_type": "stream",
341
+ "text": [
342
+ "Downloading readme: 100%|██████████| 441/441 [00:00<00:00, 130kB/s]\n"
343
+ ]
344
+ },
345
+ {
346
+ "name": "stdout",
347
+ "output_type": "stream",
348
+ "text": [
349
+ "Downloading and preparing dataset None/None to /Users/davanstrien/.cache/huggingface/datasets/davanstrien___parquet/davanstrien--notebooks_by_repo_type-1004c11b0535dac5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...\n"
350
+ ]
351
+ },
352
+ {
353
+ "name": "stderr",
354
+ "output_type": "stream",
355
+ "text": [
356
+ "Downloading data: 100%|██████████| 1.87k/1.87k [00:00<00:00, 705kB/s]\n",
357
+ "Downloading data files: 100%|██████████| 1/1 [00:01<00:00, 1.71s/it]\n",
358
+ "Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 786.78it/s]\n",
359
+ " "
360
+ ]
361
+ },
362
+ {
363
+ "name": "stdout",
364
+ "output_type": "stream",
365
+ "text": [
366
+ "Dataset parquet downloaded and prepared to /Users/davanstrien/.cache/huggingface/datasets/davanstrien___parquet/davanstrien--notebooks_by_repo_type-1004c11b0535dac5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.\n"
367
+ ]
368
+ },
369
+ {
370
+ "name": "stderr",
371
+ "output_type": "stream",
372
+ "text": [
373
+ "\r"
374
+ ]
375
+ },
376
+ {
377
+ "data": {
378
+ "text/html": [
379
+ "<div><style>\n",
380
+ ".dataframe > thead > tr > th,\n",
381
+ ".dataframe > tbody > tr > td {\n",
382
+ " text-align: right;\n",
383
+ "}\n",
384
+ "</style>\n",
385
+ "<small>shape: (7, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>repo_type</th><th>repo_notebook_count</th><th>date</th></tr><tr><td>str</td><td>i64</td><td>date</td></tr></thead><tbody><tr><td>&quot;space&quot;</td><td>3956</td><td>2023-03-27</td></tr><tr><td>&quot;model&quot;</td><td>1346</td><td>2023-03-27</td></tr><tr><td>&quot;model&quot;</td><td>1348</td><td>2023-03-28</td></tr><tr><td>&quot;space&quot;</td><td>4386</td><td>2023-03-28</td></tr><tr><td>&quot;space&quot;</td><td>4422</td><td>2023-03-28</td></tr><tr><td>&quot;space&quot;</td><td>4579</td><td>2023-03-29</td></tr><tr><td>&quot;model&quot;</td><td>1384</td><td>2023-03-29</td></tr></tbody></table></div>"
386
+ ],
387
+ "text/plain": [
388
+ "shape: (7, 3)\n",
389
+ "┌───────────┬─────────────────────┬────────────┐\n",
390
+ "│ repo_type ┆ repo_notebook_count ┆ date │\n",
391
+ "│ --- ┆ --- ┆ --- │\n",
392
+ "│ str ┆ i64 ┆ date │\n",
393
+ "╞═══════════╪═════════════════════╪════════════╡\n",
394
+ "│ space ┆ 3956 ┆ 2023-03-27 │\n",
395
+ "│ model ┆ 1346 ┆ 2023-03-27 │\n",
396
+ "│ model ┆ 1348 ┆ 2023-03-28 │\n",
397
+ "│ space ┆ 4386 ┆ 2023-03-28 │\n",
398
+ "│ space ┆ 4422 ┆ 2023-03-28 │\n",
399
+ "│ space ┆ 4579 ┆ 2023-03-29 │\n",
400
+ "│ model ┆ 1384 ┆ 2023-03-29 │\n",
401
+ "└───────────┴─────────────────────┴────────────┘"
402
+ ]
403
+ },
404
+ "execution_count": 36,
405
+ "metadata": {},
406
+ "output_type": "execute_result"
407
+ }
408
+ ],
409
+ "source": [
410
+ "previous_df = pl.DataFrame(\n",
411
+ " load_dataset(\"davanstrien/notebooks_by_repo_type\", split=\"train\").data.table\n",
412
+ ")\n",
413
+ "previous_df"
414
+ ]
415
+ },
416
+ {
417
+ "cell_type": "code",
418
+ "execution_count": 37,
419
+ "metadata": {},
420
+ "outputs": [],
421
+ "source": [
422
+ "final_df = pl.concat([previous_df, final_df]).unique()"
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": 57,
428
+ "metadata": {},
429
+ "outputs": [],
430
+ "source": [
431
+ "spaces = final_df.filter(pl.col(\"repo_type\") == \"space\").unique(subset=[\"date\"])\n",
432
+ "models = final_df.filter(pl.col(\"repo_type\") == \"model\").unique(subset=[\"date\"])"
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "code",
437
+ "execution_count": 58,
438
+ "metadata": {},
439
+ "outputs": [],
440
+ "source": [
441
+ "final_df = pl.concat([spaces, models]).unique()"
442
+ ]
443
+ },
444
+ {
445
+ "cell_type": "code",
446
+ "execution_count": 62,
447
+ "metadata": {},
448
+ "outputs": [
449
+ {
450
+ "data": {
451
+ "text/html": [
452
+ "<div><style>\n",
453
+ ".dataframe > thead > tr > th,\n",
454
+ ".dataframe > tbody > tr > td {\n",
455
+ " text-align: right;\n",
456
+ "}\n",
457
+ "</style>\n",
458
+ "<small>shape: (8, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>repo_type</th><th>repo_notebook_count</th><th>date</th></tr><tr><td>str</td><td>i64</td><td>date</td></tr></thead><tbody><tr><td>&quot;space&quot;</td><td>3956</td><td>2023-03-27</td></tr><tr><td>&quot;model&quot;</td><td>1346</td><td>2023-03-27</td></tr><tr><td>&quot;space&quot;</td><td>4386</td><td>2023-03-28</td></tr><tr><td>&quot;model&quot;</td><td>1348</td><td>2023-03-28</td></tr><tr><td>&quot;space&quot;</td><td>4579</td><td>2023-03-29</td></tr><tr><td>&quot;model&quot;</td><td>1384</td><td>2023-03-29</td></tr><tr><td>&quot;space&quot;</td><td>4443</td><td>2023-03-30</td></tr><tr><td>&quot;model&quot;</td><td>1389</td><td>2023-03-30</td></tr></tbody></table></div>"
459
+ ],
460
+ "text/plain": [
461
+ "shape: (8, 3)\n",
462
+ "┌───────────┬─────────────────────┬────────────┐\n",
463
+ "│ repo_type ┆ repo_notebook_count ┆ date │\n",
464
+ "│ --- ┆ --- ┆ --- │\n",
465
+ "│ str ┆ i64 ┆ date │\n",
466
+ "╞═══════════╪═════════════════════╪════════════╡\n",
467
+ "│ space ┆ 3956 ┆ 2023-03-27 │\n",
468
+ "│ model ┆ 1346 ┆ 2023-03-27 │\n",
469
+ "│ space ┆ 4386 ┆ 2023-03-28 │\n",
470
+ "│ model ┆ 1348 ┆ 2023-03-28 │\n",
471
+ "│ space ┆ 4579 ┆ 2023-03-29 │\n",
472
+ "│ model ┆ 1384 ┆ 2023-03-29 │\n",
473
+ "│ space ┆ 4443 ┆ 2023-03-30 │\n",
474
+ "│ model ┆ 1389 ┆ 2023-03-30 │\n",
475
+ "└───────────┴─────────────────────┴────────────┘"
476
+ ]
477
+ },
478
+ "execution_count": 62,
479
+ "metadata": {},
480
+ "output_type": "execute_result"
481
+ }
482
+ ],
483
+ "source": [
484
+ "final_df = final_df.sort(\"date\")\n",
485
+ "final_df"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": 55,
491
+ "metadata": {},
492
+ "outputs": [
493
+ {
494
+ "data": {
495
+ "text/plain": [
496
+ "<polars.dataframe.groupby.GroupBy at 0x2a1035d10>"
497
+ ]
498
+ },
499
+ "execution_count": 55,
500
+ "metadata": {},
501
+ "output_type": "execute_result"
502
+ }
503
+ ],
504
+ "source": [
505
+ "final_df.groupby(\"repo_type\")"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "code",
510
+ "execution_count": 63,
511
+ "metadata": {},
512
+ "outputs": [
513
+ {
514
+ "name": "stderr",
515
+ "output_type": "stream",
516
+ "text": [
517
+ "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 730.46ba/s]\n",
518
+ "Upload 1 LFS files: 100%|██████████| 1/1 [00:00<00:00, 1.26it/s]\n",
519
+ "Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:02<00:00, 2.14s/it]\n",
520
+ "Deleting unused files from dataset repository: 100%|██████████| 1/1 [00:00<00:00, 1.87it/s]\n",
521
+ "Downloading metadata: 100%|██████████| 441/441 [00:00<00:00, 173kB/s]\n",
522
+ "Updating downloaded metadata with the new split.\n"
523
+ ]
524
+ }
525
+ ],
526
+ "source": [
527
+ "Dataset(final_df.to_arrow()).push_to_hub(\n",
528
+ " \"davanstrien/notebooks_by_repo_type\", token=HF_TOKEN\n",
529
+ ")"
530
+ ]
531
+ },
532
+ {
533
+ "cell_type": "code",
534
+ "execution_count": 73,
535
+ "metadata": {
536
+ "colab": {
537
+ "base_uri": "https://localhost:8080/",
538
+ "height": 224
539
+ },
540
+ "id": "T3vhuLpgdyKh",
541
+ "outputId": "82a58845-ec9f-40d7-cbad-abddf7ad467a"
542
+ },
543
+ "outputs": [],
544
+ "source": [
545
+ "final_df = final_df.sort(\"date\")\n",
546
+ "pandas_df = final_df.to_pandas()"
547
+ ]
548
+ },
549
+ {
550
+ "cell_type": "code",
551
+ "execution_count": 65,
552
+ "metadata": {},
553
+ "outputs": [
554
+ {
555
+ "data": {
556
+ "text/html": [
557
+ "<div>\n",
558
+ "<style scoped>\n",
559
+ " .dataframe tbody tr th:only-of-type {\n",
560
+ " vertical-align: middle;\n",
561
+ " }\n",
562
+ "\n",
563
+ " .dataframe tbody tr th {\n",
564
+ " vertical-align: top;\n",
565
+ " }\n",
566
+ "\n",
567
+ " .dataframe thead th {\n",
568
+ " text-align: right;\n",
569
+ " }\n",
570
+ "</style>\n",
571
+ "<table border=\"1\" class=\"dataframe\">\n",
572
+ " <thead>\n",
573
+ " <tr style=\"text-align: right;\">\n",
574
+ " <th></th>\n",
575
+ " <th>repo_type</th>\n",
576
+ " <th>repo_notebook_count</th>\n",
577
+ " </tr>\n",
578
+ " <tr>\n",
579
+ " <th>date</th>\n",
580
+ " <th></th>\n",
581
+ " <th></th>\n",
582
+ " </tr>\n",
583
+ " </thead>\n",
584
+ " <tbody>\n",
585
+ " <tr>\n",
586
+ " <th>2023-03-27</th>\n",
587
+ " <td>space</td>\n",
588
+ " <td>3956</td>\n",
589
+ " </tr>\n",
590
+ " <tr>\n",
591
+ " <th>2023-03-27</th>\n",
592
+ " <td>model</td>\n",
593
+ " <td>1346</td>\n",
594
+ " </tr>\n",
595
+ " <tr>\n",
596
+ " <th>2023-03-28</th>\n",
597
+ " <td>space</td>\n",
598
+ " <td>4386</td>\n",
599
+ " </tr>\n",
600
+ " <tr>\n",
601
+ " <th>2023-03-28</th>\n",
602
+ " <td>model</td>\n",
603
+ " <td>1348</td>\n",
604
+ " </tr>\n",
605
+ " <tr>\n",
606
+ " <th>2023-03-29</th>\n",
607
+ " <td>space</td>\n",
608
+ " <td>4579</td>\n",
609
+ " </tr>\n",
610
+ " <tr>\n",
611
+ " <th>2023-03-29</th>\n",
612
+ " <td>model</td>\n",
613
+ " <td>1384</td>\n",
614
+ " </tr>\n",
615
+ " <tr>\n",
616
+ " <th>2023-03-30</th>\n",
617
+ " <td>space</td>\n",
618
+ " <td>4443</td>\n",
619
+ " </tr>\n",
620
+ " <tr>\n",
621
+ " <th>2023-03-30</th>\n",
622
+ " <td>model</td>\n",
623
+ " <td>1389</td>\n",
624
+ " </tr>\n",
625
+ " </tbody>\n",
626
+ "</table>\n",
627
+ "</div>"
628
+ ],
629
+ "text/plain": [
630
+ " repo_type repo_notebook_count\n",
631
+ "date \n",
632
+ "2023-03-27 space 3956\n",
633
+ "2023-03-27 model 1346\n",
634
+ "2023-03-28 space 4386\n",
635
+ "2023-03-28 model 1348\n",
636
+ "2023-03-29 space 4579\n",
637
+ "2023-03-29 model 1384\n",
638
+ "2023-03-30 space 4443\n",
639
+ "2023-03-30 model 1389"
640
+ ]
641
+ },
642
+ "execution_count": 65,
643
+ "metadata": {},
644
+ "output_type": "execute_result"
645
+ }
646
+ ],
647
+ "source": [
648
+ "# final_df.to_pandas().set_index(\"date\", drop=True).sort_index()"
649
+ ]
650
+ },
651
+ {
652
+ "cell_type": "code",
653
+ "execution_count": null,
654
+ "metadata": {},
655
+ "outputs": [],
656
+ "source": []
657
+ },
658
+ {
659
+ "cell_type": "code",
660
+ "execution_count": 66,
661
+ "metadata": {},
662
+ "outputs": [
663
+ {
664
+ "data": {
665
+ "application/vnd.plotly.v1+json": {
666
+ "config": {
667
+ "plotlyServerURL": "https://plot.ly"
668
+ },
669
+ "data": [
670
+ {
671
+ "hovertemplate": "repo_type=space<br>date=%{x}<br>repo_notebook_count=%{y}<extra></extra>",
672
+ "legendgroup": "space",
673
+ "line": {
674
+ "color": "#636efa",
675
+ "dash": "solid"
676
+ },
677
+ "marker": {
678
+ "symbol": "circle"
679
+ },
680
+ "mode": "lines",
681
+ "name": "space",
682
+ "orientation": "v",
683
+ "showlegend": true,
684
+ "type": "scatter",
685
+ "x": [
686
+ "2023-03-27T00:00:00",
687
+ "2023-03-28T00:00:00",
688
+ "2023-03-29T00:00:00",
689
+ "2023-03-30T00:00:00"
690
+ ],
691
+ "xaxis": "x",
692
+ "y": [
693
+ 3956,
694
+ 4386,
695
+ 4579,
696
+ 4443
697
+ ],
698
+ "yaxis": "y"
699
+ },
700
+ {
701
+ "hovertemplate": "repo_type=model<br>date=%{x}<br>repo_notebook_count=%{y}<extra></extra>",
702
+ "legendgroup": "model",
703
+ "line": {
704
+ "color": "#EF553B",
705
+ "dash": "solid"
706
+ },
707
+ "marker": {
708
+ "symbol": "circle"
709
+ },
710
+ "mode": "lines",
711
+ "name": "model",
712
+ "orientation": "v",
713
+ "showlegend": true,
714
+ "type": "scatter",
715
+ "x": [
716
+ "2023-03-27T00:00:00",
717
+ "2023-03-28T00:00:00",
718
+ "2023-03-29T00:00:00",
719
+ "2023-03-30T00:00:00"
720
+ ],
721
+ "xaxis": "x",
722
+ "y": [
723
+ 1346,
724
+ 1348,
725
+ 1384,
726
+ 1389
727
+ ],
728
+ "yaxis": "y"
729
+ }
730
+ ],
731
+ "layout": {
732
+ "legend": {
733
+ "title": {
734
+ "text": "repo_type"
735
+ },
736
+ "tracegroupgap": 0
737
+ },
738
+ "margin": {
739
+ "t": 60
740
+ },
741
+ "template": {
742
+ "data": {
743
+ "bar": [
744
+ {
745
+ "error_x": {
746
+ "color": "#2a3f5f"
747
+ },
748
+ "error_y": {
749
+ "color": "#2a3f5f"
750
+ },
751
+ "marker": {
752
+ "line": {
753
+ "color": "#E5ECF6",
754
+ "width": 0.5
755
+ },
756
+ "pattern": {
757
+ "fillmode": "overlay",
758
+ "size": 10,
759
+ "solidity": 0.2
760
+ }
761
+ },
762
+ "type": "bar"
763
+ }
764
+ ],
765
+ "barpolar": [
766
+ {
767
+ "marker": {
768
+ "line": {
769
+ "color": "#E5ECF6",
770
+ "width": 0.5
771
+ },
772
+ "pattern": {
773
+ "fillmode": "overlay",
774
+ "size": 10,
775
+ "solidity": 0.2
776
+ }
777
+ },
778
+ "type": "barpolar"
779
+ }
780
+ ],
781
+ "carpet": [
782
+ {
783
+ "aaxis": {
784
+ "endlinecolor": "#2a3f5f",
785
+ "gridcolor": "white",
786
+ "linecolor": "white",
787
+ "minorgridcolor": "white",
788
+ "startlinecolor": "#2a3f5f"
789
+ },
790
+ "baxis": {
791
+ "endlinecolor": "#2a3f5f",
792
+ "gridcolor": "white",
793
+ "linecolor": "white",
794
+ "minorgridcolor": "white",
795
+ "startlinecolor": "#2a3f5f"
796
+ },
797
+ "type": "carpet"
798
+ }
799
+ ],
800
+ "choropleth": [
801
+ {
802
+ "colorbar": {
803
+ "outlinewidth": 0,
804
+ "ticks": ""
805
+ },
806
+ "type": "choropleth"
807
+ }
808
+ ],
809
+ "contour": [
810
+ {
811
+ "colorbar": {
812
+ "outlinewidth": 0,
813
+ "ticks": ""
814
+ },
815
+ "colorscale": [
816
+ [
817
+ 0,
818
+ "#0d0887"
819
+ ],
820
+ [
821
+ 0.1111111111111111,
822
+ "#46039f"
823
+ ],
824
+ [
825
+ 0.2222222222222222,
826
+ "#7201a8"
827
+ ],
828
+ [
829
+ 0.3333333333333333,
830
+ "#9c179e"
831
+ ],
832
+ [
833
+ 0.4444444444444444,
834
+ "#bd3786"
835
+ ],
836
+ [
837
+ 0.5555555555555556,
838
+ "#d8576b"
839
+ ],
840
+ [
841
+ 0.6666666666666666,
842
+ "#ed7953"
843
+ ],
844
+ [
845
+ 0.7777777777777778,
846
+ "#fb9f3a"
847
+ ],
848
+ [
849
+ 0.8888888888888888,
850
+ "#fdca26"
851
+ ],
852
+ [
853
+ 1,
854
+ "#f0f921"
855
+ ]
856
+ ],
857
+ "type": "contour"
858
+ }
859
+ ],
860
+ "contourcarpet": [
861
+ {
862
+ "colorbar": {
863
+ "outlinewidth": 0,
864
+ "ticks": ""
865
+ },
866
+ "type": "contourcarpet"
867
+ }
868
+ ],
869
+ "heatmap": [
870
+ {
871
+ "colorbar": {
872
+ "outlinewidth": 0,
873
+ "ticks": ""
874
+ },
875
+ "colorscale": [
876
+ [
877
+ 0,
878
+ "#0d0887"
879
+ ],
880
+ [
881
+ 0.1111111111111111,
882
+ "#46039f"
883
+ ],
884
+ [
885
+ 0.2222222222222222,
886
+ "#7201a8"
887
+ ],
888
+ [
889
+ 0.3333333333333333,
890
+ "#9c179e"
891
+ ],
892
+ [
893
+ 0.4444444444444444,
894
+ "#bd3786"
895
+ ],
896
+ [
897
+ 0.5555555555555556,
898
+ "#d8576b"
899
+ ],
900
+ [
901
+ 0.6666666666666666,
902
+ "#ed7953"
903
+ ],
904
+ [
905
+ 0.7777777777777778,
906
+ "#fb9f3a"
907
+ ],
908
+ [
909
+ 0.8888888888888888,
910
+ "#fdca26"
911
+ ],
912
+ [
913
+ 1,
914
+ "#f0f921"
915
+ ]
916
+ ],
917
+ "type": "heatmap"
918
+ }
919
+ ],
920
+ "heatmapgl": [
921
+ {
922
+ "colorbar": {
923
+ "outlinewidth": 0,
924
+ "ticks": ""
925
+ },
926
+ "colorscale": [
927
+ [
928
+ 0,
929
+ "#0d0887"
930
+ ],
931
+ [
932
+ 0.1111111111111111,
933
+ "#46039f"
934
+ ],
935
+ [
936
+ 0.2222222222222222,
937
+ "#7201a8"
938
+ ],
939
+ [
940
+ 0.3333333333333333,
941
+ "#9c179e"
942
+ ],
943
+ [
944
+ 0.4444444444444444,
945
+ "#bd3786"
946
+ ],
947
+ [
948
+ 0.5555555555555556,
949
+ "#d8576b"
950
+ ],
951
+ [
952
+ 0.6666666666666666,
953
+ "#ed7953"
954
+ ],
955
+ [
956
+ 0.7777777777777778,
957
+ "#fb9f3a"
958
+ ],
959
+ [
960
+ 0.8888888888888888,
961
+ "#fdca26"
962
+ ],
963
+ [
964
+ 1,
965
+ "#f0f921"
966
+ ]
967
+ ],
968
+ "type": "heatmapgl"
969
+ }
970
+ ],
971
+ "histogram": [
972
+ {
973
+ "marker": {
974
+ "pattern": {
975
+ "fillmode": "overlay",
976
+ "size": 10,
977
+ "solidity": 0.2
978
+ }
979
+ },
980
+ "type": "histogram"
981
+ }
982
+ ],
983
+ "histogram2d": [
984
+ {
985
+ "colorbar": {
986
+ "outlinewidth": 0,
987
+ "ticks": ""
988
+ },
989
+ "colorscale": [
990
+ [
991
+ 0,
992
+ "#0d0887"
993
+ ],
994
+ [
995
+ 0.1111111111111111,
996
+ "#46039f"
997
+ ],
998
+ [
999
+ 0.2222222222222222,
1000
+ "#7201a8"
1001
+ ],
1002
+ [
1003
+ 0.3333333333333333,
1004
+ "#9c179e"
1005
+ ],
1006
+ [
1007
+ 0.4444444444444444,
1008
+ "#bd3786"
1009
+ ],
1010
+ [
1011
+ 0.5555555555555556,
1012
+ "#d8576b"
1013
+ ],
1014
+ [
1015
+ 0.6666666666666666,
1016
+ "#ed7953"
1017
+ ],
1018
+ [
1019
+ 0.7777777777777778,
1020
+ "#fb9f3a"
1021
+ ],
1022
+ [
1023
+ 0.8888888888888888,
1024
+ "#fdca26"
1025
+ ],
1026
+ [
1027
+ 1,
1028
+ "#f0f921"
1029
+ ]
1030
+ ],
1031
+ "type": "histogram2d"
1032
+ }
1033
+ ],
1034
+ "histogram2dcontour": [
1035
+ {
1036
+ "colorbar": {
1037
+ "outlinewidth": 0,
1038
+ "ticks": ""
1039
+ },
1040
+ "colorscale": [
1041
+ [
1042
+ 0,
1043
+ "#0d0887"
1044
+ ],
1045
+ [
1046
+ 0.1111111111111111,
1047
+ "#46039f"
1048
+ ],
1049
+ [
1050
+ 0.2222222222222222,
1051
+ "#7201a8"
1052
+ ],
1053
+ [
1054
+ 0.3333333333333333,
1055
+ "#9c179e"
1056
+ ],
1057
+ [
1058
+ 0.4444444444444444,
1059
+ "#bd3786"
1060
+ ],
1061
+ [
1062
+ 0.5555555555555556,
1063
+ "#d8576b"
1064
+ ],
1065
+ [
1066
+ 0.6666666666666666,
1067
+ "#ed7953"
1068
+ ],
1069
+ [
1070
+ 0.7777777777777778,
1071
+ "#fb9f3a"
1072
+ ],
1073
+ [
1074
+ 0.8888888888888888,
1075
+ "#fdca26"
1076
+ ],
1077
+ [
1078
+ 1,
1079
+ "#f0f921"
1080
+ ]
1081
+ ],
1082
+ "type": "histogram2dcontour"
1083
+ }
1084
+ ],
1085
+ "mesh3d": [
1086
+ {
1087
+ "colorbar": {
1088
+ "outlinewidth": 0,
1089
+ "ticks": ""
1090
+ },
1091
+ "type": "mesh3d"
1092
+ }
1093
+ ],
1094
+ "parcoords": [
1095
+ {
1096
+ "line": {
1097
+ "colorbar": {
1098
+ "outlinewidth": 0,
1099
+ "ticks": ""
1100
+ }
1101
+ },
1102
+ "type": "parcoords"
1103
+ }
1104
+ ],
1105
+ "pie": [
1106
+ {
1107
+ "automargin": true,
1108
+ "type": "pie"
1109
+ }
1110
+ ],
1111
+ "scatter": [
1112
+ {
1113
+ "fillpattern": {
1114
+ "fillmode": "overlay",
1115
+ "size": 10,
1116
+ "solidity": 0.2
1117
+ },
1118
+ "type": "scatter"
1119
+ }
1120
+ ],
1121
+ "scatter3d": [
1122
+ {
1123
+ "line": {
1124
+ "colorbar": {
1125
+ "outlinewidth": 0,
1126
+ "ticks": ""
1127
+ }
1128
+ },
1129
+ "marker": {
1130
+ "colorbar": {
1131
+ "outlinewidth": 0,
1132
+ "ticks": ""
1133
+ }
1134
+ },
1135
+ "type": "scatter3d"
1136
+ }
1137
+ ],
1138
+ "scattercarpet": [
1139
+ {
1140
+ "marker": {
1141
+ "colorbar": {
1142
+ "outlinewidth": 0,
1143
+ "ticks": ""
1144
+ }
1145
+ },
1146
+ "type": "scattercarpet"
1147
+ }
1148
+ ],
1149
+ "scattergeo": [
1150
+ {
1151
+ "marker": {
1152
+ "colorbar": {
1153
+ "outlinewidth": 0,
1154
+ "ticks": ""
1155
+ }
1156
+ },
1157
+ "type": "scattergeo"
1158
+ }
1159
+ ],
1160
+ "scattergl": [
1161
+ {
1162
+ "marker": {
1163
+ "colorbar": {
1164
+ "outlinewidth": 0,
1165
+ "ticks": ""
1166
+ }
1167
+ },
1168
+ "type": "scattergl"
1169
+ }
1170
+ ],
1171
+ "scattermapbox": [
1172
+ {
1173
+ "marker": {
1174
+ "colorbar": {
1175
+ "outlinewidth": 0,
1176
+ "ticks": ""
1177
+ }
1178
+ },
1179
+ "type": "scattermapbox"
1180
+ }
1181
+ ],
1182
+ "scatterpolar": [
1183
+ {
1184
+ "marker": {
1185
+ "colorbar": {
1186
+ "outlinewidth": 0,
1187
+ "ticks": ""
1188
+ }
1189
+ },
1190
+ "type": "scatterpolar"
1191
+ }
1192
+ ],
1193
+ "scatterpolargl": [
1194
+ {
1195
+ "marker": {
1196
+ "colorbar": {
1197
+ "outlinewidth": 0,
1198
+ "ticks": ""
1199
+ }
1200
+ },
1201
+ "type": "scatterpolargl"
1202
+ }
1203
+ ],
1204
+ "scatterternary": [
1205
+ {
1206
+ "marker": {
1207
+ "colorbar": {
1208
+ "outlinewidth": 0,
1209
+ "ticks": ""
1210
+ }
1211
+ },
1212
+ "type": "scatterternary"
1213
+ }
1214
+ ],
1215
+ "surface": [
1216
+ {
1217
+ "colorbar": {
1218
+ "outlinewidth": 0,
1219
+ "ticks": ""
1220
+ },
1221
+ "colorscale": [
1222
+ [
1223
+ 0,
1224
+ "#0d0887"
1225
+ ],
1226
+ [
1227
+ 0.1111111111111111,
1228
+ "#46039f"
1229
+ ],
1230
+ [
1231
+ 0.2222222222222222,
1232
+ "#7201a8"
1233
+ ],
1234
+ [
1235
+ 0.3333333333333333,
1236
+ "#9c179e"
1237
+ ],
1238
+ [
1239
+ 0.4444444444444444,
1240
+ "#bd3786"
1241
+ ],
1242
+ [
1243
+ 0.5555555555555556,
1244
+ "#d8576b"
1245
+ ],
1246
+ [
1247
+ 0.6666666666666666,
1248
+ "#ed7953"
1249
+ ],
1250
+ [
1251
+ 0.7777777777777778,
1252
+ "#fb9f3a"
1253
+ ],
1254
+ [
1255
+ 0.8888888888888888,
1256
+ "#fdca26"
1257
+ ],
1258
+ [
1259
+ 1,
1260
+ "#f0f921"
1261
+ ]
1262
+ ],
1263
+ "type": "surface"
1264
+ }
1265
+ ],
1266
+ "table": [
1267
+ {
1268
+ "cells": {
1269
+ "fill": {
1270
+ "color": "#EBF0F8"
1271
+ },
1272
+ "line": {
1273
+ "color": "white"
1274
+ }
1275
+ },
1276
+ "header": {
1277
+ "fill": {
1278
+ "color": "#C8D4E3"
1279
+ },
1280
+ "line": {
1281
+ "color": "white"
1282
+ }
1283
+ },
1284
+ "type": "table"
1285
+ }
1286
+ ]
1287
+ },
1288
+ "layout": {
1289
+ "annotationdefaults": {
1290
+ "arrowcolor": "#2a3f5f",
1291
+ "arrowhead": 0,
1292
+ "arrowwidth": 1
1293
+ },
1294
+ "autotypenumbers": "strict",
1295
+ "coloraxis": {
1296
+ "colorbar": {
1297
+ "outlinewidth": 0,
1298
+ "ticks": ""
1299
+ }
1300
+ },
1301
+ "colorscale": {
1302
+ "diverging": [
1303
+ [
1304
+ 0,
1305
+ "#8e0152"
1306
+ ],
1307
+ [
1308
+ 0.1,
1309
+ "#c51b7d"
1310
+ ],
1311
+ [
1312
+ 0.2,
1313
+ "#de77ae"
1314
+ ],
1315
+ [
1316
+ 0.3,
1317
+ "#f1b6da"
1318
+ ],
1319
+ [
1320
+ 0.4,
1321
+ "#fde0ef"
1322
+ ],
1323
+ [
1324
+ 0.5,
1325
+ "#f7f7f7"
1326
+ ],
1327
+ [
1328
+ 0.6,
1329
+ "#e6f5d0"
1330
+ ],
1331
+ [
1332
+ 0.7,
1333
+ "#b8e186"
1334
+ ],
1335
+ [
1336
+ 0.8,
1337
+ "#7fbc41"
1338
+ ],
1339
+ [
1340
+ 0.9,
1341
+ "#4d9221"
1342
+ ],
1343
+ [
1344
+ 1,
1345
+ "#276419"
1346
+ ]
1347
+ ],
1348
+ "sequential": [
1349
+ [
1350
+ 0,
1351
+ "#0d0887"
1352
+ ],
1353
+ [
1354
+ 0.1111111111111111,
1355
+ "#46039f"
1356
+ ],
1357
+ [
1358
+ 0.2222222222222222,
1359
+ "#7201a8"
1360
+ ],
1361
+ [
1362
+ 0.3333333333333333,
1363
+ "#9c179e"
1364
+ ],
1365
+ [
1366
+ 0.4444444444444444,
1367
+ "#bd3786"
1368
+ ],
1369
+ [
1370
+ 0.5555555555555556,
1371
+ "#d8576b"
1372
+ ],
1373
+ [
1374
+ 0.6666666666666666,
1375
+ "#ed7953"
1376
+ ],
1377
+ [
1378
+ 0.7777777777777778,
1379
+ "#fb9f3a"
1380
+ ],
1381
+ [
1382
+ 0.8888888888888888,
1383
+ "#fdca26"
1384
+ ],
1385
+ [
1386
+ 1,
1387
+ "#f0f921"
1388
+ ]
1389
+ ],
1390
+ "sequentialminus": [
1391
+ [
1392
+ 0,
1393
+ "#0d0887"
1394
+ ],
1395
+ [
1396
+ 0.1111111111111111,
1397
+ "#46039f"
1398
+ ],
1399
+ [
1400
+ 0.2222222222222222,
1401
+ "#7201a8"
1402
+ ],
1403
+ [
1404
+ 0.3333333333333333,
1405
+ "#9c179e"
1406
+ ],
1407
+ [
1408
+ 0.4444444444444444,
1409
+ "#bd3786"
1410
+ ],
1411
+ [
1412
+ 0.5555555555555556,
1413
+ "#d8576b"
1414
+ ],
1415
+ [
1416
+ 0.6666666666666666,
1417
+ "#ed7953"
1418
+ ],
1419
+ [
1420
+ 0.7777777777777778,
1421
+ "#fb9f3a"
1422
+ ],
1423
+ [
1424
+ 0.8888888888888888,
1425
+ "#fdca26"
1426
+ ],
1427
+ [
1428
+ 1,
1429
+ "#f0f921"
1430
+ ]
1431
+ ]
1432
+ },
1433
+ "colorway": [
1434
+ "#636efa",
1435
+ "#EF553B",
1436
+ "#00cc96",
1437
+ "#ab63fa",
1438
+ "#FFA15A",
1439
+ "#19d3f3",
1440
+ "#FF6692",
1441
+ "#B6E880",
1442
+ "#FF97FF",
1443
+ "#FECB52"
1444
+ ],
1445
+ "font": {
1446
+ "color": "#2a3f5f"
1447
+ },
1448
+ "geo": {
1449
+ "bgcolor": "white",
1450
+ "lakecolor": "white",
1451
+ "landcolor": "#E5ECF6",
1452
+ "showlakes": true,
1453
+ "showland": true,
1454
+ "subunitcolor": "white"
1455
+ },
1456
+ "hoverlabel": {
1457
+ "align": "left"
1458
+ },
1459
+ "hovermode": "closest",
1460
+ "mapbox": {
1461
+ "style": "light"
1462
+ },
1463
+ "paper_bgcolor": "white",
1464
+ "plot_bgcolor": "#E5ECF6",
1465
+ "polar": {
1466
+ "angularaxis": {
1467
+ "gridcolor": "white",
1468
+ "linecolor": "white",
1469
+ "ticks": ""
1470
+ },
1471
+ "bgcolor": "#E5ECF6",
1472
+ "radialaxis": {
1473
+ "gridcolor": "white",
1474
+ "linecolor": "white",
1475
+ "ticks": ""
1476
+ }
1477
+ },
1478
+ "scene": {
1479
+ "xaxis": {
1480
+ "backgroundcolor": "#E5ECF6",
1481
+ "gridcolor": "white",
1482
+ "gridwidth": 2,
1483
+ "linecolor": "white",
1484
+ "showbackground": true,
1485
+ "ticks": "",
1486
+ "zerolinecolor": "white"
1487
+ },
1488
+ "yaxis": {
1489
+ "backgroundcolor": "#E5ECF6",
1490
+ "gridcolor": "white",
1491
+ "gridwidth": 2,
1492
+ "linecolor": "white",
1493
+ "showbackground": true,
1494
+ "ticks": "",
1495
+ "zerolinecolor": "white"
1496
+ },
1497
+ "zaxis": {
1498
+ "backgroundcolor": "#E5ECF6",
1499
+ "gridcolor": "white",
1500
+ "gridwidth": 2,
1501
+ "linecolor": "white",
1502
+ "showbackground": true,
1503
+ "ticks": "",
1504
+ "zerolinecolor": "white"
1505
+ }
1506
+ },
1507
+ "shapedefaults": {
1508
+ "line": {
1509
+ "color": "#2a3f5f"
1510
+ }
1511
+ },
1512
+ "ternary": {
1513
+ "aaxis": {
1514
+ "gridcolor": "white",
1515
+ "linecolor": "white",
1516
+ "ticks": ""
1517
+ },
1518
+ "baxis": {
1519
+ "gridcolor": "white",
1520
+ "linecolor": "white",
1521
+ "ticks": ""
1522
+ },
1523
+ "bgcolor": "#E5ECF6",
1524
+ "caxis": {
1525
+ "gridcolor": "white",
1526
+ "linecolor": "white",
1527
+ "ticks": ""
1528
+ }
1529
+ },
1530
+ "title": {
1531
+ "x": 0.05
1532
+ },
1533
+ "xaxis": {
1534
+ "automargin": true,
1535
+ "gridcolor": "white",
1536
+ "linecolor": "white",
1537
+ "ticks": "",
1538
+ "title": {
1539
+ "standoff": 15
1540
+ },
1541
+ "zerolinecolor": "white",
1542
+ "zerolinewidth": 2
1543
+ },
1544
+ "yaxis": {
1545
+ "automargin": true,
1546
+ "gridcolor": "white",
1547
+ "linecolor": "white",
1548
+ "ticks": "",
1549
+ "title": {
1550
+ "standoff": 15
1551
+ },
1552
+ "zerolinecolor": "white",
1553
+ "zerolinewidth": 2
1554
+ }
1555
+ }
1556
+ },
1557
+ "xaxis": {
1558
+ "anchor": "y",
1559
+ "domain": [
1560
+ 0,
1561
+ 1
1562
+ ],
1563
+ "title": {
1564
+ "text": "date"
1565
+ }
1566
+ },
1567
+ "yaxis": {
1568
+ "anchor": "x",
1569
+ "domain": [
1570
+ 0,
1571
+ 1
1572
+ ],
1573
+ "title": {
1574
+ "text": "repo_notebook_count"
1575
+ }
1576
+ }
1577
+ }
1578
+ }
1579
+ },
1580
+ "metadata": {},
1581
+ "output_type": "display_data"
1582
+ }
1583
+ ],
1584
+ "source": [
1585
+ "px.line(pandas_df, x=\"date\", y=\"repo_notebook_count\", color=\"repo_type\")"
1586
+ ]
1587
+ },
1588
+ {
1589
+ "cell_type": "code",
1590
+ "execution_count": null,
1591
+ "metadata": {},
1592
+ "outputs": [],
1593
+ "source": []
1594
+ }
1595
+ ],
1596
+ "metadata": {
1597
+ "colab": {
1598
+ "name": "scratchpad",
1599
+ "provenance": []
1600
+ },
1601
+ "kernelspec": {
1602
+ "display_name": "Python 3",
1603
+ "name": "python3"
1604
+ },
1605
+ "language_info": {
1606
+ "codemirror_mode": {
1607
+ "name": "ipython",
1608
+ "version": 3
1609
+ },
1610
+ "file_extension": ".py",
1611
+ "mimetype": "text/x-python",
1612
+ "name": "python",
1613
+ "nbconvert_exporter": "python",
1614
+ "pygments_lexer": "ipython3",
1615
+ "version": "3.11.2"
1616
+ },
1617
+ "widgets": {
1618
+ "application/vnd.jupyter.widget-state+json": {
1619
+ "0b7aa472a433465bbe03d9f5a596d07c": {
1620
+ "model_module": "@jupyter-widgets/controls",
1621
+ "model_module_version": "1.5.0",
1622
+ "model_name": "HBoxModel",
1623
+ "state": {
1624
+ "_dom_classes": [],
1625
+ "_model_module": "@jupyter-widgets/controls",
1626
+ "_model_module_version": "1.5.0",
1627
+ "_model_name": "HBoxModel",
1628
+ "_view_count": null,
1629
+ "_view_module": "@jupyter-widgets/controls",
1630
+ "_view_module_version": "1.5.0",
1631
+ "_view_name": "HBoxView",
1632
+ "box_style": "",
1633
+ "children": [
1634
+ "IPY_MODEL_590d7059bf1e4c5285ca40e5298b7ee7",
1635
+ "IPY_MODEL_a82a83de315c43f6b7f9fa787227b2e3",
1636
+ "IPY_MODEL_4f66bea4e3ce4f508a407788729c958e"
1637
+ ],
1638
+ "layout": "IPY_MODEL_190d66e96c4b427e998fc412dd2bbd7b"
1639
+ }
1640
+ },
1641
+ "190d66e96c4b427e998fc412dd2bbd7b": {
1642
+ "model_module": "@jupyter-widgets/base",
1643
+ "model_module_version": "1.2.0",
1644
+ "model_name": "LayoutModel",
1645
+ "state": {
1646
+ "_model_module": "@jupyter-widgets/base",
1647
+ "_model_module_version": "1.2.0",
1648
+ "_model_name": "LayoutModel",
1649
+ "_view_count": null,
1650
+ "_view_module": "@jupyter-widgets/base",
1651
+ "_view_module_version": "1.2.0",
1652
+ "_view_name": "LayoutView",
1653
+ "align_content": null,
1654
+ "align_items": null,
1655
+ "align_self": null,
1656
+ "border": null,
1657
+ "bottom": null,
1658
+ "display": null,
1659
+ "flex": null,
1660
+ "flex_flow": null,
1661
+ "grid_area": null,
1662
+ "grid_auto_columns": null,
1663
+ "grid_auto_flow": null,
1664
+ "grid_auto_rows": null,
1665
+ "grid_column": null,
1666
+ "grid_gap": null,
1667
+ "grid_row": null,
1668
+ "grid_template_areas": null,
1669
+ "grid_template_columns": null,
1670
+ "grid_template_rows": null,
1671
+ "height": null,
1672
+ "justify_content": null,
1673
+ "justify_items": null,
1674
+ "left": null,
1675
+ "margin": null,
1676
+ "max_height": null,
1677
+ "max_width": null,
1678
+ "min_height": null,
1679
+ "min_width": null,
1680
+ "object_fit": null,
1681
+ "object_position": null,
1682
+ "order": null,
1683
+ "overflow": null,
1684
+ "overflow_x": null,
1685
+ "overflow_y": null,
1686
+ "padding": null,
1687
+ "right": null,
1688
+ "top": null,
1689
+ "visibility": "hidden",
1690
+ "width": null
1691
+ }
1692
+ },
1693
+ "266a59ce352c4a65aea3c6e8402ac1ed": {
1694
+ "model_module": "@jupyter-widgets/base",
1695
+ "model_module_version": "1.2.0",
1696
+ "model_name": "LayoutModel",
1697
+ "state": {
1698
+ "_model_module": "@jupyter-widgets/base",
1699
+ "_model_module_version": "1.2.0",
1700
+ "_model_name": "LayoutModel",
1701
+ "_view_count": null,
1702
+ "_view_module": "@jupyter-widgets/base",
1703
+ "_view_module_version": "1.2.0",
1704
+ "_view_name": "LayoutView",
1705
+ "align_content": null,
1706
+ "align_items": null,
1707
+ "align_self": null,
1708
+ "border": null,
1709
+ "bottom": null,
1710
+ "display": null,
1711
+ "flex": null,
1712
+ "flex_flow": null,
1713
+ "grid_area": null,
1714
+ "grid_auto_columns": null,
1715
+ "grid_auto_flow": null,
1716
+ "grid_auto_rows": null,
1717
+ "grid_column": null,
1718
+ "grid_gap": null,
1719
+ "grid_row": null,
1720
+ "grid_template_areas": null,
1721
+ "grid_template_columns": null,
1722
+ "grid_template_rows": null,
1723
+ "height": null,
1724
+ "justify_content": null,
1725
+ "justify_items": null,
1726
+ "left": null,
1727
+ "margin": null,
1728
+ "max_height": null,
1729
+ "max_width": null,
1730
+ "min_height": null,
1731
+ "min_width": null,
1732
+ "object_fit": null,
1733
+ "object_position": null,
1734
+ "order": null,
1735
+ "overflow": null,
1736
+ "overflow_x": null,
1737
+ "overflow_y": null,
1738
+ "padding": null,
1739
+ "right": null,
1740
+ "top": null,
1741
+ "visibility": null,
1742
+ "width": null
1743
+ }
1744
+ },
1745
+ "4c31add106194a728e308ed486fc2e54": {
1746
+ "model_module": "@jupyter-widgets/controls",
1747
+ "model_module_version": "1.5.0",
1748
+ "model_name": "ProgressStyleModel",
1749
+ "state": {
1750
+ "_model_module": "@jupyter-widgets/controls",
1751
+ "_model_module_version": "1.5.0",
1752
+ "_model_name": "ProgressStyleModel",
1753
+ "_view_count": null,
1754
+ "_view_module": "@jupyter-widgets/base",
1755
+ "_view_module_version": "1.2.0",
1756
+ "_view_name": "StyleView",
1757
+ "bar_color": null,
1758
+ "description_width": ""
1759
+ }
1760
+ },
1761
+ "4f66bea4e3ce4f508a407788729c958e": {
1762
+ "model_module": "@jupyter-widgets/controls",
1763
+ "model_module_version": "1.5.0",
1764
+ "model_name": "HTMLModel",
1765
+ "state": {
1766
+ "_dom_classes": [],
1767
+ "_model_module": "@jupyter-widgets/controls",
1768
+ "_model_module_version": "1.5.0",
1769
+ "_model_name": "HTMLModel",
1770
+ "_view_count": null,
1771
+ "_view_module": "@jupyter-widgets/controls",
1772
+ "_view_module_version": "1.5.0",
1773
+ "_view_name": "HTMLView",
1774
+ "description": "",
1775
+ "description_tooltip": null,
1776
+ "layout": "IPY_MODEL_266a59ce352c4a65aea3c6e8402ac1ed",
1777
+ "placeholder": "​",
1778
+ "style": "IPY_MODEL_c28bf5f9ac624739a48d62510d3bb05d",
1779
+ "value": " 2490/0 [02:11&lt;00:00, 236.08 examples/s]"
1780
+ }
1781
+ },
1782
+ "590d7059bf1e4c5285ca40e5298b7ee7": {
1783
+ "model_module": "@jupyter-widgets/controls",
1784
+ "model_module_version": "1.5.0",
1785
+ "model_name": "HTMLModel",
1786
+ "state": {
1787
+ "_dom_classes": [],
1788
+ "_model_module": "@jupyter-widgets/controls",
1789
+ "_model_module_version": "1.5.0",
1790
+ "_model_name": "HTMLModel",
1791
+ "_view_count": null,
1792
+ "_view_module": "@jupyter-widgets/controls",
1793
+ "_view_module_version": "1.5.0",
1794
+ "_view_name": "HTMLView",
1795
+ "description": "",
1796
+ "description_tooltip": null,
1797
+ "layout": "IPY_MODEL_711a5f5d14ac4af180b54954e814f394",
1798
+ "placeholder": "​",
1799
+ "style": "IPY_MODEL_605acdd7a4cb4f118951fc79933f62f6",
1800
+ "value": "Generating train split: "
1801
+ }
1802
+ },
1803
+ "605acdd7a4cb4f118951fc79933f62f6": {
1804
+ "model_module": "@jupyter-widgets/controls",
1805
+ "model_module_version": "1.5.0",
1806
+ "model_name": "DescriptionStyleModel",
1807
+ "state": {
1808
+ "_model_module": "@jupyter-widgets/controls",
1809
+ "_model_module_version": "1.5.0",
1810
+ "_model_name": "DescriptionStyleModel",
1811
+ "_view_count": null,
1812
+ "_view_module": "@jupyter-widgets/base",
1813
+ "_view_module_version": "1.2.0",
1814
+ "_view_name": "StyleView",
1815
+ "description_width": ""
1816
+ }
1817
+ },
1818
+ "711a5f5d14ac4af180b54954e814f394": {
1819
+ "model_module": "@jupyter-widgets/base",
1820
+ "model_module_version": "1.2.0",
1821
+ "model_name": "LayoutModel",
1822
+ "state": {
1823
+ "_model_module": "@jupyter-widgets/base",
1824
+ "_model_module_version": "1.2.0",
1825
+ "_model_name": "LayoutModel",
1826
+ "_view_count": null,
1827
+ "_view_module": "@jupyter-widgets/base",
1828
+ "_view_module_version": "1.2.0",
1829
+ "_view_name": "LayoutView",
1830
+ "align_content": null,
1831
+ "align_items": null,
1832
+ "align_self": null,
1833
+ "border": null,
1834
+ "bottom": null,
1835
+ "display": null,
1836
+ "flex": null,
1837
+ "flex_flow": null,
1838
+ "grid_area": null,
1839
+ "grid_auto_columns": null,
1840
+ "grid_auto_flow": null,
1841
+ "grid_auto_rows": null,
1842
+ "grid_column": null,
1843
+ "grid_gap": null,
1844
+ "grid_row": null,
1845
+ "grid_template_areas": null,
1846
+ "grid_template_columns": null,
1847
+ "grid_template_rows": null,
1848
+ "height": null,
1849
+ "justify_content": null,
1850
+ "justify_items": null,
1851
+ "left": null,
1852
+ "margin": null,
1853
+ "max_height": null,
1854
+ "max_width": null,
1855
+ "min_height": null,
1856
+ "min_width": null,
1857
+ "object_fit": null,
1858
+ "object_position": null,
1859
+ "order": null,
1860
+ "overflow": null,
1861
+ "overflow_x": null,
1862
+ "overflow_y": null,
1863
+ "padding": null,
1864
+ "right": null,
1865
+ "top": null,
1866
+ "visibility": null,
1867
+ "width": null
1868
+ }
1869
+ },
1870
+ "a82a83de315c43f6b7f9fa787227b2e3": {
1871
+ "model_module": "@jupyter-widgets/controls",
1872
+ "model_module_version": "1.5.0",
1873
+ "model_name": "FloatProgressModel",
1874
+ "state": {
1875
+ "_dom_classes": [],
1876
+ "_model_module": "@jupyter-widgets/controls",
1877
+ "_model_module_version": "1.5.0",
1878
+ "_model_name": "FloatProgressModel",
1879
+ "_view_count": null,
1880
+ "_view_module": "@jupyter-widgets/controls",
1881
+ "_view_module_version": "1.5.0",
1882
+ "_view_name": "ProgressView",
1883
+ "bar_style": "info",
1884
+ "description": "",
1885
+ "description_tooltip": null,
1886
+ "layout": "IPY_MODEL_c13a0581f0ff46c4a399627cd8aa1c0c",
1887
+ "max": 1,
1888
+ "min": 0,
1889
+ "orientation": "horizontal",
1890
+ "style": "IPY_MODEL_4c31add106194a728e308ed486fc2e54",
1891
+ "value": 1
1892
+ }
1893
+ },
1894
+ "c13a0581f0ff46c4a399627cd8aa1c0c": {
1895
+ "model_module": "@jupyter-widgets/base",
1896
+ "model_module_version": "1.2.0",
1897
+ "model_name": "LayoutModel",
1898
+ "state": {
1899
+ "_model_module": "@jupyter-widgets/base",
1900
+ "_model_module_version": "1.2.0",
1901
+ "_model_name": "LayoutModel",
1902
+ "_view_count": null,
1903
+ "_view_module": "@jupyter-widgets/base",
1904
+ "_view_module_version": "1.2.0",
1905
+ "_view_name": "LayoutView",
1906
+ "align_content": null,
1907
+ "align_items": null,
1908
+ "align_self": null,
1909
+ "border": null,
1910
+ "bottom": null,
1911
+ "display": null,
1912
+ "flex": null,
1913
+ "flex_flow": null,
1914
+ "grid_area": null,
1915
+ "grid_auto_columns": null,
1916
+ "grid_auto_flow": null,
1917
+ "grid_auto_rows": null,
1918
+ "grid_column": null,
1919
+ "grid_gap": null,
1920
+ "grid_row": null,
1921
+ "grid_template_areas": null,
1922
+ "grid_template_columns": null,
1923
+ "grid_template_rows": null,
1924
+ "height": null,
1925
+ "justify_content": null,
1926
+ "justify_items": null,
1927
+ "left": null,
1928
+ "margin": null,
1929
+ "max_height": null,
1930
+ "max_width": null,
1931
+ "min_height": null,
1932
+ "min_width": null,
1933
+ "object_fit": null,
1934
+ "object_position": null,
1935
+ "order": null,
1936
+ "overflow": null,
1937
+ "overflow_x": null,
1938
+ "overflow_y": null,
1939
+ "padding": null,
1940
+ "right": null,
1941
+ "top": null,
1942
+ "visibility": null,
1943
+ "width": "20px"
1944
+ }
1945
+ },
1946
+ "c28bf5f9ac624739a48d62510d3bb05d": {
1947
+ "model_module": "@jupyter-widgets/controls",
1948
+ "model_module_version": "1.5.0",
1949
+ "model_name": "DescriptionStyleModel",
1950
+ "state": {
1951
+ "_model_module": "@jupyter-widgets/controls",
1952
+ "_model_module_version": "1.5.0",
1953
+ "_model_name": "DescriptionStyleModel",
1954
+ "_view_count": null,
1955
+ "_view_module": "@jupyter-widgets/base",
1956
+ "_view_module_version": "1.2.0",
1957
+ "_view_name": "StyleView",
1958
+ "description_width": ""
1959
+ }
1960
+ }
1961
+ }
1962
+ }
1963
+ },
1964
+ "nbformat": 4,
1965
+ "nbformat_minor": 0
1966
+ }
requirements.in ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ polars
2
+ datasets
3
+ pandas
4
+ toolz
5
+ matplotlib
6
+ gradio
7
+ plotly
8
+ huggingface-hub
requirements.txt ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile --resolver=backtracking requirements.in
6
+ #
7
+ aiofiles==23.1.0
8
+ # via gradio
9
+ aiohttp==3.8.4
10
+ # via
11
+ # datasets
12
+ # fsspec
13
+ # gradio
14
+ aiosignal==1.3.1
15
+ # via aiohttp
16
+ altair==4.2.2
17
+ # via gradio
18
+ anyio==3.6.2
19
+ # via
20
+ # httpcore
21
+ # starlette
22
+ async-timeout==4.0.2
23
+ # via aiohttp
24
+ attrs==22.2.0
25
+ # via
26
+ # aiohttp
27
+ # jsonschema
28
+ certifi==2022.12.7
29
+ # via
30
+ # httpcore
31
+ # httpx
32
+ # requests
33
+ charset-normalizer==3.1.0
34
+ # via
35
+ # aiohttp
36
+ # requests
37
+ click==8.1.3
38
+ # via uvicorn
39
+ contourpy==1.0.7
40
+ # via matplotlib
41
+ cycler==0.11.0
42
+ # via matplotlib
43
+ datasets==2.10.1
44
+ # via -r requirements.in
45
+ dill==0.3.6
46
+ # via
47
+ # datasets
48
+ # multiprocess
49
+ entrypoints==0.4
50
+ # via altair
51
+ fastapi==0.95.0
52
+ # via gradio
53
+ ffmpy==0.3.0
54
+ # via gradio
55
+ filelock==3.10.4
56
+ # via huggingface-hub
57
+ fonttools==4.39.2
58
+ # via matplotlib
59
+ frozenlist==1.3.3
60
+ # via
61
+ # aiohttp
62
+ # aiosignal
63
+ fsspec[http]==2023.3.0
64
+ # via
65
+ # datasets
66
+ # gradio
67
+ gradio==3.23.0
68
+ # via -r requirements.in
69
+ h11==0.14.0
70
+ # via
71
+ # httpcore
72
+ # uvicorn
73
+ httpcore==0.16.3
74
+ # via httpx
75
+ httpx==0.23.3
76
+ # via gradio
77
+ huggingface-hub==0.13.3
78
+ # via
79
+ # datasets
80
+ # gradio
81
+ idna==3.4
82
+ # via
83
+ # anyio
84
+ # requests
85
+ # rfc3986
86
+ # yarl
87
+ jinja2==3.1.2
88
+ # via
89
+ # altair
90
+ # gradio
91
+ jsonschema==4.17.3
92
+ # via altair
93
+ kiwisolver==1.4.4
94
+ # via matplotlib
95
+ linkify-it-py==2.0.0
96
+ # via markdown-it-py
97
+ markdown-it-py[linkify]==2.2.0
98
+ # via
99
+ # gradio
100
+ # mdit-py-plugins
101
+ markupsafe==2.1.2
102
+ # via
103
+ # gradio
104
+ # jinja2
105
+ matplotlib==3.7.1
106
+ # via
107
+ # -r requirements.in
108
+ # gradio
109
+ mdit-py-plugins==0.3.3
110
+ # via gradio
111
+ mdurl==0.1.2
112
+ # via markdown-it-py
113
+ multidict==6.0.4
114
+ # via
115
+ # aiohttp
116
+ # yarl
117
+ multiprocess==0.70.14
118
+ # via datasets
119
+ numpy==1.24.2
120
+ # via
121
+ # altair
122
+ # contourpy
123
+ # datasets
124
+ # gradio
125
+ # matplotlib
126
+ # pandas
127
+ # pyarrow
128
+ orjson==3.8.8
129
+ # via gradio
130
+ packaging==23.0
131
+ # via
132
+ # datasets
133
+ # huggingface-hub
134
+ # matplotlib
135
+ pandas==1.5.3
136
+ # via
137
+ # -r requirements.in
138
+ # altair
139
+ # datasets
140
+ # gradio
141
+ pillow==9.4.0
142
+ # via
143
+ # gradio
144
+ # matplotlib
145
+ plotly==5.13.1
146
+ # via -r requirements.in
147
+ polars==0.16.15
148
+ # via -r requirements.in
149
+ pyarrow==11.0.0
150
+ # via datasets
151
+ pydantic==1.10.7
152
+ # via
153
+ # fastapi
154
+ # gradio
155
+ pydub==0.25.1
156
+ # via gradio
157
+ pyparsing==3.0.9
158
+ # via matplotlib
159
+ pyrsistent==0.19.3
160
+ # via jsonschema
161
+ python-dateutil==2.8.2
162
+ # via
163
+ # matplotlib
164
+ # pandas
165
+ python-multipart==0.0.6
166
+ # via gradio
167
+ pytz==2022.7.1
168
+ # via pandas
169
+ pyyaml==6.0
170
+ # via
171
+ # datasets
172
+ # gradio
173
+ # huggingface-hub
174
+ requests==2.28.2
175
+ # via
176
+ # datasets
177
+ # fsspec
178
+ # gradio
179
+ # huggingface-hub
180
+ # responses
181
+ responses==0.18.0
182
+ # via datasets
183
+ rfc3986[idna2008]==1.5.0
184
+ # via httpx
185
+ semantic-version==2.10.0
186
+ # via gradio
187
+ six==1.16.0
188
+ # via python-dateutil
189
+ sniffio==1.3.0
190
+ # via
191
+ # anyio
192
+ # httpcore
193
+ # httpx
194
+ starlette==0.26.1
195
+ # via fastapi
196
+ tenacity==8.2.2
197
+ # via plotly
198
+ toolz==0.12.0
199
+ # via
200
+ # -r requirements.in
201
+ # altair
202
+ tqdm==4.65.0
203
+ # via
204
+ # datasets
205
+ # huggingface-hub
206
+ typing-extensions==4.5.0
207
+ # via
208
+ # gradio
209
+ # huggingface-hub
210
+ # pydantic
211
+ uc-micro-py==1.0.1
212
+ # via linkify-it-py
213
+ urllib3==1.26.15
214
+ # via
215
+ # requests
216
+ # responses
217
+ uvicorn==0.21.1
218
+ # via gradio
219
+ websockets==10.4
220
+ # via gradio
221
+ xxhash==3.2.0
222
+ # via datasets
223
+ yarl==1.8.2
224
+ # via aiohttp