librarian-bot committed on
Commit
4992d0d
·
1 Parent(s): 931a9a7

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +100 -0
  2. description.html +21 -0
  3. requirements.in +8 -0
  4. requirements.txt +226 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, List
2
+ import gradio as gr
3
+ from toolz import concat, frequencies
4
+ import httpx
5
+ from functools import lru_cache
6
+ import pandas as pd
7
+ import plotly.express as px
8
+ import polars as pl
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+
12
+ librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
13
+
14
+
15
@lru_cache(maxsize=512)
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch up to ~2000 recent discussion/PR activity items for *user* from the Hub API.

    Pages through the `recent-activity` endpoint 100 items at a time and
    flattens the pages into a single list. Results are cached per user for
    the lifetime of the process (lru_cache).
    """
    all_data = []
    # Start paging at skip=0 so the most recent activity item is included —
    # the original started at skip=1, which silently dropped the first entry
    # (assumes the API's `skip` is a zero-based offset; TODO confirm).
    for skip in range(0, 2000, 100):
        r = httpx.get(
            f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={skip}&user={user}"
        )
        # Fail loudly on HTTP errors instead of raising a confusing KeyError
        # from the .json() lookup below.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # No more pages — stop issuing requests early.
            break
        all_data.append(activity)
    return list(concat(all_data))
25
+
26
+
27
@lru_cache(maxsize=512)
def get_pr_status(user: str):
    """Return a mapping of PR status -> count for pull requests opened by *user*.

    Only activity items flagged as pull requests are counted; the result is a
    plain dict keyed by status (e.g. "open", "closed", "merged").
    """
    counts: dict = {}
    for item in get_hub_community_activity(user):
        discussion = item["discussionData"]
        if discussion["isPullRequest"]:
            status = discussion["status"]
            counts[status] = counts.get(status, 0) + 1
    return counts
34
+
35
+
36
def create_pie():
    """Render a pie chart of librarian-bot PR statuses as a Gradio Plot component."""
    # Named `status_counts` (not `frequencies`) so we don't shadow the
    # module-level `frequencies` function imported from toolz.
    status_counts = get_pr_status("librarian-bot")
    df = pd.DataFrame(
        {"status": status_counts.keys(), "number": status_counts.values()}
    )
    fig = px.pie(df, values="number", names="status", template="seaborn")
    return gr.Plot(fig)
41
+
42
+
43
def parse_date_time(date_time: str) -> datetime:
    """Parse a Hub API timestamp such as '2023-03-01T12:34:56.789Z' into a datetime."""
    hub_timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.strptime(date_time, hub_timestamp_format)
45
+
46
+
47
def parse_pr_data(data):
    """Flatten one raw activity record into {createdAt, pr_number, status, repo_id}."""
    discussion = data["discussionData"]
    # Timestamp format used by the Hub API, e.g. '2023-03-01T12:34:56.789Z'.
    created_at = datetime.strptime(discussion["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
    return {
        "createdAt": created_at,
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
    }
59
+
60
+
61
def group_status_by_pr_number():
    """Mean PR number per status for librarian-bot, returned as a pandas DataFrame."""
    raw = get_hub_community_activity("librarian-bot")
    parsed = [parse_pr_data(item) for item in raw]
    frame = pl.DataFrame(parsed)
    grouped = frame.groupby("status").agg(pl.mean("pr_number"))
    return grouped.to_pandas()
67
+
68
+
69
def plot_over_time():
    """Cumulative librarian-bot PR counts over time, split by status, as a plotly line figure."""
    all_data = get_hub_community_activity("librarian-bot")
    all_data = [parse_pr_data(d) for d in all_data]
    # Truncate timestamps to calendar dates so PRs group by day.
    df = pl.DataFrame(all_data).with_columns(pl.col("createdAt").cast(pl.Date))
    # Pivot to one column per status, counting PRs per day
    # (polars 0.16 pivot API — later versions renamed these parameters).
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # Days with no PRs of a given status become 0 instead of null.
    df = df.fill_null(0)
    # NOTE(review): in polars 0.16, pl.sum over a list of column names sums
    # horizontally into a "sum" column; this API changed in later polars —
    # confirm before upgrading. Assumes all three statuses appear in the data,
    # otherwise the missing column raises — TODO confirm.
    df = df.with_columns(pl.sum(["open", "closed", "merged"])).sort("createdAt")
    # Running totals per status; the helper "sum" column is excluded from the plot.
    df = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(df, x=df.index, y=[c for c in df.columns if c != "sum"])
83
+
84
+
85
# Build the dashboard layout. NOTE: named `status_counts` instead of
# `frequencies` — the original rebound the module-level name `frequencies`,
# shadowing the toolz function that get_pr_status calls internally; only the
# lru_cache hit prevented a crash on later calls.
with gr.Blocks() as demo:
    status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    gr.Markdown(f"Total PRs opened: {sum(status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        create_pie()
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())
    with gr.Column():
        gr.Plot(plot_over_time())
# Launch at import time — intended for Hugging Face Spaces, which runs app.py directly.
demo.launch(debug=True)
description.html ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta name="viewport" content="width=device-width, initial-scale=1">
5
+ <style>
6
+ .avatar {
7
+ vertical-align: middle;
8
+ width: 50px;
9
+ height: 50px;
10
+ border-radius: 50%;
11
+ }
12
+ </style>
13
+ </head>
14
+ <body>
15
+
16
+ <h1>Librarian Bot Dashboard</h1>
17
+ <img align="left" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face" alt="Avatar" class="avatar">
18
+ <p>Librarian-bot is a bot that suggests changes to metadata for models and datasets hosted on the hub. This dashboard gives an overview of these pull requests</p>
19
+
20
+ </body>
21
+ </html>
requirements.in ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ httpx
2
+ toolz
3
+ gradio
4
+ plotly
5
+ pandas
6
+ polars
7
+ datasets
8
+ pyarrow
requirements.txt ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile --resolver=backtracking requirements.in
6
+ #
7
+ aiofiles==23.1.0
8
+ # via gradio
9
+ aiohttp==3.8.4
10
+ # via
11
+ # datasets
12
+ # fsspec
13
+ # gradio
14
+ aiosignal==1.3.1
15
+ # via aiohttp
16
+ altair==4.2.2
17
+ # via gradio
18
+ anyio==3.6.2
19
+ # via
20
+ # httpcore
21
+ # starlette
22
+ async-timeout==4.0.2
23
+ # via aiohttp
24
+ attrs==22.2.0
25
+ # via
26
+ # aiohttp
27
+ # jsonschema
28
+ certifi==2022.12.7
29
+ # via
30
+ # httpcore
31
+ # httpx
32
+ # requests
33
+ charset-normalizer==3.1.0
34
+ # via
35
+ # aiohttp
36
+ # requests
37
+ click==8.1.3
38
+ # via uvicorn
39
+ contourpy==1.0.7
40
+ # via matplotlib
41
+ cycler==0.11.0
42
+ # via matplotlib
43
+ datasets==2.10.1
44
+ # via -r requirements.in
45
+ dill==0.3.6
46
+ # via
47
+ # datasets
48
+ # multiprocess
49
+ entrypoints==0.4
50
+ # via altair
51
+ fastapi==0.95.0
52
+ # via gradio
53
+ ffmpy==0.3.0
54
+ # via gradio
55
+ filelock==3.10.6
56
+ # via huggingface-hub
57
+ fonttools==4.39.2
58
+ # via matplotlib
59
+ frozenlist==1.3.3
60
+ # via
61
+ # aiohttp
62
+ # aiosignal
63
+ fsspec[http]==2023.3.0
64
+ # via
65
+ # datasets
66
+ # gradio
67
+ gradio==3.23.0
68
+ # via -r requirements.in
69
+ h11==0.14.0
70
+ # via
71
+ # httpcore
72
+ # uvicorn
73
+ httpcore==0.16.3
74
+ # via httpx
75
+ httpx==0.23.3
76
+ # via
77
+ # -r requirements.in
78
+ # gradio
79
+ huggingface-hub==0.13.3
80
+ # via
81
+ # datasets
82
+ # gradio
83
+ idna==3.4
84
+ # via
85
+ # anyio
86
+ # requests
87
+ # rfc3986
88
+ # yarl
89
+ jinja2==3.1.2
90
+ # via
91
+ # altair
92
+ # gradio
93
+ jsonschema==4.17.3
94
+ # via altair
95
+ kiwisolver==1.4.4
96
+ # via matplotlib
97
+ linkify-it-py==2.0.0
98
+ # via markdown-it-py
99
+ markdown-it-py[linkify]==2.2.0
100
+ # via
101
+ # gradio
102
+ # mdit-py-plugins
103
+ markupsafe==2.1.2
104
+ # via
105
+ # gradio
106
+ # jinja2
107
+ matplotlib==3.7.1
108
+ # via gradio
109
+ mdit-py-plugins==0.3.3
110
+ # via gradio
111
+ mdurl==0.1.2
112
+ # via markdown-it-py
113
+ multidict==6.0.4
114
+ # via
115
+ # aiohttp
116
+ # yarl
117
+ multiprocess==0.70.14
118
+ # via datasets
119
+ numpy==1.24.2
120
+ # via
121
+ # altair
122
+ # contourpy
123
+ # datasets
124
+ # gradio
125
+ # matplotlib
126
+ # pandas
127
+ # pyarrow
128
+ orjson==3.8.8
129
+ # via gradio
130
+ packaging==23.0
131
+ # via
132
+ # datasets
133
+ # huggingface-hub
134
+ # matplotlib
135
+ pandas==1.5.3
136
+ # via
137
+ # -r requirements.in
138
+ # altair
139
+ # datasets
140
+ # gradio
141
+ pillow==9.4.0
142
+ # via
143
+ # gradio
144
+ # matplotlib
145
+ plotly==5.13.1
146
+ # via -r requirements.in
147
+ polars==0.16.16
148
+ # via -r requirements.in
149
+ pyarrow==11.0.0
150
+ # via
151
+ # -r requirements.in
152
+ # datasets
153
+ pydantic==1.10.7
154
+ # via
155
+ # fastapi
156
+ # gradio
157
+ pydub==0.25.1
158
+ # via gradio
159
+ pyparsing==3.0.9
160
+ # via matplotlib
161
+ pyrsistent==0.19.3
162
+ # via jsonschema
163
+ python-dateutil==2.8.2
164
+ # via
165
+ # matplotlib
166
+ # pandas
167
+ python-multipart==0.0.6
168
+ # via gradio
169
+ pytz==2023.2
170
+ # via pandas
171
+ pyyaml==6.0
172
+ # via
173
+ # datasets
174
+ # gradio
175
+ # huggingface-hub
176
+ requests==2.28.2
177
+ # via
178
+ # datasets
179
+ # fsspec
180
+ # gradio
181
+ # huggingface-hub
182
+ # responses
183
+ responses==0.18.0
184
+ # via datasets
185
+ rfc3986[idna2008]==1.5.0
186
+ # via httpx
187
+ semantic-version==2.10.0
188
+ # via gradio
189
+ six==1.16.0
190
+ # via python-dateutil
191
+ sniffio==1.3.0
192
+ # via
193
+ # anyio
194
+ # httpcore
195
+ # httpx
196
+ starlette==0.26.1
197
+ # via fastapi
198
+ tenacity==8.2.2
199
+ # via plotly
200
+ toolz==0.12.0
201
+ # via
202
+ # -r requirements.in
203
+ # altair
204
+ tqdm==4.65.0
205
+ # via
206
+ # datasets
207
+ # huggingface-hub
208
+ typing-extensions==4.5.0
209
+ # via
210
+ # gradio
211
+ # huggingface-hub
212
+ # pydantic
213
+ uc-micro-py==1.0.1
214
+ # via linkify-it-py
215
+ urllib3==1.26.15
216
+ # via
217
+ # requests
218
+ # responses
219
+ uvicorn==0.21.1
220
+ # via gradio
221
+ websockets==10.4
222
+ # via gradio
223
+ xxhash==3.2.0
224
+ # via datasets
225
+ yarl==1.8.2
226
+ # via aiohttp