Spaces:
Runtime error
Runtime error
File size: 4,166 Bytes
4992d0d b9692b3 4992d0d b9692b3 0b68578 c22cfb8 4992d0d b9692b3 4992d0d b9692b3 4992d0d b9692b3 4992d0d b9692b3 e72c447 b9692b3 4992d0d e72c447 4992d0d b9692b3 4992d0d b9692b3 4992d0d b9692b3 4992d0d 1722c11 4992d0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
from typing import Any, List
import gradio as gr
from toolz import concat
import httpx
import plotly.express as px
import polars as pl
from pathlib import Path
from datasets import load_dataset
from cachetools import TTLCache, cached
from datetime import datetime, timedelta
from datasets import Dataset
import os
# Hugging Face API token used by update_data() to push the refreshed stats
# dataset back to the Hub.
# Bug fix: os.getenv is a function, not a mapping -- the original
# `os.getenv["HUGGINGFACE_TOKEN"]` raised TypeError at import time.
token = os.getenv("HUGGINGFACE_TOKEN")
assert token, "HUGGINGFACE_TOKEN environment variable must be set"
# Avatar shown for librarian-bot (currently unused in the layout below).
librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
@cached(cache=TTLCache(maxsize=1000, ttl=timedelta(minutes=10), timer=datetime.now))
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch up to 2,000 recent discussion/PR activity items for ``user``.

    Pages through the Hub's recent-activity endpoint 100 items at a time and
    returns the flattened list of raw activity records. Results are cached
    for 10 minutes (TTLCache above).
    """
    all_data = []
    # Bug fix: pagination previously started at skip=1, which permanently
    # dropped the newest activity item. Pages are skip=0, 100, 200, ...
    for skip in range(0, 2000, 100):
        r = httpx.get(
            f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={skip}&user={user}"
        )
        # Fail loudly on an HTTP error instead of a confusing KeyError below.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # Ran out of pages -- stop issuing useless requests.
            break
        all_data.append(activity)
    return list(concat(all_data))
def parse_date_time(date_time: str) -> datetime:
    """Parse a Hub timestamp such as ``2023-05-01T12:30:45.123Z`` into a
    (naive) ``datetime``."""
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.strptime(date_time, timestamp_format)
def parse_pr_data(data):
    """Flatten one raw recent-activity record into a row dict for the stats
    table. The fields of interest live under the record's "discussionData"
    key."""
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
        "type": discussion["repo"]["type"],
        "isPullRequest": discussion["isPullRequest"],
    }
@cached(cache=TTLCache(maxsize=1000, ttl=timedelta(minutes=30), timer=datetime.now))
def update_data():
    """Merge freshly fetched librarian-bot activity into the persisted
    ``librarian-bot/stats`` dataset, push the result back to the Hub, and
    return it as a polars DataFrame. Cached for 30 minutes."""
    existing = pl.DataFrame(
        load_dataset("librarian-bot/stats", split="train").data.table
    )
    fresh_rows = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    # Deduplicate so re-runs don't accumulate repeated rows.
    combined = pl.concat([existing, pl.DataFrame(fresh_rows)]).unique()
    Dataset(combined.to_arrow()).push_to_hub("librarian-bot/stats", token=token)
    return combined
# def get_pr_status():
# df = update_data()
# df = df.filter(pl.col("isPullRequest") is True)
# return df.select(pl.col("status").value_counts())
# # return frequencies(x["status"] for x in pr_data)
def create_pie():
    """Build a pie chart of librarian-bot pull-request statuses
    (open / closed / merged) from the refreshed stats dataset."""
    df = update_data()
    # Bug fix: the original used `pl.col("isPullRequest") is True`, which is
    # a Python identity check on the Expr object and always evaluates to
    # False -- the filter never selected pull requests. Pass the boolean
    # expression itself instead.
    df = df.filter(pl.col("isPullRequest"))
    counts = df["status"].value_counts().to_pandas()
    fig = px.pie(counts, values="counts", names="status", template="seaborn")
    return gr.Plot(fig)
def group_status_by_pr_number():
    """Return a pandas DataFrame with the mean PR number per status for
    librarian-bot's recent activity."""
    records = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    grouped = pl.DataFrame(records).groupby("status").agg(pl.mean("pr_number"))
    return grouped.to_pandas()
def plot_over_time():
    """Cumulative count of librarian-bot PRs per status (open/closed/merged)
    over time, as a plotly line chart."""
    all_data = get_hub_community_activity("librarian-bot")
    all_data = [parse_pr_data(d) for d in all_data]
    # Truncate timestamps to calendar dates so rows group by day.
    df = pl.DataFrame(all_data).with_columns(pl.col("createdAt").cast(pl.Date))
    # Pivot to one row per date with a per-status column holding that day's
    # count of PRs in that status.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # Days with no PRs in a given status get 0 instead of null.
    df = df.fill_null(0)
    # NOTE(review): pl.sum over a list of columns appears intended as a
    # horizontal row total (a "sum" column) -- version-sensitive polars
    # behavior; confirm against the pinned polars release.
    df = df.with_columns(pl.sum(["open", "closed", "merged"])).sort("createdAt")
    # Running totals per status, indexed by date for plotting.
    df = df.to_pandas().set_index("createdAt").cumsum()
    # Plot every status column; the row-total "sum" column is excluded.
    return px.line(df, x=df.index, y=[c for c in df.columns if c != "sum"])
# Refresh the persisted dataset once at startup (the return value is unused
# here; create_pie() re-reads it through the 30-minute TTL cache).
df = update_data()

# Dashboard layout: description, status pie chart, cumulative line chart,
# and a mean-PR-number-by-status table.
with gr.Blocks() as demo:
    # frequencies = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    # gr.Markdown(f"Total PRs opened: {sum(frequencies.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by"
            " librarian bot that are open, closed or merged"
        )
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())

demo.launch(debug=True)
|