dashboard / app.py
librarian-bot's picture
Update app.py
1722c11
raw
history blame
3.38 kB
from typing import Any, List
import gradio as gr
from toolz import concat, frequencies
import httpx
from functools import lru_cache
import pandas as pd
import plotly.express as px
import polars as pl
from pathlib import Path
from datetime import datetime
# URL of the librarian-bot avatar image; the query string asks the cloudimg.io
# proxy for a 200x200 face crop.
# NOTE(review): not referenced anywhere in the visible code — confirm it is
# still needed.
librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
@lru_cache(maxsize=512)
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch a user's recent discussion activity from the Hugging Face Hub.

    Pages through the ``recent-activity`` endpoint 100 items at a time, for at
    most 2000 items, and stops early once a page comes back empty.

    The result is memoised per ``user`` by ``lru_cache``, so repeated calls
    return the same list object; callers should treat it as read-only.

    Args:
        user: Hub username whose activity is requested.

    Returns:
        A flat list of the entries found under ``recentActivity`` in each
        page, in the order the API returned them.

    Raises:
        httpx.HTTPStatusError: If the Hub answers with a 4xx/5xx status.
    """
    page_size = 100
    max_items = 2000
    all_data: List[Any] = []
    # Offsets start at 0: starting at 1 silently drops the first item.
    for skip in range(0, max_items, page_size):
        r = httpx.get(
            "https://huggingface.co/api/recent-activity",
            # Passing params lets httpx URL-encode the user name.
            params={
                "limit": page_size,
                "type": "discussion",
                "skip": skip,
                "user": user,
            },
        )
        # Fail with a clear HTTP error instead of a KeyError/JSON error below.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # Nothing left to page through; avoid the remaining requests.
            break
        all_data.extend(activity)
    return all_data
@lru_cache(maxsize=512)
def get_pr_status(user: str):
    """Count a user's pull requests by status.

    Walks the user's Hub activity, keeps the entries whose ``discussionData``
    is flagged ``isPullRequest``, and tallies their ``status`` values.

    Args:
        user: Hub username whose pull requests are counted.

    Returns:
        A mapping of status to the number of pull requests with that status.
    """
    statuses = []
    for item in get_hub_community_activity(user):
        discussion = item["discussionData"]
        if discussion["isPullRequest"]:
            statuses.append(discussion["status"])
    return frequencies(statuses)
def create_pie():
    """Build a pie chart of librarian-bot's pull requests split by status.

    Returns:
        A ``gr.Plot`` component wrapping the Plotly pie figure.
    """
    status_counts = get_pr_status("librarian-bot")
    table = pd.DataFrame(
        {"status": status_counts.keys(), "number": status_counts.values()}
    )
    pie = px.pie(table, values="number", names="status", template="seaborn")
    return gr.Plot(pie)
def parse_date_time(date_time: str) -> datetime:
    """Parse a timestamp such as ``2023-01-02T03:04:05.678Z``.

    Args:
        date_time: Timestamp with fractional seconds and a literal ``Z``
            suffix.

    Returns:
        The corresponding naive ``datetime`` (no tzinfo is attached).

    Raises:
        ValueError: If ``date_time`` does not match the expected layout.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    parsed = datetime.strptime(date_time, timestamp_format)
    return parsed
def parse_pr_data(data):
    """Flatten one activity entry into the fields used by the dashboard.

    Args:
        data: An activity entry; its ``discussionData`` mapping must provide
            ``createdAt``, ``num``, ``status`` and ``repo["name"]``.

    Returns:
        A dict with keys ``createdAt`` (parsed via ``parse_date_time``),
        ``pr_number``, ``status`` and ``repo_id``.
    """
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
    }
def group_status_by_pr_number():
    """Compute the mean PR number per status for librarian-bot's pull requests.

    Returns:
        A pandas DataFrame with one row per ``status`` and the mean of
        ``pr_number`` for that status.
    """
    activity = get_hub_community_activity("librarian-bot")
    # The activity feed is requested with type=discussion, so keep only the
    # entries flagged as pull requests — the same filter get_pr_status applies.
    pr_rows = [
        parse_pr_data(item)
        for item in activity
        if item["discussionData"]["isPullRequest"]
    ]
    return (
        pl.DataFrame(pr_rows).groupby("status").agg(pl.mean("pr_number")).to_pandas()
    )
def plot_over_time():
    """Plot the cumulative number of librarian-bot pull requests per status.

    Pull requests are bucketed by creation date, counted per status for each
    day, and accumulated over time.

    Returns:
        A Plotly line figure with one line per status column.
    """
    activity = get_hub_community_activity("librarian-bot")
    # The activity feed is requested with type=discussion, so keep only the
    # entries flagged as pull requests — the same filter get_pr_status applies.
    pr_rows = [
        parse_pr_data(item)
        for item in activity
        if item["discussionData"]["isPullRequest"]
    ]
    df = pl.DataFrame(pr_rows).with_columns(pl.col("createdAt").cast(pl.Date))
    # One row per day, one column per status, cells hold that day's count.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # No extra "sum" column is built here: it was never plotted, and summing
    # hard-coded status names raised whenever one of them was absent.
    df = df.fill_null(0).sort("createdAt")
    df = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(df, x=df.index, y=list(df.columns))
# Dashboard layout: description, headline total, then three report sections.
with gr.Blocks() as demo:
    # Deliberately not named `frequencies`: a module-level binding with that
    # name would overwrite toolz's `frequencies`, which get_pr_status calls
    # whenever its cache misses.
    pr_status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    gr.Markdown(f"Total PRs opened: {sum(pr_status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        # create_pie() instantiates its gr.Plot inside this column context.
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())
demo.launch(debug=True)