Spaces:
Runtime error
Runtime error
File size: 3,377 Bytes
4992d0d 1722c11 4992d0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from typing import Any, List
import gradio as gr
from toolz import concat, frequencies
import httpx
from functools import lru_cache
import pandas as pd
import plotly.express as px
import polars as pl
from pathlib import Path
from datetime import datetime
# Avatar image URL for librarian-bot (a cloudimg proxy URL wrapping an S3
# upload, cropped to a 200x200 face thumbnail via the query string).
librarian_bot_avatar = (
    "https://aeiljuispo.cloudimg.io/v7/"
    "https://s3.amazonaws.com/moonup/production/uploads/"
    "1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg"
    "?w=200&h=200&f=face"
)
@lru_cache(maxsize=512)
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch a user's recent discussion activity from the Hugging Face Hub.

    Pages through the ``recent-activity`` endpoint 100 items at a time, for
    at most 2000 items, and returns every item in one flat list. Results are
    memoised per ``user`` by ``lru_cache``, so the returned list is shared
    between callers and must not be mutated.

    Args:
        user: Hub username whose activity is requested.

    Returns:
        The ``recentActivity`` entries of all fetched pages, in page order.

    Raises:
        httpx.HTTPStatusError: If the Hub responds with a 4xx/5xx status.
    """
    page_size = 100
    max_items = 2000
    all_data: List[Any] = []
    # Begin at skip=0. Assuming `skip` is a zero-based offset, starting at 1
    # (as this function used to) silently drops the first activity item.
    for skip in range(0, max_items, page_size):
        r = httpx.get(
            "https://huggingface.co/api/recent-activity",
            # Passing params lets httpx URL-encode the user name.
            params={
                "limit": page_size,
                "type": "discussion",
                "skip": skip,
                "user": user,
            },
        )
        # Surface HTTP failures directly instead of as a confusing
        # KeyError/JSON error on the next line.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # An empty page means there is nothing further to fetch.
            break
        all_data.extend(activity)
    return all_data
@lru_cache(maxsize=512)
def get_pr_status(user: str):
    """Tally a user's pull requests by their ``status`` value.

    Only activity entries whose ``discussionData`` is flagged
    ``isPullRequest`` are counted. The result is cached per ``user``.

    Args:
        user: Hub username whose activity is inspected.

    Returns:
        A mapping from status to the number of pull requests with it.
    """
    statuses = []
    for entry in get_hub_community_activity(user):
        discussion = entry["discussionData"]
        if discussion["isPullRequest"]:
            statuses.append(discussion["status"])
    return frequencies(statuses)
def create_pie():
    """Build a Gradio plot holding a pie chart of librarian-bot PR statuses.

    Returns:
        A ``gr.Plot`` wrapping a plotly pie figure (seaborn template) with
        one slice per status, sized by the number of pull requests.
    """
    status_counts = get_pr_status("librarian-bot")
    table = pd.DataFrame(
        {"status": status_counts.keys(), "number": status_counts.values()}
    )
    pie = px.pie(table, values="number", names="status", template="seaborn")
    return gr.Plot(pie)
def parse_date_time(date_time: str) -> datetime:
    """Parse a ``YYYY-MM-DDTHH:MM:SS.fZ`` timestamp string.

    The trailing ``Z`` is matched as a literal character, so the returned
    ``datetime`` is naive (carries no tzinfo).

    Args:
        date_time: Timestamp text with fractional seconds and a ``Z`` suffix.

    Returns:
        The parsed naive ``datetime``.

    Raises:
        ValueError: If ``date_time`` does not match the expected format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    parsed = datetime.strptime(date_time, timestamp_format)
    return parsed
def parse_pr_data(data):
    """Flatten one activity entry into the fields used by the reports.

    Args:
        data: An activity entry carrying a ``discussionData`` mapping with
            ``createdAt``, ``num``, ``status`` and ``repo["name"]``.

    Returns:
        A dict with ``createdAt`` (parsed via ``parse_date_time``),
        ``pr_number``, ``status`` and ``repo_id``.
    """
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
    }
def group_status_by_pr_number():
    """Return the mean PR number per status for librarian-bot.

    Returns:
        A pandas DataFrame with one row per ``status`` and the mean of
        ``pr_number`` for that status.
    """
    activity = get_hub_community_activity("librarian-bot")
    # Keep only pull requests, the same filter get_pr_status applies;
    # otherwise plain discussions would be mixed into a PR-only table.
    records = [
        parse_pr_data(item)
        for item in activity
        if item["discussionData"]["isPullRequest"]
    ]
    frame = pl.DataFrame(records)
    # Newer polars releases renamed DataFrame.groupby to group_by; use
    # whichever one the installed version provides.
    group = getattr(frame, "group_by", None) or frame.groupby
    return group("status").agg(pl.mean("pr_number")).to_pandas()
def plot_over_time():
    """Plot cumulative librarian-bot pull request counts per status over time.

    Pull requests are bucketed by creation date, counted per status, and
    accumulated so that each line shows the running total for one status.

    Returns:
        A plotly line figure with the creation date on the x axis and one
        line per status column.
    """
    activity = get_hub_community_activity("librarian-bot")
    # Keep only pull requests, the same filter get_pr_status applies.
    records = [
        parse_pr_data(item)
        for item in activity
        if item["discussionData"]["isPullRequest"]
    ]
    # Truncate timestamps to calendar dates so PRs group per day.
    df = pl.DataFrame(records).with_columns(pl.col("createdAt").cast(pl.Date))
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # Days on which a status did not occur come out of the pivot as null.
    # No horizontal "sum" column is added here: it was never plotted, and
    # computing it over "open"/"closed"/"merged" failed whenever one of those
    # statuses was absent from the data.
    df = df.fill_null(0).sort("createdAt")
    df = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(df, x=df.index, y=list(df.columns))
# Assemble the dashboard: description, PR total, status pie chart,
# cumulative timeline, and the per-status table.
with gr.Blocks() as demo:
    # Deliberately NOT named `frequencies`: binding the dict to that name at
    # module level would replace the imported toolz function that
    # get_pr_status calls, so any later uncached call would fail.
    pr_status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    gr.Markdown(f"Total PRs opened: {sum(pr_status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        # create_pie() instantiates its gr.Plot inside this column context.
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())
demo.launch(debug=True)
|