File size: 3,377 Bytes
4992d0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1722c11
 
 
4992d0d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from typing import Any, List
import gradio as gr
from toolz import concat, frequencies
import httpx
from functools import lru_cache
import pandas as pd
import plotly.express as px
import polars as pl
from pathlib import Path
from datetime import datetime

# Avatar image URL for the librarian-bot Hugging Face account.
# NOTE(review): not referenced anywhere in the visible code — presumably kept
# for a chat/profile UI elsewhere; confirm before removing.
librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"


@lru_cache(maxsize=512)
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch up to 2,000 recent discussion-activity records for *user*.

    Pages through the Hugging Face recent-activity API in chunks of 100 and
    returns the concatenated list of raw activity dicts. Results are memoized
    per user via ``lru_cache``.

    Raises:
        httpx.HTTPStatusError: if the API responds with an error status.
    """
    all_data = []
    # Fix: paging previously started at skip=1, silently dropping the single
    # most recent activity item; start at 0 instead.
    for skip in range(0, 2000, 100):
        r = httpx.get(
            f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={skip}&user={user}",
            timeout=30,  # bound each request instead of hanging the UI on a slow API
        )
        # Fail loudly on an HTTP error rather than KeyError-ing on its payload.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            break  # past the last page — stop issuing empty requests
        all_data.append(activity)
    return list(concat(all_data))


@lru_cache(maxsize=512)
def get_pr_status(user: str):
    """Return a mapping of pull-request status -> count for *user*'s PRs."""
    activity = get_hub_community_activity(user)
    pull_requests = [
        record["discussionData"]
        for record in activity
        if record["discussionData"]["isPullRequest"]
    ]
    return frequencies(pr["status"] for pr in pull_requests)


def create_pie():
    """Build a pie chart of librarian-bot PR statuses as a Gradio Plot.

    Returns:
        gr.Plot: component ready to be placed inside a ``gr.Blocks`` layout.
    """
    # Renamed local from `frequencies`, which shadowed the module-level
    # toolz.frequencies import inside this function.
    status_counts = get_pr_status("librarian-bot")
    df = pd.DataFrame(
        {"status": status_counts.keys(), "number": status_counts.values()}
    )
    fig = px.pie(df, values="number", names="status", template="seaborn")
    return gr.Plot(fig)


def parse_date_time(date_time: str) -> datetime:
    """Parse a Hub API timestamp (e.g. ``2023-01-02T03:04:05.123Z``) into a naive datetime."""
    hub_timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.strptime(date_time, hub_timestamp_format)


def parse_pr_data(data):
    """Flatten one raw activity record into a small dict of PR fields.

    Expects ``data["discussionData"]`` to carry ``createdAt`` (Hub timestamp
    string), ``num``, ``status`` and ``repo.name``.
    """
    details = data["discussionData"]
    return {
        "createdAt": datetime.strptime(details["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
        "pr_number": details["num"],
        "status": details["status"],
        "repo_id": details["repo"]["name"],
    }


def group_status_by_pr_number():
    """Mean PR number per status for librarian-bot, as a pandas DataFrame.

    Gives a rough sense of where in the PR sequence each status tends to occur.
    """
    raw_activity = get_hub_community_activity("librarian-bot")
    parsed = [parse_pr_data(record) for record in raw_activity]
    grouped = pl.DataFrame(parsed).groupby("status").agg(pl.mean("pr_number"))
    return grouped.to_pandas()


def plot_over_time():
    """Cumulative librarian-bot PR counts per status over time, as a Plotly line chart."""
    all_data = get_hub_community_activity("librarian-bot")
    all_data = [parse_pr_data(d) for d in all_data]
    # Truncate timestamps to calendar dates so PRs can be counted per day.
    df = pl.DataFrame(all_data).with_columns(pl.col("createdAt").cast(pl.Date))
    # Pivot to one column per status, each cell counting PRs created that day.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # Days with no PRs of a given status come out null; treat them as zero.
    df = df.fill_null(0)
    # NOTE(review): pl.sum over a list of column names assumes an older polars
    # API (horizontal sum producing a "sum" column) — confirm against the
    # pinned polars version. Also assumes exactly these three statuses occur.
    df = df.with_columns(pl.sum(["open", "closed", "merged"])).sort("createdAt")
    # Running totals per status; the helper "sum" column is excluded from the plot.
    df = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(df, x=df.index, y=[c for c in df.columns if c != "sum"])


# Dashboard layout: summary count, status pie chart, cumulative timeline,
# and a per-status mean-PR-number table.
with gr.Blocks() as demo:
    # Renamed from `frequencies`: the old name rebound the module-level
    # toolz.frequencies import, so any later uncached get_pr_status(user)
    # call would have tried to call a dict and raised TypeError.
    pr_status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    gr.Markdown(f"Total PRs opened: {sum(pr_status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())
demo.launch(debug=True)