"""Gradio dashboard summarising the pull requests opened by librarian-bot.

The script fetches librarian-bot's recent discussion activity from the
Hugging Face Hub API and renders a status pie chart, a cumulative
per-status line chart and a per-status table.
"""
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List

import gradio as gr
import httpx
import pandas as pd
import plotly.express as px
import polars as pl
from toolz import concat, frequencies

librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"

# Status columns that ``plot_over_time`` sums over; each must exist after the pivot.
_PLOTTED_STATUSES = ("open", "closed", "merged")


@lru_cache(maxsize=512)
def get_hub_community_activity(user: str) -> List[Any]:
    """Return up to 2000 recent discussion activity items for *user*.

    The Hub ``recent-activity`` endpoint is queried in pages of 100 items.
    Results are memoised per user by ``lru_cache``, so callers should treat
    the returned list as read-only.

    Raises:
        httpx.HTTPStatusError: if the API answers with an error status.
    """
    pages = []
    # ``skip`` is an offset, so the first page must start at 0; starting at 1
    # would silently drop the most recent activity item.
    for skip in range(0, 2000, 100):
        r = httpx.get(
            f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={skip}&user={user}"
        )
        # Fail loudly on HTTP errors instead of a confusing KeyError below.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # An empty page means there is nothing further to fetch.
            break
        pages.append(activity)
    return list(concat(pages))


@lru_cache(maxsize=512)
def get_pr_status(user: str):
    """Return a mapping of pull-request status -> count for *user*.

    Only activity items flagged ``isPullRequest`` are counted.
    """
    all_data = get_hub_community_activity(user)
    pr_data = (
        x["discussionData"] for x in all_data if x["discussionData"]["isPullRequest"]
    )
    return frequencies(x["status"] for x in pr_data)


def create_pie():
    """Build a ``gr.Plot`` holding a pie chart of librarian-bot PR statuses."""
    # Named differently from ``toolz.frequencies`` so the import is not shadowed.
    status_counts = get_pr_status("librarian-bot")
    df = pd.DataFrame(
        {
            "status": list(status_counts.keys()),
            "number": list(status_counts.values()),
        }
    )
    fig = px.pie(df, values="number", names="status", template="seaborn")
    return gr.Plot(fig)


def parse_date_time(date_time: str) -> datetime:
    """Parse a timestamp such as ``2023-01-31T12:34:56.789Z`` into a naive datetime."""
    return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S.%fZ")


def parse_pr_data(data):
    """Flatten one activity item into the fields used by the charts.

    Args:
        data: an activity item containing a ``discussionData`` mapping with
            ``createdAt``, ``num``, ``status`` and ``repo.name`` entries.

    Returns:
        A dict with ``createdAt`` (datetime), ``pr_number``, ``status`` and
        ``repo_id`` keys.
    """
    data = data["discussionData"]
    createdAt = parse_date_time(data["createdAt"])
    pr_number = data["num"]
    status = data["status"]
    repo_id = data["repo"]["name"]
    return {
        "createdAt": createdAt,
        "pr_number": pr_number,
        "status": status,
        "repo_id": repo_id,
    }


def _get_parsed_pr_data(user: str) -> List[Dict[str, Any]]:
    """Return parsed records for *user*'s pull requests only.

    Applies the same ``isPullRequest`` filter as ``get_pr_status`` so every
    chart and table is computed over the same set of items.
    """
    return [
        parse_pr_data(item)
        for item in get_hub_community_activity(user)
        if item["discussionData"]["isPullRequest"]
    ]


def group_status_by_pr_number():
    """Return a pandas DataFrame with the mean PR number for each status."""
    all_data = _get_parsed_pr_data("librarian-bot")
    return (
        pl.DataFrame(all_data).groupby("status").agg(pl.mean("pr_number")).to_pandas()
    )


def plot_over_time():
    """Return a plotly line chart of cumulative PR counts per status by day."""
    all_data = _get_parsed_pr_data("librarian-bot")
    df = pl.DataFrame(all_data).with_columns(pl.col("createdAt").cast(pl.Date))
    # One row per day, one column per status, each cell the number of PRs.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    df = df.fill_null(0)
    # The pivot only creates columns for statuses that actually occur; add
    # zero-filled columns for absent ones so the sum below cannot fail.
    for status in _PLOTTED_STATUSES:
        if status not in df.columns:
            df = df.with_columns(pl.lit(0).alias(status))
    df = df.with_columns(pl.sum(["open", "closed", "merged"])).sort("createdAt")
    df = df.to_pandas().set_index("createdAt").cumsum()
    # The helper "sum" column is kept in the frame but not drawn.
    return px.line(df, x=df.index, y=[c for c in df.columns if c != "sum"])


with gr.Blocks() as demo:
    # Do not call this ``frequencies``: rebinding the module-level name would
    # replace ``toolz.frequencies`` and break later calls to ``get_pr_status``.
    pr_status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text())
    gr.Markdown(f"Total PRs opened: {sum(pr_status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())

demo.launch(debug=True)