Spaces:
Sleeping
Sleeping
import html
from urllib.parse import quote

import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
# Switch the page to Streamlit's "wide" layout before any other element is drawn.
st.set_page_config(layout="wide")

# Dataset pages the leaderboard can be built from; the first one is the default.
sources = [
    "https://huggingface.co/datasets/cfahlgren1/hub-stats",
    "https://huggingface.co/datasets/maxiw/hf-posts",
]

# Two header columns in a 2:3 width ratio: page title on the left,
# data-source picker on the right.
title_col, source_col = st.columns([2, 3])
title_col.header("HuggingFace 🤗 Posts leaderboard")
selected_source = source_col.selectbox("Data Source:", options=sources, index=0)
# Load the posts of the selected dataset into `df` and bring both sources to
# the same shape: a datetime "publishedAt" column plus "Name" (author's full
# name) and "username" (author's account name) columns.
if selected_source == sources[0]:
    try:
        df = pd.read_parquet("hf://datasets/cfahlgren1/hub-stats/posts.parquet")
        # Mirror the JSONL branch so later date arithmetic always sees
        # datetimes; this is a no-op when the column is already datetime.
        df["publishedAt"] = pd.to_datetime(df["publishedAt"])
        df["Name"] = df["fullname"]
        df["username"] = df["name"]
    except Exception as exp:
        # Any failure with the primary source is reported to the user and
        # the script falls through to the second source below.
        st.error(
            f"\nERROR>> in loading {selected_source}\n>> {exp}",
            icon="🚨",
        )
        selected_source = sources[1]
        st.info(
            f'\nThis can be solved by "Space Restart"\n'
            f"Switching Sources for now...\n"
            f"New Source: {selected_source}",
            icon="ℹ️",
        )

# Deliberately a plain `if`, not `elif`: the fallback above re-points
# `selected_source` at this source.
if selected_source == sources[1]:
    df = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)
    df["publishedAt"] = pd.to_datetime(df["publishedAt"])
    # In this source the author details are nested in an "author" mapping.
    df["Name"] = df["author"].apply(lambda author: author["fullname"])
    df["username"] = df["author"].apply(lambda author: author["name"])
# Columns the leaderboard reports and can be ranked by.
metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

# The date slider spans from the oldest to the newest post in the data.
published = df["publishedAt"]
min_date = published.min().to_pydatetime()
max_date = published.max().to_pydatetime()

# Wide column for the date slider, narrow one for the sort selector (3:1).
slider_col, sort_col = st.columns([3, 1])
date_range = slider_col.slider(
    "Select Date Range",
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
    format="DD/MMM/YYYY",
)
selected_metric = sort_col.selectbox("Sort by:", options=metrics, index=0)
# Keep only the posts published inside the selected date range. The explicit
# copy makes the column assignments below write to an independent frame
# instead of a slice of the loaded data (avoids SettingWithCopyWarning).
mask = df["publishedAt"].between(*date_range)
df = df[mask].copy()

# Each post carries a collection of reaction entries with a "count" field;
# collapse them into one total per post.
df["totalReactions"] = df["reactions"].apply(
    lambda reactions: sum(entry["count"] for entry in reactions)
)
# A constant 1 per row, so that summing per author yields the number of posts.
df["Num of posts"] = 1

# Ensure metrics columns are integers, treating missing values as 0.
df[metrics] = df[metrics].fillna(0).astype(int)

# One row per author with summed metrics, ordered by the chosen metric.
data = (
    df.groupby(["username", "Name"])[metrics]
    .sum()
    .sort_values(selected_metric, ascending=False)
    .reset_index()
)

# Replace the positional index with a 1-based rank in the sorted order.
data.index = np.arange(1, len(data) + 1)
data.index.name = "Rank"

# Format metric columns with thousands separators. Column-wise Series.map is
# used rather than DataFrame.applymap, which pandas has deprecated.
data[metrics] = data[metrics].apply(lambda column: column.map("{:,}".format))
def make_clickable(val: object) -> str:
    """Return an HTML anchor that opens the Hugging Face page for *val*.

    The value is percent-encoded for the URL path and HTML-escaped for the
    visible label, so a value containing markup characters cannot break out
    of the link when the table is rendered as raw HTML. Plain handles
    (letters, digits, ``-``, ``_``, ``.``) come out unchanged.

    Args:
        val: The account name to link to; converted with ``str()``.

    Returns:
        An ``<a>`` element, opening in a new tab, with *val* as its label.
    """
    text = str(val)
    href = quote(text, safe="")
    label = html.escape(text)
    return f'<a target="_blank" href="https://huggingface.co/{href}">{label}</a>'
# Render usernames as profile links. The free-text "Name" column is
# HTML-escaped because the table is written out with unsafe_allow_html=True,
# so unescaped names would be interpreted as markup by the browser. The
# escaping call comes second so it only replaces the formatter of "Name".
df_styled = data.style.format({"username": make_clickable}).format(
    escape="html", subset=["Name"]
)

# NOTE(review): Styler.to_html documents no `escape`/`index` parameters; these
# look like leftovers from DataFrame.to_html and are presumably passed through
# to the template unused - confirm before removing.
st.write(
    f"""<center>{df_styled.to_html(escape=False, index=False)}</center>""",
    unsafe_allow_html=True,
)