Spaces:

singhsidhukuldeep
/

posts_leaderboard

Sleeping

App Files Files Community

posts_leaderboard / app.py

singhsidhukuldeep

Update app.py

897f09c verified 12 months ago

raw

history blame contribute delete

3.1 kB

	import html
	from urllib.parse import quote

	import numpy as np
	import pandas as pd
	import streamlit as st
	from datasets import load_dataset

	# Use the full browser width so the leaderboard table has room.
	st.set_page_config(layout="wide")

	# Datasets the leaderboard can be built from; the first entry is the
	# default selection in the picker below.
	sources = [
	    "https://huggingface.co/datasets/cfahlgren1/hub-stats",
	    "https://huggingface.co/datasets/maxiw/hf-posts",
	]

	# Top row: page title on the left (2 parts), source picker on the right
	# (3 parts).  Calling a widget method on a column places it in that column.
	title_col, source_col = st.columns([2, 3])
	title_col.header("HuggingFace 🤗 Posts leaderboard")
	selected_source = source_col.selectbox(
	    "Data Source:",
	    options=sources,
	    index=0,
	)

	# First source: the hub-stats parquet export.  The author's display name
	# and handle are read from its `fullname` and `name` columns.
	if selected_source == sources[0]:
	    try:
	        df = pd.read_parquet("hf://datasets/cfahlgren1/hub-stats/posts.parquet")
	        df["Name"] = df["fullname"]
	        df["username"] = df["name"]
	    except Exception as exp:
	        # Report the failure in the UI, then re-point `selected_source` at
	        # the second source so the branch below loads it instead.
	        st.error(f'''
	ERROR>> in loading {selected_source}

	>> {exp}''', icon="🚨")
	        selected_source = sources[1]
	        st.info(f'''
	This can be solved by "Space Restart"

	Switching Sources for now...

	New Source: {selected_source}''', icon="ℹ️")

	# Second source (chosen directly, or reached after a failed load above):
	# the posts JSONL file, where each `author` value is indexed by
	# "fullname" and "name".
	if selected_source == sources[1]:
	    df = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)
	    df["publishedAt"] = pd.to_datetime(df["publishedAt"])
	    print(">>> ", df.columns)

	    authors = df["author"]
	    df["Name"] = authors.apply(lambda author: author["fullname"])
	    df["username"] = authors.apply(lambda author: author["name"])

	# Columns the leaderboard aggregates; the user sorts by one of them.
	metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

	# The slider spans from the earliest to the latest post in the loaded data.
	published = df["publishedAt"]
	min_date = published.min().to_pydatetime()
	max_date = published.max().to_pydatetime()

	# Controls row: wide date-range slider (3 parts) beside the sort picker
	# (1 part).  The slider starts with the whole range selected.
	range_col, sort_col = st.columns([3, 1])
	date_range = range_col.slider(
	    "Select Date Range",
	    min_value=min_date,
	    max_value=max_date,
	    value=(min_date, max_date),
	    format="DD/MMM/YYYY",
	)
	selected_metric = sort_col.selectbox(
	    "Sort by:",
	    options=metrics,
	    index=0,
	)


	# Keep only posts published inside the selected window.  Take an explicit
	# copy so the column assignments below write to an independent frame
	# instead of a slice of the original (avoids SettingWithCopyWarning).
	mask = df["publishedAt"].between(*date_range)
	df = df[mask].copy()

	# Per-post totals: add up the "count" of every reaction entry, and give
	# each row a unit weight so that summing it per author yields a post count.
	df["totalReactions"] = df["reactions"].apply(
	    lambda reactions: sum(reaction["count"] for reaction in reactions)
	)
	df["Num of posts"] = 1

	# Ensure metrics columns are integers, handling NaN values
	df[metrics] = df[metrics].fillna(0).astype(int)

	# One row per author, ordered by the chosen metric, ranked starting at 1.
	data = (
	    df.groupby(["username", "Name"])[metrics]
	    .sum()
	    .sort_values(selected_metric, ascending=False)
	    .reset_index()
	)
	data.index = np.arange(1, len(data) + 1)
	data.index.name = "Rank"

	# Format metrics columns with commas.  Series.map is applied column by
	# column because DataFrame.applymap is deprecated in recent pandas.
	for metric in metrics:
	    data[metric] = data[metric].map("{:,}".format)


	def make_clickable(val):
	    """Return an HTML anchor linking a username to its Hugging Face profile.

	    The value originates from a public dataset and the result is rendered
	    as raw HTML, so it is percent-encoded inside the URL and HTML-escaped in
	    the link text; markup embedded in the data cannot reach the page.

	    Args:
	        val: Username to link (any value; it is converted with ``str``).

	    Returns:
	        An ``<a>`` tag opening ``https://huggingface.co/<val>`` in a new tab.
	    """
	    username = str(val)
	    # safe="" also encodes "/" so the value stays a single path segment.
	    href = f"https://huggingface.co/{quote(username, safe='')}"
	    return f'<a target="_blank" href="{href}">{html.escape(username)}</a>'


	# Build the display table.  `username` becomes a profile link, and the
	# free-form `Name` text is HTML-escaped because the table is written to
	# the page with unsafe_allow_html=True.
	df_styled = data.style.format(make_clickable, subset=["username"]).format(
	    escape="html", subset=["Name"]
	)

	# Styler.to_html has no `escape` / `index` options (extra keywords are only
	# forwarded to the template), so none are passed; the "Rank" index is
	# shown.  The <center> wrapper is closed explicitly.
	st.write(
	    f"<center>{df_styled.to_html()}</center>",
	    unsafe_allow_html=True,
	)