Jon Solow
Add team column to player stats
a0f3421
raw
history blame
4.58 kB
import duckdb
import pandas as pd
import os
import streamlit as st
from typing import Callable
from domain.constants import SEASON
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
FANTASY_POSITIONS = [
"QB",
"RB",
"WR",
"TE",
"FB",
"K",
]
def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""
with grouped_stats as (
select player_id, sum(fantasy_points) as fantasy_points
from player_stats_player_stats_{season_int}
group by player_id
)
SELECT
team
, wr.position
, wr.jersey_number
, wr.status
, wr.full_name
, wr.headshot_url
, wr.week
, wr.gsis_id
, gs.fantasy_points
from weekly_rosters_roster_weekly_{season_int} wr
left join grouped_stats gs
on wr.gsis_id = gs.player_id
"""
).df()
return df
def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""SELECT
player_id
, recent_team as team
, week
, passing_tds
, passing_yards
, passing_2pt_conversions
, sack_fumbles_lost
, interceptions
, rushing_tds
, rushing_yards
, rushing_2pt_conversions
, rushing_fumbles_lost
, receptions
, receiving_tds
, receiving_yards
, receiving_2pt_conversions
, receiving_fumbles_lost
, special_teams_tds
from player_stats_player_stats_{season_int}"""
).df()
return df
def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""SELECT
player_id
, week
, pat_made
, fg_made_0_19
, fg_made_20_29
, fg_made_30_39
, fg_made_40_49
, fg_made_50_59
, fg_made_60_
from player_stats_player_stats_kicking_{season_int}"""
).df()
return df
def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""
select
week
, team
, sum(def_sacks) as def_sacks
, sum(def_interceptions) as def_interceptions
, sum(def_tds) as def_tds
, sum(def_fumble_recovery_opp) as def_fumble_recovery_opp
, sum(def_safety) as def_safety
from player_stats_player_stats_def_{season_int}
group by week, team
"""
).df()
return df
NFLVERSE_ASSETS = [
("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
("player_stats", f"player_stats_{SEASON}.parquet"),
("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
("player_stats", f"player_stats_def_{SEASON}.parquet"),
]
class NflVerseDataAsset:
def __init__(
self,
release_tag: str,
asset_name: str,
dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
):
self.release_tag = release_tag
self.asset_name = asset_name
self.dataframe_mutation_fxn = dataframe_mutation_fxn
self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"
def load_parquet_asset_to_df(self) -> pd.DataFrame:
location = os.path.join(BASE_URL, self.release_tag, self.asset_name)
df = pd.read_parquet(location)
return df
def register_asset_to_duckdb(self) -> None:
df = self.load_parquet_asset_to_df()
df = self.dataframe_mutation_fxn(df)
duckdb.register(self.table_name, df)
@st.cache_data(ttl=60 * 60 * 24)
def load_assets():
for tag, asset in NFLVERSE_ASSETS:
asset = NflVerseDataAsset(tag, asset)
asset.register_asset_to_duckdb()
def get_current_tables() -> list[str]:
current_tables_df = duckdb.sql("SHOW TABLES").df()
return current_tables_df["name"].tolist()