Jon Solow
Add in stats to nflverse queries
e0b65b7
raw
history blame
3.98 kB
import duckdb
import pandas as pd
import os
from typing import Callable
from domain.constants import SEASON
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
FANTASY_POSITIONS = [
"QB",
"RB",
"WR",
"TE",
"FB",
"K",
]
def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""
with grouped_stats as (
select player_id, sum(fantasy_points) as fantasy_points
from player_stats_player_stats_{season_int}
group by player_id
)
SELECT
team
, wr.position
, wr.jersey_number
, wr.status
, wr.full_name
, wr.headshot_url
, wr.week
, wr.gsis_id
, gs.fantasy_points
from weekly_rosters_roster_weekly_{season_int} wr
left join grouped_stats gs
on wr.gsis_id = gs.player_id
"""
).df()
return df
def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""SELECT
player_id
, week
, passing_tds
, passing_yards
, passing_2pt_conversions
, sack_fumbles_lost
, interceptions
, rushing_tds
, rushing_yards
, rushing_2pt_conversions
, rushing_fumbles_lost
, receptions
, receiving_tds
, receiving_yards
, receiving_2pt_conversions
, receiving_fumbles_lost
, special_teams_tds
from player_stats_player_stats_{season_int}"""
).df()
return df
def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
df = duckdb.sql(
f"""SELECT
player_id
, week
, pat_made
, fg_made_0_19
, fg_made_20_29
, fg_made_30_39
, fg_made_40_49
, fg_made_50_59
, fg_made_60_
from player_stats_player_stats_kicking_{season_int}"""
).df()
return df
NFLVERSE_ASSETS = [
("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
("player_stats", f"player_stats_{SEASON}.parquet"),
("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
]
class NflVerseDataAsset:
def __init__(
self,
release_tag: str,
asset_name: str,
dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
):
self.release_tag = release_tag
self.asset_name = asset_name
self.dataframe_mutation_fxn = dataframe_mutation_fxn
self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"
def load_parquet_asset_to_df(self) -> pd.DataFrame:
location = os.path.join(BASE_URL, self.release_tag, self.asset_name)
df = pd.read_parquet(location)
return df
def register_asset_to_duckdb(self) -> None:
df = self.load_parquet_asset_to_df()
df = self.dataframe_mutation_fxn(df)
duckdb.register(self.table_name, df)
def load_assets():
for tag, asset in NFLVERSE_ASSETS:
asset = NflVerseDataAsset(tag, asset)
asset.register_asset_to_duckdb()
def get_current_tables() -> list[str]:
current_tables_df = duckdb.sql("SHOW TABLES").df()
return current_tables_df["name"].tolist()
def load_assets_if_no_tables():
if not len(get_current_tables()):
load_assets()