|
import duckdb |
|
import pandas as pd |
|
import os |
|
from typing import Callable |
|
|
|
|
|
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000") |
|
|
|
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/" |
|
|
|
|
|
FANTASY_POSITIONS = [ |
|
"QB", |
|
"RB", |
|
"WR", |
|
"TE", |
|
"FB", |
|
"K", |
|
] |
|
|
|
|
|
def get_snap_counts(season_int: int) -> pd.DataFrame: |
|
df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df() |
|
df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS) |
|
return df |
|
|
|
|
|
def get_play_by_play(season_int: int) -> pd.DataFrame: |
|
df = duckdb.sql(f"SELECT * from pbp_play_by_play_{season_int}").df() |
|
return df |
|
|
|
|
|
def get_player_stats(season_int: int) -> pd.DataFrame: |
|
df = duckdb.sql("SELECT * from player_stats_player_stats").df() |
|
return df |
|
|
|
|
|
def get_ftn_charting(season_int: int) -> pd.DataFrame: |
|
df = duckdb.sql(f"SELECT * from ftn_charting_ftn_charting_{season_int}").df() |
|
return df |
|
|
|
|
|
def get_pbp_participation(season_int: int) -> pd.DataFrame: |
|
df = duckdb.sql( |
|
f""" |
|
SELECT |
|
a.* |
|
, b.* |
|
, 1 as count_col |
|
from pbp_participation_pbp_participation_{season_int} a |
|
left join pbp_play_by_play_{season_int} b |
|
on a.play_id = b.play_id |
|
and a.nflverse_game_id = b.game_id |
|
where b.week is not null |
|
""" |
|
).df() |
|
return df |
|
|
|
|
|
def get_nextgen_stats(season_int: int, stat_category: str) -> pd.DataFrame: |
|
df = duckdb.sql(f"SELECT * from nextgen_stats_ngs_{stat_category} where season = {season_int}").df() |
|
return df |
|
|
|
|
|
SEASON = "2023" |
|
|
|
NFLVERSE_ASSETS = [ |
|
("ftn_charting", f"ftn_charting_{SEASON}.parquet"), |
|
("espn_data", "qbr_season_level.parquet"), |
|
("espn_data", "qbr_week_level.parquet"), |
|
("players", "players.parquet"), |
|
("pbp_participation", f"pbp_participation_{SEASON}.parquet"), |
|
("snap_counts", f"snap_counts_{SEASON}.parquet"), |
|
("player_stats", f"player_stats_{SEASON}.parquet"), |
|
("player_stats", f"player_stats_def_{SEASON}.parquet"), |
|
("player_stats", f"player_stats_kicking_{SEASON}.parquet"), |
|
("pfr_advstats", "advstats_season_def.parquet"), |
|
("pfr_advstats", "advstats_season_pass.parquet"), |
|
("pfr_advstats", "advstats_season_rec.parquet"), |
|
("pfr_advstats", "advstats_season_rush.parquet"), |
|
("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"), |
|
("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"), |
|
("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"), |
|
("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"), |
|
("pbp", f"play_by_play_{SEASON}.parquet"), |
|
("nextgen_stats", "ngs_passing.parquet"), |
|
("nextgen_stats", "ngs_receiving.parquet"), |
|
("nextgen_stats", "ngs_rushing.parquet"), |
|
] |
|
|
|
|
|
class NflVerseDataAsset: |
|
def __init__( |
|
self, |
|
release_tag: str, |
|
asset_name: str, |
|
dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x, |
|
): |
|
self.release_tag = release_tag |
|
self.asset_name = asset_name |
|
self.dataframe_mutation_fxn = dataframe_mutation_fxn |
|
self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}" |
|
|
|
def load_parquet_asset_to_df(self) -> pd.DataFrame: |
|
location = os.path.join(BASE_URL, self.release_tag, self.asset_name) |
|
df = pd.read_parquet(location) |
|
return df |
|
|
|
def register_asset_to_duckdb(self) -> None: |
|
df = self.load_parquet_asset_to_df() |
|
df = self.dataframe_mutation_fxn(df) |
|
duckdb.register(self.table_name, df) |
|
|
|
|
|
def load_assets(): |
|
for tag, asset in NFLVERSE_ASSETS: |
|
asset = NflVerseDataAsset(tag, asset) |
|
asset.register_asset_to_duckdb() |
|
|
|
|
|
def get_current_tables() -> list[str]: |
|
current_tables_df = duckdb.sql("SHOW TABLES").df() |
|
return current_tables_df["name"].tolist() |
|
|