Jon Solow committed on
Commit
203b087
·
1 Parent(s): 78974c7

Update github_data to get stats and player rosters

Browse files
Files changed (1) hide show
  1. src/queries/nflverse/github_data.py +22 -54
src/queries/nflverse/github_data.py CHANGED
@@ -8,6 +8,7 @@ duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
8
 
9
  BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
10
 
 
11
 
12
  FANTASY_POSITIONS = [
13
  "QB",
@@ -19,73 +20,40 @@ FANTASY_POSITIONS = [
19
  ]
20
 
21
 
22
- def get_snap_counts(season_int: int) -> pd.DataFrame:
23
- df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df()
24
- df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS)
25
- return df
26
-
27
-
28
- def get_play_by_play(season_int: int) -> pd.DataFrame:
29
- df = duckdb.sql(f"SELECT * from pbp_play_by_play_{season_int}").df()
30
- return df
31
-
32
-
33
- def get_player_stats(season_int: int) -> pd.DataFrame:
34
- df = duckdb.sql("SELECT * from player_stats_player_stats").df()
35
- return df
36
-
37
-
38
- def get_ftn_charting(season_int: int) -> pd.DataFrame:
39
- df = duckdb.sql(f"SELECT * from ftn_charting_ftn_charting_{season_int}").df()
40
- return df
41
-
42
-
43
- def get_pbp_participation(season_int: int) -> pd.DataFrame:
44
  df = duckdb.sql(
45
  f"""
 
 
 
 
 
46
  SELECT
47
- a.*
48
- , b.*
49
- , 1 as count_col
50
- from pbp_participation_pbp_participation_{season_int} a
51
- left join pbp_play_by_play_{season_int} b
52
- on a.play_id = b.play_id
53
- and a.nflverse_game_id = b.game_id
54
- where b.week is not null
 
 
 
 
55
  """
56
  ).df()
57
  return df
58
 
59
 
60
- def get_nextgen_stats(season_int: int, stat_category: str) -> pd.DataFrame:
61
- df = duckdb.sql(f"SELECT * from nextgen_stats_ngs_{stat_category} where season = {season_int}").df()
62
  return df
63
 
64
 
65
- SEASON = "2023"
66
-
67
  NFLVERSE_ASSETS = [
68
- ("ftn_charting", f"ftn_charting_{SEASON}.parquet"),
69
- ("espn_data", "qbr_season_level.parquet"),
70
- ("espn_data", "qbr_week_level.parquet"),
71
- ("players", "players.parquet"),
72
- ("pbp_participation", f"pbp_participation_{SEASON}.parquet"),
73
- ("snap_counts", f"snap_counts_{SEASON}.parquet"),
74
  ("player_stats", f"player_stats_{SEASON}.parquet"),
75
- ("player_stats", f"player_stats_def_{SEASON}.parquet"),
76
- ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
77
- ("pfr_advstats", "advstats_season_def.parquet"),
78
- ("pfr_advstats", "advstats_season_pass.parquet"),
79
- ("pfr_advstats", "advstats_season_rec.parquet"),
80
- ("pfr_advstats", "advstats_season_rush.parquet"),
81
- ("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"),
82
- ("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"),
83
- ("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"),
84
- ("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"),
85
- ("pbp", f"play_by_play_{SEASON}.parquet"),
86
- ("nextgen_stats", "ngs_passing.parquet"),
87
- ("nextgen_stats", "ngs_receiving.parquet"),
88
- ("nextgen_stats", "ngs_rushing.parquet"),
89
  ]
90
 
91
 
 
8
 
9
  BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
10
 
11
+ SEASON = "2023"
12
 
13
  FANTASY_POSITIONS = [
14
  "QB",
 
20
  ]
21
 
22
 
23
+ def get_weekly_rosters(season_int: int = SEASON) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  df = duckdb.sql(
25
  f"""
26
+ with grouped_stats as (
27
+ select player_id, sum(fantasy_points) as fantasy_points
28
+ from player_stats_player_stats_{season_int}
29
+ group by player_id
30
+ )
31
  SELECT
32
+ team
33
+ , wr.position
34
+ , wr.jersey_number
35
+ , wr.status
36
+ , wr.full_name
37
+ , wr.headshot_url
38
+ , wr.week
39
+ , wr.gsis_id
40
+ , gs.fantasy_points
41
+ from weekly_rosters_roster_weekly_{season_int} wr
42
+ join grouped_stats gs
43
+ on wr.gsis_id = gs.player_id
44
  """
45
  ).df()
46
  return df
47
 
48
 
49
+ def get_player_stats(season_int: int = SEASON) -> pd.DataFrame:
50
+ df = duckdb.sql(f"SELECT * from player_stats_player_stats_{season_int}").df()
51
  return df
52
 
53
 
 
 
54
  NFLVERSE_ASSETS = [
55
+ ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
 
 
 
 
 
56
  ("player_stats", f"player_stats_{SEASON}.parquet"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ]
58
 
59