Jon Solow
committed on
Commit
·
203b087
1
Parent(s):
78974c7
Update github_data to get stats and player rosters
Browse files
src/queries/nflverse/github_data.py
CHANGED
@@ -8,6 +8,7 @@ duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
|
|
8 |
|
9 |
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
|
10 |
|
|
|
11 |
|
12 |
FANTASY_POSITIONS = [
|
13 |
"QB",
|
@@ -19,73 +20,40 @@ FANTASY_POSITIONS = [
|
|
19 |
]
|
20 |
|
21 |
|
22 |
-
def
|
23 |
-
df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df()
|
24 |
-
df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS)
|
25 |
-
return df
|
26 |
-
|
27 |
-
|
28 |
-
def get_play_by_play(season_int: int) -> pd.DataFrame:
|
29 |
-
df = duckdb.sql(f"SELECT * from pbp_play_by_play_{season_int}").df()
|
30 |
-
return df
|
31 |
-
|
32 |
-
|
33 |
-
def get_player_stats(season_int: int) -> pd.DataFrame:
|
34 |
-
df = duckdb.sql("SELECT * from player_stats_player_stats").df()
|
35 |
-
return df
|
36 |
-
|
37 |
-
|
38 |
-
def get_ftn_charting(season_int: int) -> pd.DataFrame:
|
39 |
-
df = duckdb.sql(f"SELECT * from ftn_charting_ftn_charting_{season_int}").df()
|
40 |
-
return df
|
41 |
-
|
42 |
-
|
43 |
-
def get_pbp_participation(season_int: int) -> pd.DataFrame:
|
44 |
df = duckdb.sql(
|
45 |
f"""
|
|
|
|
|
|
|
|
|
|
|
46 |
SELECT
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
55 |
"""
|
56 |
).df()
|
57 |
return df
|
58 |
|
59 |
|
60 |
-
def
|
61 |
-
df = duckdb.sql(f"SELECT * from
|
62 |
return df
|
63 |
|
64 |
|
65 |
-
SEASON = "2023"
|
66 |
-
|
67 |
NFLVERSE_ASSETS = [
|
68 |
-
("
|
69 |
-
("espn_data", "qbr_season_level.parquet"),
|
70 |
-
("espn_data", "qbr_week_level.parquet"),
|
71 |
-
("players", "players.parquet"),
|
72 |
-
("pbp_participation", f"pbp_participation_{SEASON}.parquet"),
|
73 |
-
("snap_counts", f"snap_counts_{SEASON}.parquet"),
|
74 |
("player_stats", f"player_stats_{SEASON}.parquet"),
|
75 |
-
("player_stats", f"player_stats_def_{SEASON}.parquet"),
|
76 |
-
("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
|
77 |
-
("pfr_advstats", "advstats_season_def.parquet"),
|
78 |
-
("pfr_advstats", "advstats_season_pass.parquet"),
|
79 |
-
("pfr_advstats", "advstats_season_rec.parquet"),
|
80 |
-
("pfr_advstats", "advstats_season_rush.parquet"),
|
81 |
-
("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"),
|
82 |
-
("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"),
|
83 |
-
("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"),
|
84 |
-
("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"),
|
85 |
-
("pbp", f"play_by_play_{SEASON}.parquet"),
|
86 |
-
("nextgen_stats", "ngs_passing.parquet"),
|
87 |
-
("nextgen_stats", "ngs_receiving.parquet"),
|
88 |
-
("nextgen_stats", "ngs_rushing.parquet"),
|
89 |
]
|
90 |
|
91 |
|
|
|
8 |
|
9 |
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
|
10 |
|
11 |
+
SEASON = "2023"
|
12 |
|
13 |
FANTASY_POSITIONS = [
|
14 |
"QB",
|
|
|
20 |
]
|
21 |
|
22 |
|
23 |
+
def get_weekly_rosters(season_int: int = SEASON) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
df = duckdb.sql(
|
25 |
f"""
|
26 |
+
with grouped_stats as (
|
27 |
+
select player_id, sum(fantasy_points) as fantasy_points
|
28 |
+
from player_stats_player_stats_{season_int}
|
29 |
+
group by player_id
|
30 |
+
)
|
31 |
SELECT
|
32 |
+
team
|
33 |
+
, wr.position
|
34 |
+
, wr.jersey_number
|
35 |
+
, wr.status
|
36 |
+
, wr.full_name
|
37 |
+
, wr.headshot_url
|
38 |
+
, wr.week
|
39 |
+
, wr.gsis_id
|
40 |
+
, gs.fantasy_points
|
41 |
+
from weekly_rosters_roster_weekly_{season_int} wr
|
42 |
+
join grouped_stats gs
|
43 |
+
on wr.gsis_id = gs.player_id
|
44 |
"""
|
45 |
).df()
|
46 |
return df
|
47 |
|
48 |
|
49 |
+
def get_player_stats(season_int: int = SEASON) -> pd.DataFrame:
|
50 |
+
df = duckdb.sql(f"SELECT * from player_stats_player_stats_{season_int}").df()
|
51 |
return df
|
52 |
|
53 |
|
|
|
|
|
54 |
NFLVERSE_ASSETS = [
|
55 |
+
("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
|
|
|
|
|
|
|
|
|
|
|
56 |
("player_stats", f"player_stats_{SEASON}.parquet"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
]
|
58 |
|
59 |
|