Spaces:
Running
Running
Commit
·
e86b89f
1
Parent(s):
9eda2f5
Add team filtering after percentile computation in pitch leaderboard
Browse files
- pitch_leaderboard.py +10 -6
- plotting.py +1 -74
pitch_leaderboard.py
CHANGED
@@ -43,9 +43,6 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
43 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
44 |
if pitcher_lr != 'Both':
|
45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
46 |
-
|
47 |
-
if include_teams is not None:
|
48 |
-
data = data.filter(pl.col('pitcher_team_name_short').is_in(include_teams))
|
49 |
|
50 |
# both, left, right = [
|
51 |
# (
|
@@ -80,6 +77,9 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
80 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
81 |
)
|
82 |
|
|
|
|
|
|
|
83 |
styling = []
|
84 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
85 |
styling_row = []
|
@@ -143,8 +143,8 @@ def create_pitch_leaderboard():
|
|
143 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
144 |
column_widths=[200, 100, 200, 200] + [100]*len(STATS),
|
145 |
show_copy_button=True,
|
146 |
-
show_search=
|
147 |
-
pinned_columns=3
|
148 |
)
|
149 |
|
150 |
gr.Markdown(notes)
|
@@ -152,7 +152,11 @@ def create_pitch_leaderboard():
|
|
152 |
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
|
153 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
154 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
155 |
-
# pin_columns.input(
|
|
|
|
|
|
|
|
|
156 |
|
157 |
return app
|
158 |
|
|
|
43 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
44 |
if pitcher_lr != 'Both':
|
45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
|
|
|
|
|
|
46 |
|
47 |
# both, left, right = [
|
48 |
# (
|
|
|
77 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
78 |
)
|
79 |
|
80 |
+
if include_teams is not None:
|
81 |
+
pitch_stats = pitch_stats.filter(pl.col('Team').is_in(include_teams))
|
82 |
+
|
83 |
styling = []
|
84 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
85 |
styling_row = []
|
|
|
143 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
144 |
column_widths=[200, 100, 200, 200] + [100]*len(STATS),
|
145 |
show_copy_button=True,
|
146 |
+
show_search='filter',
|
147 |
+
pinned_columns=3,
|
148 |
)
|
149 |
|
150 |
gr.Markdown(notes)
|
|
|
152 |
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
|
153 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
154 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
155 |
+
# pin_columns.input(
|
156 |
+
# lambda _pin_columns : gr.update(pinned_columns=None if _pin_columns else 3),
|
157 |
+
# inputs=pin_columns,
|
158 |
+
# outputs=leaderboard
|
159 |
+
# )
|
160 |
|
161 |
return app
|
162 |
|
plotting.py
CHANGED
@@ -10,86 +10,13 @@ import numpy as np
|
|
10 |
from types import SimpleNamespace
|
11 |
from datetime import date
|
12 |
|
13 |
-
from data import data_df
|
14 |
from convert import ball_kind_code_to_color, get_text_color_from_color
|
15 |
-
from stats import
|
16 |
|
17 |
|
18 |
mpl.use('Agg')
|
19 |
|
20 |
|
21 |
-
def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
|
22 |
-
source_data = data_df.filter(pl.col('ballKind_code') != '-')
|
23 |
-
|
24 |
-
# if start_date is not None:
|
25 |
-
# source_data = source_data.filter(pl.col('date') >= start_date)
|
26 |
-
# if end_date is not None:
|
27 |
-
# source_data = source_data.filter(pl.col('date') <= end_date)
|
28 |
-
#
|
29 |
-
# if game_kind is not None:
|
30 |
-
# source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
|
31 |
-
source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
|
32 |
-
|
33 |
-
source_data = (
|
34 |
-
compute_team_games(source_data)
|
35 |
-
.with_columns(
|
36 |
-
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
37 |
-
pl.col('inning_code').unique().len().over('pitId').alias('IP')
|
38 |
-
)
|
39 |
-
)
|
40 |
-
|
41 |
-
if min_ip == 'qualified':
|
42 |
-
source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
|
43 |
-
else:
|
44 |
-
source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
45 |
-
|
46 |
-
if lr is not None:
|
47 |
-
source_data = source_data.filter(pl.col('batLR') == lr)
|
48 |
-
|
49 |
-
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
|
50 |
-
|
51 |
-
pitch_shapes = (
|
52 |
-
source_data
|
53 |
-
.filter(
|
54 |
-
(pl.col('pitId') == id) &
|
55 |
-
pl.col('x').is_not_null() &
|
56 |
-
pl.col('y').is_not_null() &
|
57 |
-
(pl.col('ballSpeed') > 0)
|
58 |
-
)
|
59 |
-
[['pitId', 'general_ballKind_code', 'ballKind_code', 'ballSpeed', 'x', 'y']]
|
60 |
-
)
|
61 |
-
|
62 |
-
pitcher_stats = (
|
63 |
-
source_data
|
64 |
-
.group_by('pitId')
|
65 |
-
.agg(
|
66 |
-
pl.col('pitcher_name').first(),
|
67 |
-
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
68 |
-
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
69 |
-
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
70 |
-
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
71 |
-
pl.first('qualified')
|
72 |
-
)
|
73 |
-
.explode('batType')
|
74 |
-
.unnest('batType')
|
75 |
-
.pivot(on='batType', values='proportion')
|
76 |
-
.fill_null(0)
|
77 |
-
.with_columns(
|
78 |
-
(pl.col('G') + pl.col('B')).alias('GB%'),
|
79 |
-
(pl.col('F') + pl.col('P')).alias('FB%'),
|
80 |
-
pl.col('L').alias('LD%'),
|
81 |
-
)
|
82 |
-
.drop('G', 'F', 'B', 'P', 'L')
|
83 |
-
.with_columns(
|
84 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
85 |
-
for stat in ['CSW%', 'K%', 'BB%', 'GB%']
|
86 |
-
)
|
87 |
-
.filter(pl.col('pitId') == id)
|
88 |
-
)
|
89 |
-
|
90 |
-
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|
91 |
-
|
92 |
-
|
93 |
def get_card_data(id, **kwargs):
|
94 |
both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
|
95 |
pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')
|
|
|
10 |
from types import SimpleNamespace
|
11 |
from datetime import date
|
12 |
|
|
|
13 |
from convert import ball_kind_code_to_color, get_text_color_from_color
|
14 |
+
from stats import get_pitcher_stats
|
15 |
|
16 |
|
17 |
mpl.use('Agg')
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def get_card_data(id, **kwargs):
|
21 |
both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
|
22 |
pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')
|