patrickramos committed on
Commit
e86b89f
·
1 Parent(s): 9eda2f5

Add team filtering after percentile computation in pitch leaderboard

Browse files
Files changed (2) hide show
  1. pitch_leaderboard.py +10 -6
  2. plotting.py +1 -74
pitch_leaderboard.py CHANGED
@@ -43,9 +43,6 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
43
  data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
44
  if pitcher_lr != 'Both':
45
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
46
-
47
- if include_teams is not None:
48
- data = data.filter(pl.col('pitcher_team_name_short').is_in(include_teams))
49
 
50
  # both, left, right = [
51
  # (
@@ -80,6 +77,9 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
80
  # [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
81
  )
82
 
 
 
 
83
  styling = []
84
  for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
85
  styling_row = []
@@ -143,8 +143,8 @@ def create_pitch_leaderboard():
143
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
144
  column_widths=[200, 100, 200, 200] + [100]*len(STATS),
145
  show_copy_button=True,
146
- show_search=True,
147
- pinned_columns=3
148
  )
149
 
150
  gr.Markdown(notes)
@@ -152,7 +152,11 @@ def create_pitch_leaderboard():
152
  search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
153
  all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
154
  all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
155
- # pin_columns.input(lambda _pin_columns : (gr.update(pinned_columns=None if _pin_columns else 3), not _pin_columns), inputs=pin_columns, outputs=[leaderboard, pin_columns])
 
 
 
 
156
 
157
  return app
158
 
 
43
  data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
44
  if pitcher_lr != 'Both':
45
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
 
 
 
46
 
47
  # both, left, right = [
48
  # (
 
77
  # [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
78
  )
79
 
80
+ if include_teams is not None:
81
+ pitch_stats = pitch_stats.filter(pl.col('Team').is_in(include_teams))
82
+
83
  styling = []
84
  for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
85
  styling_row = []
 
143
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
144
  column_widths=[200, 100, 200, 200] + [100]*len(STATS),
145
  show_copy_button=True,
146
+ show_search='filter',
147
+ pinned_columns=3,
148
  )
149
 
150
  gr.Markdown(notes)
 
152
  search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
153
  all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
154
  all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
155
+ # pin_columns.input(
156
+ # lambda _pin_columns : gr.update(pinned_columns=None if _pin_columns else 3),
157
+ # inputs=pin_columns,
158
+ # outputs=leaderboard
159
+ # )
160
 
161
  return app
162
 
plotting.py CHANGED
@@ -10,86 +10,13 @@ import numpy as np
10
  from types import SimpleNamespace
11
  from datetime import date
12
 
13
- from data import data_df
14
  from convert import ball_kind_code_to_color, get_text_color_from_color
15
- from stats import filter_data_by_date_and_game_kind, compute_team_games, compute_pitch_stats
16
 
17
 
18
  mpl.use('Agg')
19
 
20
 
21
- def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
22
- source_data = data_df.filter(pl.col('ballKind_code') != '-')
23
-
24
- # if start_date is not None:
25
- # source_data = source_data.filter(pl.col('date') >= start_date)
26
- # if end_date is not None:
27
- # source_data = source_data.filter(pl.col('date') <= end_date)
28
- #
29
- # if game_kind is not None:
30
- # source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
31
- source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
32
-
33
- source_data = (
34
- compute_team_games(source_data)
35
- .with_columns(
36
- pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
37
- pl.col('inning_code').unique().len().over('pitId').alias('IP')
38
- )
39
- )
40
-
41
- if min_ip == 'qualified':
42
- source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
43
- else:
44
- source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
45
-
46
- if lr is not None:
47
- source_data = source_data.filter(pl.col('batLR') == lr)
48
-
49
- pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
50
-
51
- pitch_shapes = (
52
- source_data
53
- .filter(
54
- (pl.col('pitId') == id) &
55
- pl.col('x').is_not_null() &
56
- pl.col('y').is_not_null() &
57
- (pl.col('ballSpeed') > 0)
58
- )
59
- [['pitId', 'general_ballKind_code', 'ballKind_code', 'ballSpeed', 'x', 'y']]
60
- )
61
-
62
- pitcher_stats = (
63
- source_data
64
- .group_by('pitId')
65
- .agg(
66
- pl.col('pitcher_name').first(),
67
- (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
68
- (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
69
- (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
70
- pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
71
- pl.first('qualified')
72
- )
73
- .explode('batType')
74
- .unnest('batType')
75
- .pivot(on='batType', values='proportion')
76
- .fill_null(0)
77
- .with_columns(
78
- (pl.col('G') + pl.col('B')).alias('GB%'),
79
- (pl.col('F') + pl.col('P')).alias('FB%'),
80
- pl.col('L').alias('LD%'),
81
- )
82
- .drop('G', 'F', 'B', 'P', 'L')
83
- .with_columns(
84
- (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
85
- for stat in ['CSW%', 'K%', 'BB%', 'GB%']
86
- )
87
- .filter(pl.col('pitId') == id)
88
- )
89
-
90
- return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
91
-
92
-
93
  def get_card_data(id, **kwargs):
94
  both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
95
  pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')
 
10
  from types import SimpleNamespace
11
  from datetime import date
12
 
 
13
  from convert import ball_kind_code_to_color, get_text_color_from_color
14
+ from stats import get_pitcher_stats
15
 
16
 
17
  mpl.use('Agg')
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def get_card_data(id, **kwargs):
21
  both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
22
  pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')