patrickramos committed on
Commit
8fe9801
·
1 Parent(s): f89cae0

Add middle-middle location% and team filtering

Browse files
Files changed (4) hide show
  1. app.py +3 -1
  2. data.py +2 -0
  3. pitch_leaderboard.py +28 -15
  4. stats.py +3 -2
app.py CHANGED
@@ -4,6 +4,8 @@ from data import data_df
4
  from pitcher_overview import create_pitcher_overview
5
  from pitch_leaderboard import create_pitch_leaderboard
6
 
 
 
7
  if __name__ == '__main__':
8
  with gr.Blocks() as app:
9
  with gr.Tab('Pitcher Overview'):
@@ -11,5 +13,5 @@ if __name__ == '__main__':
11
  with gr.Tab('Pitch Leaderboard'):
12
  create_pitch_leaderboard()
13
 
14
- gr.Markdown('Last updated: 2025-07-19')
15
  app.launch()
 
4
  from pitcher_overview import create_pitcher_overview
5
  from pitch_leaderboard import create_pitch_leaderboard
6
 
7
+ updated = '2025-07-21'
8
+
9
  if __name__ == '__main__':
10
  with gr.Blocks() as app:
11
  with gr.Tab('Pitcher Overview'):
 
13
  with gr.Tab('Pitch Leaderboard'):
14
  create_pitch_leaderboard()
15
 
16
+ gr.Markdown(f'Last updated: {updated}')
17
  app.launch()
data.py CHANGED
@@ -230,6 +230,8 @@ data_df = (
230
  .then(pl.lit('Postseason'))
231
  .otherwise('GameKindName')
232
  .alias('coarse_game_kind'),
 
 
233
  )
234
  .with_columns(
235
  pl.col('presult_id').replace_strict(presult).alias('presult')
 
230
  .then(pl.lit('Postseason'))
231
  .otherwise('GameKindName')
232
  .alias('coarse_game_kind'),
233
+
234
+ pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team')
235
  )
236
  .with_columns(
237
  pl.col('presult_id').replace_strict(presult).alias('presult')
pitch_leaderboard.py CHANGED
@@ -8,16 +8,21 @@ from data import data_df
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
- STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
12
- PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
13
  STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
14
 
15
- todo = '''
 
 
 
 
16
  **To-do**
17
  - Color cells according to percentiles
18
  '''
19
 
20
- def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, include_pitches):
 
21
  assert pitcher_lr in ['Both', 'Left', 'Right']
22
 
23
  data = data_df.filter(pl.col('ballKind_code') != '-')
@@ -25,6 +30,9 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
25
  data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
26
  if pitcher_lr != 'Both':
27
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
 
 
 
28
 
29
  # both, left, right = [
30
  # (
@@ -51,12 +59,12 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
51
  compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
52
  .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
53
  .drop('pitId', 'ballKind_code', 'qualified')
54
- .rename({'pitcher_name': 'Pitcher', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
55
  .with_columns(
56
  pl.col(stat).mul(100).round(1)
57
  for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
58
  )
59
- [['Pitcher', 'Pitch', 'Pitch (General)'] + STATS]
60
  )
61
  return pitch_stats
62
 
@@ -65,33 +73,38 @@ def create_pitch_leaderboard():
65
  now = datetime.now()
66
  start_datetime_init = datetime(now.year, 1, 1)
67
  end_datetime_init = now
68
- pitch_types = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
69
  with gr.Blocks() as app:
70
  gr.Markdown('# Pitch Leaderboard')
71
  with gr.Row():
72
  start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
73
  end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
74
  with gr.Row():
75
- include_pitches = gr.CheckboxGroup(pitch_types, value=pitch_types, label='Pitches', scale=3)
76
  with gr.Column(scale=1):
77
- all_pitches = gr.Button('Select/Deselect all pitches', scale=1)
78
  min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
79
  pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
 
 
 
80
 
81
  search = gr.Button('Search')
 
82
  leaderboard = gr.DataFrame(
83
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
84
- # gr_create_pitch_leaderboard(start_date=start_date_init, end_date=end_date_init, min_pitches=100),
85
- column_widths=[200]*3 + [100]*len(STATS),
86
  show_copy_button=True,
87
  show_search=True,
88
- pinned_columns=2
89
  )
90
 
 
91
 
92
- search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches], outputs=leaderboard)
93
- all_pitches.click(lambda _pitch_types : [] if _pitch_types == pitch_types else pitch_types, inputs=include_pitches, outputs=include_pitches)
94
- gr.Markdown(todo)
 
 
95
  return app
96
 
97
  if __name__ == '__main__':
 
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
+ STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
12
+ PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
13
  STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
14
 
15
+ PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
16
+ TEAMS = ['G', 'S', 'DB', 'D', 'T', 'C', 'F', 'E', 'L', 'M', 'B', 'H']
17
+ notes = '''**Limitations**
18
+ - Foreign players' names are in Hepburn romanization.
19
+
20
  **To-do**
21
  - Color cells according to percentiles
22
  '''
23
 
24
+
25
+ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='Both', include_pitches=PITCH_TYPES, include_teams=None):
26
  assert pitcher_lr in ['Both', 'Left', 'Right']
27
 
28
  data = data_df.filter(pl.col('ballKind_code') != '-')
 
30
  data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
31
  if pitcher_lr != 'Both':
32
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
33
+
34
+ if include_teams is not None:
35
+ data = data.filter(pl.col('pitcher_team').is_in(include_teams))
36
 
37
  # both, left, right = [
38
  # (
 
59
  compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
60
  .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
61
  .drop('pitId', 'ballKind_code', 'qualified')
62
+ .rename({'pitcher_name': 'Pitcher', 'pitcher_team': 'Team', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
63
  .with_columns(
64
  pl.col(stat).mul(100).round(1)
65
  for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
66
  )
67
+ [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS]
68
  )
69
  return pitch_stats
70
 
 
73
  now = datetime.now()
74
  start_datetime_init = datetime(now.year, 1, 1)
75
  end_datetime_init = now
 
76
  with gr.Blocks() as app:
77
  gr.Markdown('# Pitch Leaderboard')
78
  with gr.Row():
79
  start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
80
  end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
81
  with gr.Row():
82
+ include_pitches = gr.CheckboxGroup(PITCH_TYPES, value=PITCH_TYPES, label='Pitches', scale=3)
83
  with gr.Column(scale=1):
84
+ all_pitches = gr.Button('Select/Deselect all pitches')
85
  min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
86
  pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
87
+ with gr.Row():
88
+ include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
89
+ all_teams = gr.Button('Select/Deselect all teams')
90
 
91
  search = gr.Button('Search')
92
+ # pin_columns = gr.Checkbox(True, 'Pin columns')
93
  leaderboard = gr.DataFrame(
94
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
95
+ column_widths=[200, 60, 200, 200] + [100]*len(STATS),
 
96
  show_copy_button=True,
97
  show_search=True,
98
+ pinned_columns=3
99
  )
100
 
101
+ gr.Markdown(notes)
102
 
103
+ search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
104
+ all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
105
+ all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
106
+ # pin_columns.input(lambda _pin_columns : (gr.update(pinned_columns=None if _pin_columns else 3), not _pin_columns), inputs=pin_columns, outputs=[leaderboard, pin_columns])
107
+
108
  return app
109
 
110
  if __name__ == '__main__':
stats.py CHANGED
@@ -60,7 +60,7 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
60
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
61
  pitch_stats = (
62
  data
63
- .group_by(id_col, pitch_col)
64
  .agg(
65
  pl.first('pitcher_name'),
66
  *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
@@ -80,7 +80,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
80
  (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
81
  (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
82
  (pl.col('y') > 125).mean().alias('High%'),
83
- (pl.col('y') <= 125).mean().alias('Low%')
 
84
  )
85
  .with_columns(
86
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
 
60
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
61
  pitch_stats = (
62
  data
63
+ .group_by(id_col, pitch_col, 'pitcher_team')
64
  .agg(
65
  pl.first('pitcher_name'),
66
  *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
 
80
  (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
81
  (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
82
  (pl.col('y') > 125).mean().alias('High%'),
83
+ (pl.col('y') <= 125).mean().alias('Low%'),
84
+ (pl.col('x').is_between(-20, 20) & pl.col('y').is_between(100, 100+50)).mean().alias('MM%')
85
  )
86
  .with_columns(
87
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),