patrickramos committed on
Commit
65fefb5
·
1 Parent(s): 5cc9b28

Add more pitch stats

Browse files
Files changed (3) hide show
  1. data.py +4 -0
  2. pitch_leaderboard.py +4 -4
  3. stats.py +53 -4
data.py CHANGED
@@ -241,6 +241,10 @@ data_df = (
241
  (pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
242
  (pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
243
  )
 
 
 
 
244
  )
245
 
246
  if __name__ == '__main__':
 
241
  (pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
242
  (pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
243
  )
244
+ .with_columns((pl.col('x').is_between(-60, 60) & pl.col('y').is_between(50, 50+150)).alias('zone'))
245
+ .with_columns((pl.col('x').is_between(-40, 40) & pl.col('y').is_between(75, 75+100)).alias('heart'))
246
+ .with_columns((pl.col('x').is_between(-80, 80) & pl.col('y').is_between(25, 25+200) & ~pl.col('heart')).alias('shadow'))
247
+ .with_columns((pl.col('x').is_between(-100, 101) & pl.col('y').is_between(0, 0+251) & ~pl.col('heart') & ~pl.col('shadow')).alias('chase'))
248
  )
249
 
250
  if __name__ == '__main__':
pitch_leaderboard.py CHANGED
@@ -2,15 +2,15 @@ import gradio as gr
2
  import polars as pl
3
 
4
  from datetime import datetime
5
- from itertools import chain
6
 
7
  from data import data_df
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
- STATS = ['Count', 'Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
12
- PCT_STATS = ['Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
13
- STATS_WITH_PCTLS = ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
14
 
15
  todo = '''
16
  **To-do**
 
2
  import polars as pl
3
 
4
  from datetime import datetime
5
+ # from itertools import chain
6
 
7
  from data import data_df
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
+ STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
12
+ PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
13
+ STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
14
 
15
  todo = '''
16
  **To-do**
stats.py CHANGED
@@ -52,6 +52,48 @@ def compute_team_games(data):
52
  )
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
56
  assert player_type in ('pitcher', 'batter')
57
  assert pitch_class_type in ('general', 'specific')
@@ -67,9 +109,16 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
67
  pl.first(pitch_name_col),
68
  pl.len().alias('count'),
69
  pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
70
- (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
 
 
 
 
 
 
71
  (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
72
- (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
 
73
  )
74
  .with_columns(
75
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
@@ -86,8 +135,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
86
  )
87
  .drop('G', 'F', 'B', 'P', 'L', 'null')
88
  .with_columns(
89
- (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
90
- for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
91
  )
92
  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
93
  .sort(id_col, 'count', descending=[False, True])
 
52
  )
53
 
54
 
55
+ # def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
56
+ # assert player_type in ('pitcher', 'batter')
57
+ # assert pitch_class_type in ('general', 'specific')
58
+ # id_col = 'pitId' if player_type == 'pitcher' else 'batId'
59
+ # pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
60
+ # pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
61
+ # pitch_stats = (
62
+ # data
63
+ # .group_by(id_col, pitch_col)
64
+ # .agg(
65
+ # pl.first('pitcher_name'),
66
+ # *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
67
+ # pl.first(pitch_name_col),
68
+ # pl.len().alias('count'),
69
+ # pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
70
+ # (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
71
+ # (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
72
+ # (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
73
+ # )
74
+ # .with_columns(
75
+ # (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
76
+ # (pl.col('count') >= min_pitches).alias('qualified')
77
+ # )
78
+ # .explode('batType')
79
+ # .unnest('batType')
80
+ # .pivot(on='batType', values='proportion')
81
+ # .fill_null(0)
82
+ # .with_columns(
83
+ # (pl.col('G') + pl.col('B')).alias('GB%'),
84
+ # (pl.col('F') + pl.col('P')).alias('FB%'),
85
+ # pl.col('L').alias('LD%').round(2),
86
+ # )
87
+ # .drop('G', 'F', 'B', 'P', 'L', 'null')
88
+ # .with_columns(
89
+ # (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
90
+ # for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
91
+ # )
92
+ # .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
93
+ # .sort(id_col, 'count', descending=[False, True])
94
+ # )
95
+ # return pitch_stats
96
+
97
  def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
98
  assert player_type in ('pitcher', 'batter')
99
  assert pitch_class_type in ('general', 'specific')
 
109
  pl.first(pitch_name_col),
110
  pl.len().alias('count'),
111
  pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
112
+ (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
113
+ (pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
114
+ ((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
115
+ ((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
116
+ ((pl.col('swing') & ~pl.col('whiff')).sum()/pl.col('swing').sum()).alias('Contact%'),
117
+ ((pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(pl.col('zone') & pl.col('swing')).sum()).alias('Z-Contact%'),
118
+ ((~pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(~pl.col('zone') & pl.col('swing')).sum()).alias('O-Contact%'),
119
  (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
120
+ (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
121
+ (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
122
  )
123
  .with_columns(
124
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
 
135
  )
136
  .drop('G', 'F', 'B', 'P', 'L', 'null')
137
  .with_columns(
138
+ (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=((stat in ['FB%', 'LD%'] or 'Contact%' in stat)))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
139
+ for stat in ['Zone%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
140
  )
141
  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
142
  .sort(id_col, 'count', descending=[False, True])