Spaces:
Running
Running
Commit
·
65fefb5
1
Parent(s):
5cc9b28
Add more pitch stats
Browse files
- data.py +4 -0
- pitch_leaderboard.py +4 -4
- stats.py +53 -4
data.py
CHANGED
@@ -241,6 +241,10 @@ data_df = (
|
|
241 |
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
242 |
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
243 |
)
|
|
|
|
|
|
|
|
|
244 |
)
|
245 |
|
246 |
if __name__ == '__main__':
|
|
|
241 |
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
242 |
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
243 |
)
|
244 |
+
.with_columns((pl.col('x').is_between(-60, 60) & pl.col('y').is_between(50, 50+150)).alias('zone'))
|
245 |
+
.with_columns((pl.col('x').is_between(-40, 40) & pl.col('y').is_between(75, 75+100)).alias('heart'))
|
246 |
+
.with_columns((pl.col('x').is_between(-80, 80) & pl.col('y').is_between(25, 25+200) & ~pl.col('heart')).alias('shadow'))
|
247 |
+
.with_columns((pl.col('x').is_between(-100, 101) & pl.col('y').is_between(0, 0+251) & ~pl.col('heart') & ~pl.col('shadow')).alias('chase'))
|
248 |
)
|
249 |
|
250 |
if __name__ == '__main__':
|
pitch_leaderboard.py
CHANGED
@@ -2,15 +2,15 @@ import gradio as gr
|
|
2 |
import polars as pl
|
3 |
|
4 |
from datetime import datetime
|
5 |
-
from itertools import chain
|
6 |
|
7 |
from data import data_df
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
-
STATS = ['Count', 'Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
12 |
-
PCT_STATS = ['Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
13 |
-
STATS_WITH_PCTLS = ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
14 |
|
15 |
todo = '''
|
16 |
**To-do**
|
|
|
2 |
import polars as pl
|
3 |
|
4 |
from datetime import datetime
|
5 |
+
# from itertools import chain
|
6 |
|
7 |
from data import data_df
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
+
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
12 |
+
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
13 |
+
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
14 |
|
15 |
todo = '''
|
16 |
**To-do**
|
stats.py
CHANGED
@@ -52,6 +52,48 @@ def compute_team_games(data):
|
|
52 |
)
|
53 |
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
56 |
assert player_type in ('pitcher', 'batter')
|
57 |
assert pitch_class_type in ('general', 'specific')
|
@@ -67,9 +109,16 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
67 |
pl.first(pitch_name_col),
|
68 |
pl.len().alias('count'),
|
69 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
70 |
-
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
72 |
-
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
|
|
73 |
)
|
74 |
.with_columns(
|
75 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
@@ -86,8 +135,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
86 |
)
|
87 |
.drop('G', 'F', 'B', 'P', 'L', 'null')
|
88 |
.with_columns(
|
89 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
90 |
-
for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
91 |
)
|
92 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
93 |
.sort(id_col, 'count', descending=[False, True])
|
|
|
52 |
)
|
53 |
|
54 |
|
55 |
+
# def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
56 |
+
# assert player_type in ('pitcher', 'batter')
|
57 |
+
# assert pitch_class_type in ('general', 'specific')
|
58 |
+
# id_col = 'pitId' if player_type == 'pitcher' else 'batId'
|
59 |
+
# pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
60 |
+
# pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
61 |
+
# pitch_stats = (
|
62 |
+
# data
|
63 |
+
# .group_by(id_col, pitch_col)
|
64 |
+
# .agg(
|
65 |
+
# pl.first('pitcher_name'),
|
66 |
+
# *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
67 |
+
# pl.first(pitch_name_col),
|
68 |
+
# pl.len().alias('count'),
|
69 |
+
# pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
70 |
+
# (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
71 |
+
# (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
72 |
+
# (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
73 |
+
# )
|
74 |
+
# .with_columns(
|
75 |
+
# (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
76 |
+
# (pl.col('count') >= min_pitches).alias('qualified')
|
77 |
+
# )
|
78 |
+
# .explode('batType')
|
79 |
+
# .unnest('batType')
|
80 |
+
# .pivot(on='batType', values='proportion')
|
81 |
+
# .fill_null(0)
|
82 |
+
# .with_columns(
|
83 |
+
# (pl.col('G') + pl.col('B')).alias('GB%'),
|
84 |
+
# (pl.col('F') + pl.col('P')).alias('FB%'),
|
85 |
+
# pl.col('L').alias('LD%').round(2),
|
86 |
+
# )
|
87 |
+
# .drop('G', 'F', 'B', 'P', 'L', 'null')
|
88 |
+
# .with_columns(
|
89 |
+
# (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
90 |
+
# for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
91 |
+
# )
|
92 |
+
# .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
93 |
+
# .sort(id_col, 'count', descending=[False, True])
|
94 |
+
# )
|
95 |
+
# return pitch_stats
|
96 |
+
|
97 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
98 |
assert player_type in ('pitcher', 'batter')
|
99 |
assert pitch_class_type in ('general', 'specific')
|
|
|
109 |
pl.first(pitch_name_col),
|
110 |
pl.len().alias('count'),
|
111 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
112 |
+
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
113 |
+
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
114 |
+
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
115 |
+
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
116 |
+
((pl.col('swing') & ~pl.col('whiff')).sum()/pl.col('swing').sum()).alias('Contact%'),
|
117 |
+
((pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(pl.col('zone') & pl.col('swing')).sum()).alias('Z-Contact%'),
|
118 |
+
((~pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(~pl.col('zone') & pl.col('swing')).sum()).alias('O-Contact%'),
|
119 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
120 |
+
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
121 |
+
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
122 |
)
|
123 |
.with_columns(
|
124 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
135 |
)
|
136 |
.drop('G', 'F', 'B', 'P', 'L', 'null')
|
137 |
.with_columns(
|
138 |
+
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=((stat in ['FB%', 'LD%'] or 'Contact%' in stat)))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
139 |
+
for stat in ['Zone%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
140 |
)
|
141 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
142 |
.sort(id_col, 'count', descending=[False, True])
|