Spaces:
Running
Running
import polars as pl | |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
    """Narrow ``data`` to an optional date window and game kind.

    Each criterion is applied only when its argument is not ``None``:

    * ``start_date`` keeps rows whose ``date`` is >= ``start_date``.
    * ``end_date`` keeps rows whose ``date`` is <= ``end_date``.
    * ``game_kind`` keeps rows whose ``coarse_game_kind`` equals it.

    When every argument is ``None`` the input object is returned untouched.

    Args:
        data: Frame exposing a polars-style ``.filter`` method.
        start_date: Inclusive lower bound on ``date``, or ``None``.
        end_date: Inclusive upper bound on ``date``, or ``None``.
        game_kind: Required ``coarse_game_kind`` value, or ``None``.

    Returns:
        The filtered frame.
    """
    # (argument value, predicate builder) pairs, applied in this order.
    criteria = (
        (start_date, lambda bound: pl.col('date') >= bound),
        (end_date, lambda bound: pl.col('date') <= bound),
        (game_kind, lambda kind: pl.col('coarse_game_kind') == kind),
    )
    for value, build_predicate in criteria:
        if value is not None:
            data = data.filter(build_predicate(value))
    return data
def compute_team_games(data):
    """Attach each team's total number of distinct games to every row.

    A team's total is the number of distinct ``gameId`` values in which it
    appears as ``HomeTeamNameES`` plus the number in which it appears as
    ``VisitorTeamNameES``.  The totals are joined back onto ``data`` twice:
    ``home_games`` holds the total for the row's home team and
    ``visitor_games`` holds the total for the row's visiting team.

    Teams that appear on only one side (home only or visitor only, e.g.
    after filtering to a short date range) are kept: the two per-side
    tallies are combined with a full join where a missing side counts as
    zero, and the totals are attached with left joins so no input row is
    discarded.

    Args:
        data: polars DataFrame with ``gameId``, ``HomeTeamNameES`` and
            ``VisitorTeamNameES`` columns.  Any existing ``home_games`` /
            ``visitor_games`` columns are replaced.

    Returns:
        ``data`` with ``home_games`` and ``visitor_games`` appended as the
        last two columns.
    """
    def _games_per_team(team_col, count_col):
        # Distinct games in which each team appears in the given role.
        return (
            data
            .group_by(team_col)
            .agg(pl.col('gameId').n_unique().alias(count_col))
            .rename({team_col: 'team'})
        )

    team_games = (
        _games_per_team('HomeTeamNameES', 'home_games')
        .join(
            _games_per_team('VisitorTeamNameES', 'visitor_games'),
            on='team',
            # A full join keeps teams that only ever appear on one side;
            # an inner join would silently drop them.
            how='full',
            coalesce=True,
        )
        .select(
            'team',
            (
                pl.col('home_games').fill_null(0)
                + pl.col('visitor_games').fill_null(0)
            ).alias('games'),
        )
    )

    return (
        data
        # Discard stale columns of the same name so the joins below cannot
        # collide with them (e.g. when this function is applied twice).
        .drop('home_games', 'visitor_games', strict=False)
        .join(
            team_games.rename({'games': 'home_games'}),
            left_on='HomeTeamNameES',
            right_on='team',
            how='left',
            coalesce=True,
        )
        .join(
            team_games.rename({'games': 'visitor_games'}),
            left_on='VisitorTeamNameES',
            right_on='team',
            how='left',
            coalesce=True,
        )
    )
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
    """Aggregate per-player, per-pitch-type rates and their percentile ranks.

    Rows are grouped by the player id column (``pitId`` for pitchers,
    ``batId`` for batters) and by the pitch classification column
    (``ballKind_code`` for ``'specific'``, ``general_ballKind_code`` for
    ``'general'``).  Each output row carries:

    * ``count``: rows in the group, and ``usage``: ``count`` divided by the
      player's total ``count``;
    * ``qualified``: whether ``count >= min_pitches``;
    * ``SwStr%`` (whiff / pitch), ``Whiff%`` (whiff / swing) and ``CSW%``
      (csw / pitch), each a ratio of column sums;
    * ``GB%`` (``batType`` shares G + B), ``FB%`` (F + P) and ``LD%`` (L,
      rounded to 2 decimals), taken from ``aux_bresult.batType``;
    * ``<stat>_pctl`` for each of the six rates: rank among qualified rows
      divided by the number of qualified rows with a value.

    Args:
        data: Frame (expected to be a polars DataFrame) providing the id
            and pitch columns above plus ``pitcher_name``, ``ballKind`` /
            ``general_ballKind``, ``aux_bresult`` (struct with a
            ``batType`` field), ``whiff``, ``pitch``, ``swing`` and ``csw``.
        player_type: ``'pitcher'`` or ``'batter'``.
        pitch_class_type: ``'general'`` or ``'specific'``.
        min_pitches: Minimum ``count`` for a row to be ``qualified``.

    Returns:
        Frame sorted by player id ascending, then ``count`` descending.
        The pitch columns are always named ``ballKind_code`` / ``ballKind``.

    Raises:
        AssertionError: If ``player_type`` or ``pitch_class_type`` is not
            one of the accepted values.
    """
    assert player_type in ('pitcher', 'batter'), f'unknown player_type: {player_type!r}'
    assert pitch_class_type in ('general', 'specific'), f'unknown pitch_class_type: {pitch_class_type!r}'

    id_col = 'pitId' if player_type == 'pitcher' else 'batId'
    pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
    pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'

    bat_type_cols = ('G', 'F', 'B', 'P', 'L')
    rate_stats = ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']

    pivoted = (
        data
        .group_by(id_col, pitch_col)
        .agg(
            # NOTE(review): 'pitcher_name' is taken even when grouping by
            # batter, so it is just the first pitcher faced -- confirm
            # whether a batter name column should be used instead.
            pl.first('pitcher_name'),
            *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
            pl.first(pitch_name_col),
            pl.len().alias('count'),
            pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
            (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
            (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
            (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
        )
        .with_columns(
            # Partition by the grouping id: after the aggregation 'pitId'
            # no longer exists when grouping by batter.
            (pl.col('count') / pl.sum('count').over(id_col)).alias('usage'),
            (pl.col('count') >= min_pitches).alias('qualified'),
        )
        # One row per (group, batType), then spread batType shares into
        # one column per batType value.
        .explode('batType')
        .unnest('batType')
        .pivot(on='batType', values='proportion')
        .fill_null(0)
    )

    # A batType that never occurs in `data` yields no pivot column; treat
    # its share as 0 rather than failing on the column lookup below.
    absent = [name for name in bat_type_cols if name not in pivoted.columns]
    if absent:
        pivoted = pivoted.with_columns([pl.lit(0.0).alias(name) for name in absent])

    pitch_stats = (
        pivoted
        .with_columns(
            (pl.col('G') + pl.col('B')).alias('GB%'),
            (pl.col('F') + pl.col('P')).alias('FB%'),
            pl.col('L').round(2).alias('LD%'),
        )
        # 'null' exists only when some group had no batType values at all,
        # so the drop must tolerate its absence.
        .drop(*bat_type_cols, 'null', strict=False)
        .with_columns(
            (
                pl.when(pl.col('qualified')).then(pl.col(stat)).rank()
                / pl.when(pl.col('qualified')).then(pl.col(stat)).count()
            ).alias(f'{stat}_pctl')
            for stat in rate_stats
        )
        .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
        .sort(id_col, 'count', descending=[False, True])
    )
    return pitch_stats