Spaces:
Running
Running
Commit
·
8fe9801
1
Parent(s):
f89cae0
Add middle-middle location% and team filtering
Browse files
app.py
CHANGED
@@ -4,6 +4,8 @@ from data import data_df
|
|
4 |
from pitcher_overview import create_pitcher_overview
|
5 |
from pitch_leaderboard import create_pitch_leaderboard
|
6 |
|
|
|
|
|
7 |
if __name__ == '__main__':
|
8 |
with gr.Blocks() as app:
|
9 |
with gr.Tab('Pitcher Overview'):
|
@@ -11,5 +13,5 @@ if __name__ == '__main__':
|
|
11 |
with gr.Tab('Pitch Leaderboard'):
|
12 |
create_pitch_leaderboard()
|
13 |
|
14 |
-
gr.Markdown('Last updated:
|
15 |
app.launch()
|
|
|
4 |
from pitcher_overview import create_pitcher_overview
|
5 |
from pitch_leaderboard import create_pitch_leaderboard
|
6 |
|
7 |
+
updated = '2025-07-21'
|
8 |
+
|
9 |
if __name__ == '__main__':
|
10 |
with gr.Blocks() as app:
|
11 |
with gr.Tab('Pitcher Overview'):
|
|
|
13 |
with gr.Tab('Pitch Leaderboard'):
|
14 |
create_pitch_leaderboard()
|
15 |
|
16 |
+
gr.Markdown(f'Last updated: {updated}')
|
17 |
app.launch()
|
data.py
CHANGED
@@ -230,6 +230,8 @@ data_df = (
|
|
230 |
.then(pl.lit('Postseason'))
|
231 |
.otherwise('GameKindName')
|
232 |
.alias('coarse_game_kind'),
|
|
|
|
|
233 |
)
|
234 |
.with_columns(
|
235 |
pl.col('presult_id').replace_strict(presult).alias('presult')
|
|
|
230 |
.then(pl.lit('Postseason'))
|
231 |
.otherwise('GameKindName')
|
232 |
.alias('coarse_game_kind'),
|
233 |
+
|
234 |
+
pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team')
|
235 |
)
|
236 |
.with_columns(
|
237 |
pl.col('presult_id').replace_strict(presult).alias('presult')
|
pitch_leaderboard.py
CHANGED
@@ -8,16 +8,21 @@ from data import data_df
|
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
-
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
12 |
-
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
13 |
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
**To-do**
|
17 |
- Color cells according to percentiles
|
18 |
'''
|
19 |
|
20 |
-
|
|
|
21 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
22 |
|
23 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
@@ -25,6 +30,9 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
|
|
25 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
26 |
if pitcher_lr != 'Both':
|
27 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
|
|
|
|
|
|
28 |
|
29 |
# both, left, right = [
|
30 |
# (
|
@@ -51,12 +59,12 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
|
|
51 |
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
52 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
53 |
.drop('pitId', 'ballKind_code', 'qualified')
|
54 |
-
.rename({'pitcher_name': 'Pitcher', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
|
55 |
.with_columns(
|
56 |
pl.col(stat).mul(100).round(1)
|
57 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
58 |
)
|
59 |
-
[['Pitcher', 'Pitch', 'Pitch (General)'] + STATS]
|
60 |
)
|
61 |
return pitch_stats
|
62 |
|
@@ -65,33 +73,38 @@ def create_pitch_leaderboard():
|
|
65 |
now = datetime.now()
|
66 |
start_datetime_init = datetime(now.year, 1, 1)
|
67 |
end_datetime_init = now
|
68 |
-
pitch_types = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
69 |
with gr.Blocks() as app:
|
70 |
gr.Markdown('# Pitch Leaderboard')
|
71 |
with gr.Row():
|
72 |
start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
|
73 |
end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
|
74 |
with gr.Row():
|
75 |
-
include_pitches = gr.CheckboxGroup(
|
76 |
with gr.Column(scale=1):
|
77 |
-
all_pitches = gr.Button('Select/Deselect all pitches'
|
78 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
79 |
pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
|
|
|
|
|
|
80 |
|
81 |
search = gr.Button('Search')
|
|
|
82 |
leaderboard = gr.DataFrame(
|
83 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
84 |
-
|
85 |
-
column_widths=[200]*3 + [100]*len(STATS),
|
86 |
show_copy_button=True,
|
87 |
show_search=True,
|
88 |
-
pinned_columns=
|
89 |
)
|
90 |
|
|
|
91 |
|
92 |
-
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches], outputs=leaderboard)
|
93 |
-
all_pitches.click(lambda _pitch_types : [] if _pitch_types ==
|
94 |
-
|
|
|
|
|
95 |
return app
|
96 |
|
97 |
if __name__ == '__main__':
|
|
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
+
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
12 |
+
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
13 |
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
14 |
|
15 |
+
PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
16 |
+
TEAMS = ['G', 'S', 'DB', 'D', 'T', 'C', 'F', 'E', 'L', 'M', 'B', 'H']
|
17 |
+
notes = '''**Limitations**
|
18 |
+
- Foreign players' names are in Hepburn romanization.
|
19 |
+
|
20 |
**To-do**
|
21 |
- Color cells according to percentiles
|
22 |
'''
|
23 |
|
24 |
+
|
25 |
+
def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='Both', include_pitches=PITCH_TYPES, include_teams=None):
|
26 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
27 |
|
28 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
|
|
30 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
31 |
if pitcher_lr != 'Both':
|
32 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
33 |
+
|
34 |
+
if include_teams is not None:
|
35 |
+
data = data.filter(pl.col('pitcher_team').is_in(include_teams))
|
36 |
|
37 |
# both, left, right = [
|
38 |
# (
|
|
|
59 |
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
60 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
61 |
.drop('pitId', 'ballKind_code', 'qualified')
|
62 |
+
.rename({'pitcher_name': 'Pitcher', 'pitcher_team': 'Team', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
|
63 |
.with_columns(
|
64 |
pl.col(stat).mul(100).round(1)
|
65 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
66 |
)
|
67 |
+
[['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS]
|
68 |
)
|
69 |
return pitch_stats
|
70 |
|
|
|
73 |
now = datetime.now()
|
74 |
start_datetime_init = datetime(now.year, 1, 1)
|
75 |
end_datetime_init = now
|
|
|
76 |
with gr.Blocks() as app:
|
77 |
gr.Markdown('# Pitch Leaderboard')
|
78 |
with gr.Row():
|
79 |
start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
|
80 |
end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
|
81 |
with gr.Row():
|
82 |
+
include_pitches = gr.CheckboxGroup(PITCH_TYPES, value=PITCH_TYPES, label='Pitches', scale=3)
|
83 |
with gr.Column(scale=1):
|
84 |
+
all_pitches = gr.Button('Select/Deselect all pitches')
|
85 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
86 |
pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
87 |
+
with gr.Row():
|
88 |
+
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
89 |
+
all_teams = gr.Button('Select/Deselect all teams')
|
90 |
|
91 |
search = gr.Button('Search')
|
92 |
+
# pin_columns = gr.Checkbox(True, 'Pin columns')
|
93 |
leaderboard = gr.DataFrame(
|
94 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
95 |
+
column_widths=[200, 60, 200, 200] + [100]*len(STATS),
|
|
|
96 |
show_copy_button=True,
|
97 |
show_search=True,
|
98 |
+
pinned_columns=3
|
99 |
)
|
100 |
|
101 |
+
gr.Markdown(notes)
|
102 |
|
103 |
+
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
|
104 |
+
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
105 |
+
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
106 |
+
# pin_columns.input(lambda _pin_columns : (gr.update(pinned_columns=None if _pin_columns else 3), not _pin_columns), inputs=pin_columns, outputs=[leaderboard, pin_columns])
|
107 |
+
|
108 |
return app
|
109 |
|
110 |
if __name__ == '__main__':
|
stats.py
CHANGED
@@ -60,7 +60,7 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
60 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
61 |
pitch_stats = (
|
62 |
data
|
63 |
-
.group_by(id_col, pitch_col)
|
64 |
.agg(
|
65 |
pl.first('pitcher_name'),
|
66 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
@@ -80,7 +80,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
80 |
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
|
81 |
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
|
82 |
(pl.col('y') > 125).mean().alias('High%'),
|
83 |
-
(pl.col('y') <= 125).mean().alias('Low%')
|
|
|
84 |
)
|
85 |
.with_columns(
|
86 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
60 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
61 |
pitch_stats = (
|
62 |
data
|
63 |
+
.group_by(id_col, pitch_col, 'pitcher_team')
|
64 |
.agg(
|
65 |
pl.first('pitcher_name'),
|
66 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
|
|
80 |
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
|
81 |
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
|
82 |
(pl.col('y') > 125).mean().alias('High%'),
|
83 |
+
(pl.col('y') <= 125).mean().alias('Low%'),
|
84 |
+
(pl.col('x').is_between(-20, 20) & pl.col('y').is_between(100, 100+50)).mean().alias('MM%')
|
85 |
)
|
86 |
.with_columns(
|
87 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|