patrickramos committed on
Commit
9eda2f5
·
1 Parent(s): 7e9d407

Use short team names

Browse files
Files changed (6) hide show
  1. app.py +4 -0
  2. convert.py +28 -3
  3. data.py +14 -2
  4. pitch_leaderboard.py +25 -15
  5. pitcher_overview.py +1 -1
  6. stats.py +76 -1
app.py CHANGED
@@ -5,6 +5,9 @@ from pitcher_overview import create_pitcher_overview
5
  from pitch_leaderboard import create_pitch_leaderboard
6
 
7
  updated = '2025-07-21'
 
 
 
8
 
9
  if __name__ == '__main__':
10
  with gr.Blocks() as app:
@@ -14,4 +17,5 @@ if __name__ == '__main__':
14
  create_pitch_leaderboard()
15
 
16
  gr.Markdown(f'Last updated: {updated}')
 
17
  app.launch()
 
5
  from pitch_leaderboard import create_pitch_leaderboard
6
 
7
  updated = '2025-07-21'
8
+ limitations = '''**General Limitations**
9
+ - Foreign players names are in Hebpurn romanization. Contact me if you need a card for a foreign player.
10
+ '''
11
 
12
  if __name__ == '__main__':
13
  with gr.Blocks() as app:
 
17
  create_pitch_leaderboard()
18
 
19
  gr.Markdown(f'Last updated: {updated}')
20
+ gr.Markdown(limitations)
21
  app.launch()
convert.py CHANGED
@@ -338,12 +338,31 @@ ball_kind_code_to_color = {
338
 
339
 
340
  def get_text_color_from_color(color):
341
- if color in ['gold', 'paleturquoise', 'turquoise']:
342
  return 'black'
343
  return 'white'
344
 
345
  ball_kind_to_color = {ball_kind: ball_kind_code_to_color[ball_kind_code[code]] for code, ball_kind in ball_kind.items()}
346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  team_to_color = {
348
  'G': '#f69727',
349
  'S': '#abcd05',
@@ -356,11 +375,17 @@ team_to_color = {
356
  'L': '#00214b',
357
  'M': '#efefef',
358
  'B': '#baa834',
359
- 'H': '#fcc700'
 
 
 
 
360
  }
361
 
 
 
362
  def get_text_color_from_team(team):
363
- if team in ['DB', 'F', 'L', 'E']:
364
  return 'white'
365
  else:
366
  return 'black'
 
338
 
339
 
340
  def get_text_color_from_color(color):
341
+ if color in ['gold', 'khaki', 'paleturquoise', 'turquoise']:
342
  return 'black'
343
  return 'white'
344
 
345
  ball_kind_to_color = {ball_kind: ball_kind_code_to_color[ball_kind_code[code]] for code, ball_kind in ball_kind.items()}
346
 
347
+ team_name_short = {
348
+ 'G': 'Yomiuri',
349
+ 'S': 'Yakult',
350
+ 'DB': 'DeNA',
351
+ 'D': 'Chunichi',
352
+ 'T': 'Hanshin',
353
+ 'C': 'Hiroshima',
354
+ 'F': 'Nipponham',
355
+ 'E': 'Rakuten',
356
+ 'L': 'Seibu',
357
+ 'M': 'Lotte',
358
+ 'B': 'ORIX',
359
+ 'H': 'SoftBank',
360
+ 'PL': 'Pacific League',
361
+ 'CL': 'Central League',
362
+ 'WL': 'Western League', # Why is this in the data?
363
+ 'EL': 'Eastern League', # Same with this
364
+ }
365
+
366
  team_to_color = {
367
  'G': '#f69727',
368
  'S': '#abcd05',
 
375
  'L': '#00214b',
376
  'M': '#efefef',
377
  'B': '#baa834',
378
+ 'H': '#fcc700',
379
+ 'PL': '#01a9e4',
380
+ 'CL': '#129144',
381
+ 'WL': '#552a8d',
382
+ 'EL': '#068ed9'
383
  }
384
 
385
+ team_names_short_to_color = {team_name: team_to_color[team] for team, team_name in team_name_short.items()}
386
+
387
  def get_text_color_from_team(team):
388
+ if team in ['DB', 'F', 'L', 'E', 'DeNA', 'Nipponham', 'Seibu', 'Rakuten']:
389
  return 'white'
390
  else:
391
  return 'black'
data.py CHANGED
@@ -6,6 +6,7 @@ from huggingface_hub import snapshot_download
6
 
7
  from convert import (
8
  aux_global_id_to_code, presult,
 
9
  ball_kind, ball_kind_code, general_ball_kind, general_ball_kind_code, lr,
10
  game_kind
11
  )
@@ -180,7 +181,17 @@ data_df = (
180
  (pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
181
  )
182
  .with_columns(pl.max('new_batter').cast(pl.Int32).over(['gameId', pl.col('newFiveDigitSerialNumber').str.slice(offset=0, length=3)]).alias('inning_pas'))
183
- .join(sched_df[['GameID', 'HomeTeamNameES', 'VisitorTeamNameES']].rename({'GameID': 'gameId'}), on='gameId')
 
 
 
 
 
 
 
 
 
 
184
  .with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
185
  .with_columns(
186
  (pl.col('date') + '_' + pl.col('VisitorTeamNameES') + '_' + pl.col('HomeTeamNameES') + '_' + pl.col('newFiveDigitSerialNumber')).alias('universal_code') + '_' + pl.col('atBatBallCount'),
@@ -231,7 +242,8 @@ data_df = (
231
  .otherwise('GameKindName')
232
  .alias('coarse_game_kind'),
233
 
234
- pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team')
 
235
  )
236
  .with_columns(
237
  pl.col('presult_id').replace_strict(presult).alias('presult')
 
6
 
7
  from convert import (
8
  aux_global_id_to_code, presult,
9
+ team_name_short,
10
  ball_kind, ball_kind_code, general_ball_kind, general_ball_kind_code, lr,
11
  game_kind
12
  )
 
181
  (pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
182
  )
183
  .with_columns(pl.max('new_batter').cast(pl.Int32).over(['gameId', pl.col('newFiveDigitSerialNumber').str.slice(offset=0, length=3)]).alias('inning_pas'))
184
+ .join(
185
+ (
186
+ sched_df[['GameID', 'HomeTeamNameES', 'VisitorTeamNameES']]
187
+ .rename({'GameID': 'gameId'})
188
+ .with_columns(
189
+ pl.col('HomeTeamNameES').replace_strict(team_name_short).alias('home_team_name_short'),
190
+ pl.col('VisitorTeamNameES').replace_strict(team_name_short).alias('visitor_team_name_short')
191
+ )
192
+ ),
193
+ on='gameId'
194
+ )
195
  .with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
196
  .with_columns(
197
  (pl.col('date') + '_' + pl.col('VisitorTeamNameES') + '_' + pl.col('HomeTeamNameES') + '_' + pl.col('newFiveDigitSerialNumber')).alias('universal_code') + '_' + pl.col('atBatBallCount'),
 
242
  .otherwise('GameKindName')
243
  .alias('coarse_game_kind'),
244
 
245
+ pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team'),
246
+ pl.when(pl.col('half_inning').str.ends_with(1)).then('home_team_name_short').otherwise('visitor_team_name_short').alias('pitcher_team_name_short')
247
  )
248
  .with_columns(
249
  pl.col('presult_id').replace_strict(presult).alias('presult')
pitch_leaderboard.py CHANGED
@@ -7,7 +7,7 @@ from datetime import datetime
7
 
8
  from data import data_df
9
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
10
- from convert import ball_kind, ball_kind_to_color, get_text_color_from_color, team_to_color, get_text_color_from_team
11
  from plotting import stat_cmap
12
 
13
  STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
@@ -16,12 +16,22 @@ STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O
16
  COLUMNS = ['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS
17
 
18
  PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
19
- TEAMS = ['G', 'S', 'DB', 'D', 'T', 'C', 'F', 'E', 'L', 'M', 'B', 'H']
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  notes = '''**Limitations**
21
- - Foreign players names are in Hebpurn romanization.
22
-
23
- **To-do**
24
- - Color cells according to percentiles
25
  '''
26
 
27
 
@@ -35,7 +45,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
35
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
36
 
37
  if include_teams is not None:
38
- data = data.filter(pl.col('pitcher_team').is_in(include_teams))
39
 
40
  # both, left, right = [
41
  # (
@@ -62,11 +72,11 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
62
  compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
63
  .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
64
  .drop('pitId', 'ballKind_code', 'qualified')
65
- .rename({'pitcher_name': 'Pitcher', 'pitcher_team': 'Team', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'})
66
- # .with_columns(
67
- # pl.col(stat).mul(100).round(1)
68
- # for stat in PCT_STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]
69
- # )
70
  # [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
71
  )
72
 
@@ -78,7 +88,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
78
  r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
79
  styling_row.append(f'background-color: rgba({r}, {g}, {b})')
80
  elif col == 'Team':
81
- styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_to_color[item]}')
82
  elif col in ['Pitch', 'Pitch (General)']:
83
  color = ball_kind_to_color[item]
84
  styling_row.append(f'color: {get_text_color_from_color(color)}; background-color: {color}')
@@ -91,7 +101,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
91
  display_value_row = []
92
  for item in row:
93
  if isinstance(item, float):
94
- display_value_row.append(f'{item:.1%}')
95
  else:
96
  display_value_row.append(item)
97
  display_value.append(display_value_row)
@@ -131,7 +141,7 @@ def create_pitch_leaderboard():
131
  # pin_columns = gr.Checkbox(True, 'Pin columns')
132
  leaderboard = gr.DataFrame(
133
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
134
- column_widths=[200, 60, 200, 200] + [100]*len(STATS),
135
  show_copy_button=True,
136
  show_search=True,
137
  pinned_columns=3
 
7
 
8
  from data import data_df
9
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
10
+ from convert import ball_kind, ball_kind_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
11
  from plotting import stat_cmap
12
 
13
  STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
 
16
  COLUMNS = ['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS
17
 
18
  PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
19
+ TEAMS = [
20
+ 'Yomiuri',
21
+ 'Yakult',
22
+ 'DeNA',
23
+ 'Chunichi',
24
+ 'Hanshin',
25
+ 'Hiroshima',
26
+ 'Nipponham',
27
+ 'Rakuten',
28
+ 'Seibu',
29
+ 'Lotte',
30
+ 'ORIX',
31
+ 'SoftBank'
32
+ ]
33
  notes = '''**Limitations**
34
+ - [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
 
 
 
35
  '''
36
 
37
 
 
45
  data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
46
 
47
  if include_teams is not None:
48
+ data = data.filter(pl.col('pitcher_team_name_short').is_in(include_teams))
49
 
50
  # both, left, right = [
51
  # (
 
72
  compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
73
  .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
74
  .drop('pitId', 'ballKind_code', 'qualified')
75
+ .rename({'pitcher_name': 'Pitcher', 'pitcher_team_name_short': 'Team', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'})
76
+ .with_columns(
77
+ pl.col(stat).mul(100)
78
+ for stat in PCT_STATS
79
+ )
80
  # [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
81
  )
82
 
 
88
  r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
89
  styling_row.append(f'background-color: rgba({r}, {g}, {b})')
90
  elif col == 'Team':
91
+ styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
92
  elif col in ['Pitch', 'Pitch (General)']:
93
  color = ball_kind_to_color[item]
94
  styling_row.append(f'color: {get_text_color_from_color(color)}; background-color: {color}')
 
101
  display_value_row = []
102
  for item in row:
103
  if isinstance(item, float):
104
+ display_value_row.append(f'{item:.1f}%')
105
  else:
106
  display_value_row.append(item)
107
  display_value.append(display_value_row)
 
141
  # pin_columns = gr.Checkbox(True, 'Pin columns')
142
  leaderboard = gr.DataFrame(
143
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
144
+ column_widths=[200, 100, 200, 200] + [100]*len(STATS),
145
  show_copy_button=True,
146
  show_search=True,
147
  pinned_columns=3
pitcher_overview.py CHANGED
@@ -7,7 +7,7 @@ from data import SEASONS, data_df
7
  from plotting import create_pitcher_overview_card
8
 
9
  notes = '''**Limitations**
10
- - Foreign players names are in Hebpurn romanization. Contact me if you need a card for a foreign player.
11
 
12
  **To-do**
13
  - Fix names of foreign players
 
7
  from plotting import create_pitcher_overview_card
8
 
9
  notes = '''**Limitations**
10
+ - Only supports regular season data
11
 
12
  **To-do**
13
  - Fix names of foreign players
stats.py CHANGED
@@ -1,4 +1,7 @@
1
  import polars as pl
 
 
 
2
 
3
  def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
4
  if start_date is not None:
@@ -60,7 +63,7 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
60
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
61
  pitch_stats = (
62
  data
63
- .group_by(id_col, pitch_col, 'pitcher_team')
64
  .agg(
65
  pl.first('pitcher_name'),
66
  *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
@@ -105,3 +108,75 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
105
  .sort(id_col, 'count', descending=[False, True])
106
  )
107
  return pitch_stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import polars as pl
2
+ from data import data_df
3
+
4
+ from types import SimpleNamespace
5
 
6
  def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
7
  if start_date is not None:
 
63
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
64
  pitch_stats = (
65
  data
66
+ .group_by(id_col, pitch_col, 'pitcher_team_name_short')
67
  .agg(
68
  pl.first('pitcher_name'),
69
  *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
 
108
  .sort(id_col, 'count', descending=[False, True])
109
  )
110
  return pitch_stats
111
+
112
+
113
+ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
114
+ source_data = data_df.filter(pl.col('ballKind_code') != '-')
115
+
116
+ # if start_date is not None:
117
+ # source_data = source_data.filter(pl.col('date') >= start_date)
118
+ # if end_date is not None:
119
+ # source_data = source_data.filter(pl.col('date') <= end_date)
120
+ #
121
+ # if game_kind is not None:
122
+ # source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
123
+ source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
124
+
125
+ source_data = (
126
+ compute_team_games(source_data)
127
+ .with_columns(
128
+ pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
129
+ pl.col('inning_code').unique().len().over('pitId').alias('IP')
130
+ )
131
+ )
132
+
133
+ if min_ip == 'qualified':
134
+ source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
135
+ else:
136
+ source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
137
+
138
+ if lr is not None:
139
+ source_data = source_data.filter(pl.col('batLR') == lr)
140
+
141
+ pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
142
+
143
+ pitch_shapes = (
144
+ source_data
145
+ .filter(
146
+ (pl.col('pitId') == id) &
147
+ pl.col('x').is_not_null() &
148
+ pl.col('y').is_not_null() &
149
+ (pl.col('ballSpeed') > 0)
150
+ )
151
+ [['pitId', 'general_ballKind_code', 'ballKind_code', 'ballSpeed', 'x', 'y']]
152
+ )
153
+
154
+ pitcher_stats = (
155
+ source_data
156
+ .group_by('pitId')
157
+ .agg(
158
+ pl.col('pitcher_name').first(),
159
+ (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
160
+ (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
161
+ (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
162
+ pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
163
+ pl.first('qualified')
164
+ )
165
+ .explode('batType')
166
+ .unnest('batType')
167
+ .pivot(on='batType', values='proportion')
168
+ .fill_null(0)
169
+ .with_columns(
170
+ (pl.col('G') + pl.col('B')).alias('GB%'),
171
+ (pl.col('F') + pl.col('P')).alias('FB%'),
172
+ pl.col('L').alias('LD%'),
173
+ )
174
+ .drop('G', 'F', 'B', 'P', 'L')
175
+ .with_columns(
176
+ (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
177
+ for stat in ['CSW%', 'K%', 'BB%', 'GB%']
178
+ )
179
+ .filter(pl.col('pitId') == id)
180
+ )
181
+
182
+ return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)