patrickramos committed on
Commit
b421bc5
·
1 Parent(s): 0ed953a

Add working app

Browse files
Files changed (5) hide show
  1. README.md +4 -4
  2. convert.py +30 -0
  3. data.py +67 -6
  4. pitcher_overview.py +54 -17
  5. plotting.py +396 -0
README.md CHANGED
@@ -1,11 +1,11 @@
1
  ---
2
  title: Npb Data App
3
- emoji: 📉
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.37.0
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
 
1
  ---
2
  title: Npb Data App
3
+ emoji: ⚾️
4
+ colorFrom: white
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.37.0
8
+ app_file: pitcher_overview.py
9
  pinned: false
10
  ---
11
 
convert.py CHANGED
@@ -257,3 +257,33 @@ game_kind = {
257
  37: 'PL Climax Series First Stage',
258
  38: 'PL Climax Series Final Stage'
259
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  37: 'PL Climax Series First Stage',
258
  38: 'PL Climax Series Final Stage'
259
  }
260
+
261
# Fallback used for every pitch code that has no dedicated colour yet
# ('C0' is matplotlib's first colour-cycle entry).
_DEFAULT_PITCH_COLOR = 'C0'

# Every known pitch (ball kind) code, in display order.
_PITCH_CODES = (
    '-', 'FF', 'SL', 'VS', 'SV', 'CU', 'SC', 'PC', 'KC', 'FO', 'FS',
    'CH', 'SI', 'SB', 'PB', 'SH', 'FT', 'FW', 'FC',
    'EP',  # technically "super" eephus but I haven't encountered a normal one yet
    'HS', 'HL',
)

# Only the codes that have been given their own colour.
_ASSIGNED_PITCH_COLORS = {
    'FF': 'crimson',
    'SL': 'gold',
    'CU': 'paleturquoise',
    'SC': 'royalblue',
    'KC': 'rebeccapurple',
    'FO': 'darkturquoise',
    'FS': 'cadetblue',
    'CH': 'mediumseagreen',
    'SH': 'tomato',
    'FC': 'sienna',
}

# Pitch code -> matplotlib colour; unassigned codes fall back to the default.
ball_kind_code_to_color = {
    code: _ASSIGNED_PITCH_COLORS.get(code, _DEFAULT_PITCH_COLOR)
    for code in _PITCH_CODES
}
286
def get_text_color_from_color(color):
    """Return a text colour that stays readable on top of *color*.

    The two light pitch colours ('gold' and 'paleturquoise') get black text;
    every other background gets white text.
    """
    light_backgrounds = ('gold', 'paleturquoise')
    return 'black' if color in light_backgrounds else 'white'
data.py CHANGED
@@ -1,13 +1,19 @@
1
  import polars as pl
2
  import os
3
  from tqdm.auto import tqdm
 
 
4
 
5
  from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, lr, game_kind
6
 
7
- DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
8
- # SEASONS = list(range(2021, 2025+1))
 
 
 
 
 
9
  SEASONS = [2021, 2022, 2023, 2024, 2025]
10
- # SEASONS = [2024]
11
 
12
  data_df = pl.DataFrame()
13
  text_df = pl.DataFrame()
@@ -31,8 +37,60 @@ for season in tqdm(SEASONS):
31
  _aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
32
  aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # sched_df = sched_df.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  aux_df = (
38
  aux_df
@@ -131,8 +189,8 @@ data_df = (
131
  on='universal_code',
132
  how='left'
133
  )
134
- .with_columns(
135
-
136
  )
137
  .join(
138
  text_df[['GameID', 'GameKindID']].with_columns(
@@ -176,3 +234,6 @@ data_df = (
176
  (pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
177
  )
178
  )
 
 
 
 
1
  import polars as pl
2
  import os
3
  from tqdm.auto import tqdm
4
+ import pykakasi
5
+ from huggingface_hub import snapshot_download
6
 
7
  from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, lr, game_kind
8
 
9
+
10
# Fetch the app's dataset repository from the Hugging Face Hub at import time.
# The value returned by snapshot_download is used below as the root directory
# for the per-season parquet files and the players tables.
DATA_PATH = snapshot_download(
    repo_id='Ramos-Ramos/npb_data_app',
    repo_type='dataset',
    local_dir='./files',
    cache_dir='./.cache'
)
16
  SEASONS = [2021, 2022, 2023, 2024, 2025]
 
17
 
18
  data_df = pl.DataFrame()
19
  text_df = pl.DataFrame()
 
37
  _aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
38
  aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
39
 
40
# --- Player names: attach a romanized (English) name to every player -------
players_df = pl.read_parquet(os.path.join(DATA_PATH, 'players.parquet'))
kana_df = pl.read_parquet(os.path.join(DATA_PATH, 'players_kana.parquet'))

kks = pykakasi.kakasi()
kana_df = (
    kana_df
    .with_columns(
        # NFKC-normalize so width/compatibility variants compare equal
        pl.col('name').str.normalize('NFKC'),
        (
            pl.col('name_kana')
            .map_elements(
                lambda name: ''.join([word['hepburn'].capitalize() for word in kks.convert(name)]),
                return_dtype=pl.String
            )
            .alias('name_en')
        )
    )
    .with_columns(pl.col('name_en').str.to_lowercase())
)

# Collapse Hepburn long vowels into the spelling used for display.
# Order matters: the specific patterns must run before the generic 'ou'.
_LONG_VOWEL_REPLACEMENTS = [
    ('you', 'yo'),
    ('kou', 'ko'),
    ('gou', 'go'),
    ('shou', 'sho'),
    ('jou', 'jo'),
    ('rou', 'ro'),
    ('ou', 'oh'),
    ('shuu', 'shu'),
    ('ryuu', 'ryu'),
    ('yuu', 'yu'),
    ('oo', 'o')  # messes with someone whose name ends in koo
]
for old_part, new_part in _LONG_VOWEL_REPLACEMENTS:
    # replace_all (not replace): str.replace only rewrites the FIRST match, so
    # a name containing the same pattern twice (e.g. in both family and given
    # name) was romanized inconsistently. literal=True because these are plain
    # substrings, not regular expressions.
    kana_df = kana_df.with_columns(
        pl.col('name_en').str.replace_all(old_part, new_part, literal=True)
    )

kana_df = kana_df.with_columns(pl.col('name_en').str.to_titlecase())

players_df = players_df.with_columns(pl.col('playerName').str.normalize('NFKC'))
# For player names that still have no match in the kana table, try swapping
# common kanji for their variant forms, one character pair at a time.
for old_char, new_char in [
    ('崎', '﨑'),
    ('高', '髙'),
    ('徳', '德'),
    ('濱', '濵'),
    ('瀬', '瀨')
]:
    players_df = (
        players_df.with_columns(
            pl.when(~pl.col('playerName').is_in(kana_df['name']))
            .then(pl.col('playerName').str.replace(old_char, new_char, literal=True))
            .otherwise(pl.col('playerName'))
        )
    )

# Left join: players without a kana entry keep a null name_en.
players_df = players_df.join(kana_df, left_on='playerName', right_on='name', how='left')
94
 
95
  aux_df = (
96
  aux_df
 
189
  on='universal_code',
190
  how='left'
191
  )
192
+ .join(
193
+ players_df.rename({'name_en': 'pitcher_name'}), left_on='pitId', right_on='playerId', how='left'
194
  )
195
  .join(
196
  text_df[['GameID', 'GameKindID']].with_columns(
 
234
  (pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
235
  )
236
  )
237
+
238
+ if __name__ == '__main__':
239
+ breakpoint()
pitcher_overview.py CHANGED
@@ -1,36 +1,73 @@
1
  import gradio as gr
 
 
2
 
3
- from data import SEASONS
 
 
4
 
5
  def dummy(*inputs):
6
  return inputs
7
 
8
- def adjust_season_end_based_on_season_start(season_start, season_end):
9
- return max(season_start, season_end)
 
 
 
 
 
 
 
 
 
 
10
 
11
- def adjust_season_start_based_on_season_end(season_end, season_start):
12
- return min(season_start, season_end)
 
 
 
13
 
14
 
15
  def create_pitcher_overview(data_df):
16
  with gr.Blocks() as app:
17
- gr.Markdown('Test')
18
 
19
- name = gr.Dropdown(sorted(data_df['pitId'].unique().to_list()), label='Name')
20
- season_start = gr.Dropdown(SEASONS, label='Season start')
21
- season_end = gr.Dropdown(SEASONS, label='Season end')
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- season_start.input(adjust_season_end_based_on_season_start, inputs=[season_start, season_end], outputs=season_end)
24
- season_end.input(adjust_season_start_based_on_season_end, inputs=[season_end, season_start], outputs=season_start)
 
 
 
 
25
 
26
- game_type = gr.Dropdown(['Spring Training', 'Regular Season', 'Postseason'], label='Game Type'])
27
-
28
- generate = gr.Button('Generate')
29
 
30
- dummy_io = [name, season_start, season_end, game_type
31
- generate.click(dummy, inputs=dummy_io, outputs=dummy_io)
 
 
 
 
 
32
 
33
  return app
34
 
35
  if __name__ == '__main__':
36
- create_pitcher_overview().launch()
 
1
  import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ import polars as pl
4
 
5
+ from data import SEASONS, data_df
6
+
7
+ from plotting import create_pitcher_overview_card
8
 
9
  def dummy(*inputs):
      """Return the positional arguments unchanged, as a tuple."""
      # NOTE(review): nothing in the visible new code calls this any more.
      return inputs
11
 
12
def gr_create_pitcher_overview_card(name, season):
    """Gradio callback: render the overview card for a pitcher and season.

    Args:
        name: Romanized pitcher name as shown in the dropdown.
        season: Season year selected in the dropdown.

    Returns:
        Path of a PNG file containing the rendered card.

    Raises:
        gr.Error: if an input is missing, the name maps to zero or several
            pitcher IDs, or the card cannot be built for that season.
    """
    import tempfile

    # Both dropdowns start empty; date(None, ...) would fail with a TypeError.
    if name is None or season is None:
        raise gr.Error('Please select both a name and a season.')

    pit_ids = data_df.filter(pl.col('pitcher_name') == name)['pitId'].unique()
    if len(pit_ids) == 0:
        raise gr.Error(f"No data found for {name}. If the name looks strangely spelled or formatted there's a possibility that's what's causing the error.")
    if len(pit_ids) > 1:
        raise gr.Error(f'Multiple IDs for {name}')

    try:
        fig = create_pitcher_overview_card(pit_ids.item(), season=season, dpi=300)
    except ValueError as err:
        # e.g. the pitcher has no rows for the selected season
        raise gr.Error(f'Could not create a card for {name} in {season}: {err}') from err

    try:
        # A unique file per request: a shared 'tmp.png' can be overwritten by
        # a concurrent request before Gradio has read it.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            fig.savefig(tmp, format='png', bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return tmp.name
24
 
25
+ # def adjust_season_end_based_on_season_start(season_start, season_end):
26
+ # return max(season_start, season_end)
27
+ #
28
+ # def adjust_season_start_based_on_season_end(season_end, season_start):
29
+ # return min(season_start, season_end)
30
 
31
 
32
  def create_pitcher_overview(data_df):
33
  with gr.Blocks() as app:
34
+ gr.Markdown('Pitcher overview')
35
 
36
+ with gr.Row():
37
+ with gr.Column():
38
+ # names = [f'{pit_name} | {pit_id}' for pit_name, pit_id in data_df[['pitcher_name', 'pitId']].unique().sort('pitId').iter_rows()]
39
+ names = data_df['pitcher_name'].unique().sort().to_list()
40
+ name = gr.Dropdown(names, label='Name')
41
+ season = gr.Dropdown(SEASONS, label='Season')
42
+ # season_start = gr.Dropdown(SEASONS, label='Season start')
43
+ # season_end = gr.Dropdown(SEASONS, label='Season end')
44
+ # game_type = gr.Dropdown(['Spring Training', 'Regular Season', 'Postseason'], label='Game Type'])
45
+ view = gr.Button('View')
46
+ gr.Markdown(
47
+ '''
48
+ **Limitations**
49
+ - Foreign players names are in Hebpurn romanization. Contact me if you need a card for a foreign player.
50
 
51
+ **To-do**
52
+ - Fix names of foreign playeres
53
+ - Add teams insignias
54
+ - Measure percentiles per pitcher handedness
55
+ - Allow for arbitrary date ranges
56
+ - Improve readability of pitch velocities
57
 
58
+ Last updated: 2025-07-19
59
+ '''
60
+ )
61
 
62
+ with gr.Column():
63
+ overview_card = gr.Image(label='Overview')
64
+
65
+ # season_start.input(adjust_season_end_based_on_season_start, inputs=[season_start, season_end], outputs=season_end)
66
+ # season_end.input(adjust_season_start_based_on_season_end, inputs=[season_end, season_start], outputs=season_start)
67
+ view.click(gr_create_pitcher_overview_card, inputs=[name, season], outputs=overview_card)
68
+
69
 
70
  return app
71
 
72
  if __name__ == '__main__':
73
+ create_pitcher_overview(data_df).launch()
plotting.py ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib as mpl
2
+ import matplotlib.pyplot as plt
3
+ from matplotlib import transforms
4
+ from matplotlib.colors import LinearSegmentedColormap
5
+ import polars as pl
6
+ from pyfonts import load_google_font
7
+ from scipy.stats import gaussian_kde
8
+ import numpy as np
9
+
10
+ from types import SimpleNamespace
11
+ from datetime import date
12
+
13
+ from data import data_df
14
+ from convert import ball_kind_code_to_color, get_text_color_from_color
15
+
16
+ mpl.use('Agg')
17
+
18
def compute_team_games(data):
    """Attach each team's total game count to every row of *data*.

    For every team the number of distinct 'gameId' values is counted once as
    home team and once as visiting team; the two counts are summed. The total
    is then joined back twice, as 'home_games' (total for the row's home team)
    and 'visitor_games' (total for the row's visiting team).

    All joins are inner joins, so rows whose team never appears on both the
    home and the visiting side are dropped.
    """
    def _games_as(team_col, count_name):
        # distinct games per team when it appears in `team_col`
        return (
            data
            .group_by(team_col)
            .agg(pl.col('gameId').n_unique().alias(count_name))
            .rename({team_col: 'team'})
        )

    totals = (
        _games_as('HomeTeamNameES', 'home_games')
        .join(_games_as('VisitorTeamNameES', 'visitor_games'), on='team')
        .select('team', (pl.col('home_games') + pl.col('visitor_games')).alias('games'))
    )

    # Discard any pre-existing count columns so the joined totals replace them.
    result = data.drop('home_games', 'visitor_games', strict=False)
    for team_col, count_name in (
        ('HomeTeamNameES', 'home_games'),
        ('VisitorTeamNameES', 'visitor_games'),
    ):
        result = result.join(
            totals.rename({'games': count_name}),
            left_on=team_col,
            right_on='team'
        )
    return result
59
+
60
+
61
def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1):
    """Compute per-pitch-type and overall stats (with percentiles) for one pitcher.

    Stats and percentiles are computed for every pitcher in the filtered data
    and only then narrowed to *id*, so percentiles are relative to that pool.

    Args:
        id: Value matched against the 'pitId' column.
        lr: If given, keep only rows whose 'batLR' equals this value.
        game_kind: If given, keep only rows with this 'coarse_game_kind'.
        start_date: Inclusive lower bound on 'date' (optional).
        end_date: Inclusive upper bound on 'date' (optional).
        min_ip: Minimum 'IP' for a pitcher to be flagged qualified, or the
            string 'qualified' to require IP >= the pitcher's team game count.
        min_pitches: Minimum pitch count for a pitch type to be flagged qualified.

    Returns:
        SimpleNamespace with `pitcher_stats` (one row per pitcher, filtered to
        *id*), `pitch_stats` (one row per pitch type of *id*, most used first)
        and `pitch_shapes` (per-pitch speed and location rows of *id*).
    """
    # Rows with the placeholder pitch code '-' are excluded up front.
    source_data = data_df.filter(pl.col('ballKind_code') != '-')

    if start_date is not None:
        source_data = source_data.filter(pl.col('date') >= start_date)
    if end_date is not None:
        source_data = source_data.filter(pl.col('date') <= end_date)

    if game_kind is not None:
        source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)

    source_data = (
        compute_team_games(source_data)
        .with_columns(
            # Team game count for each pitcher, taken from the pitcher's first row.
            # NOTE(review): presumably a half_inning ending in '1' means the home
            # team is pitching - confirm against the data.
            pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
            # 'IP' here is the number of distinct inning_code values per pitcher.
            pl.col('inning_code').unique().len().over('pitId').alias('IP')
        )
    )

    if min_ip == 'qualified':
        source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
    else:
        source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))


    # The batter-side filter is applied after 'IP'/'qualified' are computed, so
    # qualification is based on all batters faced.
    if lr is not None:
        source_data = source_data.filter(pl.col('batLR') == lr)

    pitch_stats = (
        source_data
        # The 'pitch', 'whiff', 'swing' and 'csw' columns used below are expected
        # to already exist on data_df.
        .group_by('pitId', 'ballKind_code')
        .agg(
            pl.len().alias('count'),
            # share of each batted-ball type among rows that have one
            pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
            (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
            (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
            (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
        )
        .with_columns(
            (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
            (pl.col('count') >= min_pitches).alias('qualified')
        )
        # turn the list of {batType, proportion} structs into one column per batType
        .explode('batType')
        .unnest('batType')
        .pivot(on='batType', values='proportion')
        .fill_null(0)
        .with_columns(
            (pl.col('G') + pl.col('B')).alias('GB%'),
            (pl.col('F') + pl.col('P')).alias('FB%'),
            pl.col('L').alias('LD%').round(2),
        )
        .drop('G', 'F', 'B', 'P', 'L')
        .with_columns(
            # percentile = rank among qualified rows / number of qualified rows
            (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
            for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%']
        )
        .sort('pitId', 'count', descending=[False, True])
        .filter(pl.col('pitId') == id)
    )

    # Raw per-pitch rows for this pitcher that have a location and a positive speed.
    pitch_shapes = (
        source_data
        .filter(
            (pl.col('pitId') == id) &
            pl.col('x').is_not_null() &
            pl.col('y').is_not_null() &
            (pl.col('ballSpeed') > 0)
        )
        [['pitId', 'ballKind_code', 'ballSpeed', 'x', 'y']]
    )

    pitcher_stats = (
        source_data
        .group_by('pitId')
        .agg(
            pl.col('pitcher_name').first(),
            # denominators are the number of distinct pa_code values
            (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
            (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
            (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
            pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
            pl.first('qualified')
        )
        .explode('batType')
        .unnest('batType')
        .pivot(on='batType', values='proportion')
        .fill_null(0)
        .with_columns(
            (pl.col('G') + pl.col('B')).alias('GB%'),
            (pl.col('F') + pl.col('P')).alias('FB%'),
            pl.col('L').alias('LD%'),
        )
        .drop('G', 'F', 'B', 'P', 'L')
        .with_columns(
            # BB% is ranked in descending order, so a lower walk rate gets a higher percentile
            (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
            for stat in ['CSW%', 'K%', 'BB%', 'GB%']
        )
        .filter(pl.col('pitId') == id)
    )

    return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
169
+
170
+
171
def get_card_data(id, **kwargs):
    """Collect everything the overview card needs for one pitcher.

    Runs get_pitcher_stats three times (all batters, lr='l', lr='r') with the
    same keyword arguments and merges the results; columns from the 'l' and
    'r' splits get the suffixes '_left' and '_right'.

    Returns:
        SimpleNamespace with the merged `pitcher_stats` and `pitch_stats`
        tables plus the three per-split pitch-shape tables.
    """
    overall = get_pitcher_stats(id, **kwargs)
    vs_left = get_pitcher_stats(id, 'l', **kwargs)
    vs_right = get_pitcher_stats(id, 'r', **kwargs)

    pitcher_stats = overall.pitcher_stats
    for split, suffix in ((vs_left, '_left'), (vs_right, '_right')):
        pitcher_stats = pitcher_stats.join(split.pitcher_stats, on='pitId', suffix=suffix)

    # Full joins keep pitch types that are missing from one split; the
    # resulting gaps are filled with 0.
    pitch_stats = overall.pitch_stats
    for split, suffix in ((vs_left, '_left'), (vs_right, '_right')):
        pitch_stats = pitch_stats.join(split.pitch_stats, on='ballKind_code', how='full', suffix=suffix)
    pitch_stats = pitch_stats.fill_null(0)

    return SimpleNamespace(
        pitcher_stats=pitcher_stats,
        pitch_stats=pitch_stats,
        both_pitch_shapes=overall.pitch_shapes,
        left_pitch_shapes=vs_left.pitch_shapes,
        right_pitch_shapes=vs_right.pitch_shapes,
    )
182
+
183
+
184
def plot_arsenal(ax, pitches):
    """Draw one coloured, labelled dot per pitch code along a single row.

    Args:
        ax: Matplotlib axes to draw on.
        pitches: Sequence of pitch codes, in display order.
    """
    ax.set_xlim(0, 11)

    centers = np.arange(len(pitches)) + 0.5
    baseline = np.zeros(len(pitches))
    dot_colors = [ball_kind_code_to_color.get(code, 'C0') for code in pitches]
    ax.scatter(centers, baseline, c=dot_colors, s=170)

    # Write the pitch code on each dot, in a colour readable on that dot.
    for slot, (code, dot_color) in enumerate(zip(pitches, dot_colors)):
        ax.text(
            x=slot+0.5,
            y=0,
            s=code,
            horizontalalignment='center',
            verticalalignment='center',
            font=font,
            color=get_text_color_from_color(dot_color),
        )
192
+
193
+
194
def plot_usage(ax, usages):
    """Draw pitch usage as one horizontal stacked bar.

    Args:
        ax: Matplotlib axes to draw on.
        usages: Two-column table of (pitch code, usage fraction) rows; the
            segments are stacked left to right in row order.
    """
    left = 0
    height = 0.8
    for pitch, usage in usages.iter_rows():
        # .get with the 'C0' fallback, matching plot_arsenal and
        # plot_pitch_stats: an unmapped pitch code must not raise KeyError.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.barh(0, usage, height=height, left=left, color=color)
        # Only label segments wide enough to hold the text.
        if usage > 0.1:
            ax.text(left+usage/2, 0, f'{usage:.0%}', horizontalalignment='center', verticalalignment='center', size=8, font=font, color=get_text_color_from_color(color))
        left += usage
    ax.set_xlim(0, 1)
    # Extra headroom above the bar leaves space for a caption in the axes.
    ax.set_ylim(-height/2, height/2*2.75)
205
+
206
+
207
# Evaluation grid for pitch-location densities: x in [-100, 100] and
# y in [0, 250], one grid point per unit.
x_range = np.arange(-100, 100+1)
y_range = np.arange(0, 250+1)
X, Y = np.meshgrid(x_range, y_range)


def fit_pred_kde(data):
    """Fit a Gaussian KDE to *data* and evaluate it on the module-level grid.

    Args:
        data: Array of shape (2, n_points) holding x values in row 0 and
            y values in row 1.

    Returns:
        Array with the same shape as the grid `X`, holding the estimated
        density at every grid point.
    """
    kde = gaussian_kde(data)
    # np.vstack of the ravelled grids builds the same (2, n_grid) array as the
    # previous np.concat(...).reshape(2, -1), but np.concat only exists in
    # NumPy >= 2.0.
    grid_points = np.vstack((X.ravel(), Y.ravel()))
    return kde(grid_points).reshape(X.shape)
216
+
217
+
218
def plot_loc(ax, locs, coverage=0.68):
    """Draw a zone diagram with one pitch-location density region per pitch type.

    Args:
        ax: Matplotlib axes to draw on.
        locs: Table with 'ballKind_code', 'x' and 'y' columns, one row per pitch.
        coverage: Fraction of each pitch type's estimated density that its
            shaded region should contain (default 0.68).
    """
    ax.set_aspect('equal', adjustable='datalim')
    ax.set_ylim(-52, 252)
    # Nested background rectangles, and a five-sided plate shape below them.
    ax.add_patch(plt.Rectangle((-100, 0), width=200, height=250, facecolor='darkgray', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-80, 25), width=160, height=200, facecolor='gainsboro', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-60, 50), width=120, height=150, fill=False, edgecolor='yellowgreen', linestyle=':'))
    ax.add_patch(plt.Rectangle((-40, 75), width=80, height=100, facecolor='ivory', edgecolor='darkgray'))
    ax.add_patch(plt.Polygon([(0, -10), (45, -30), (51, -50), (-51, -50), (-45, -30), (0, -10)], facecolor='snow', edgecolor='darkgray'))

    # Most-thrown pitch types are drawn first, so rarer ones end up on top.
    for (pitch,), _locs in locs.sort(pl.len().over('ballKind_code'), descending=True).group_by('ballKind_code', maintain_order=True):
        if len(_locs) <= 2:
            continue

        try:
            Z = fit_pred_kde(_locs[['x', 'y']].to_numpy().T)
        except np.linalg.LinAlgError:
            # Degenerate locations (e.g. all points on one line) give a
            # singular covariance; skip this pitch type instead of failing
            # the whole card.
            continue

        total = Z.sum()
        if not total > 0:
            # No density on the grid; nothing meaningful to draw.
            continue
        Z = Z / total

        # Density threshold t such that the cells with density >= t hold about
        # `coverage` of the total mass: accumulate from the lowest cell up and
        # stop where the running sum is closest to 1 - coverage.
        sorted_Z = np.sort(Z.ravel())
        Z_cumsum = sorted_Z.cumsum()
        t = sorted_Z[np.argmin(np.abs(Z_cumsum - (1-coverage)))]

        # .get with the 'C0' fallback, matching plot_arsenal and plot_pitch_stats.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.contourf(X, Y, Z, levels=[t, 1], colors=color, alpha=0.5)
        ax.contour(X, Y, Z, levels=[t], colors=color, alpha=0.75)
242
+
243
+
244
def plot_velo(ax, velos):
    """Draw one horizontal half-violin of pitch speed per pitch type.

    Each violin is labelled with the rounded mean speed, placed at the mean on
    the x axis and at mid-height of the axes.

    Args:
        ax: Matplotlib axes to draw on.
        velos: Table with 'ballKind_code' and 'ballSpeed' columns.
    """
    # x in data coordinates, y in axes coordinates
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    # maintain_order makes the draw order (and so the overlap of the violins)
    # deterministic between runs.
    for (pitch,), _velos in velos.group_by('ballKind_code', maintain_order=True):
        if len(_velos) <= 1:
            continue

        violin = ax.violinplot(_velos['ballSpeed'], orientation='horizontal', side='high', showextrema=False)
        # .get with the 'C0' fallback, matching plot_arsenal and plot_pitch_stats.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        for _violin in violin['bodies']:
            _violin.set_facecolor(color)
        mean = _velos['ballSpeed'].mean()
        ax.text(mean, 0.5, round(mean), horizontalalignment='center', verticalalignment='center', color='gray', alpha=0.75, font=font, transform=trans)
255
+
256
+
257
+ stat_cmap = LinearSegmentedColormap.from_list('stat', colors=['dodgerblue', 'snow', 'crimson'])
258
+
259
+
260
def plot_pitch_stats(ax, stats, stat_names):
    """Render a table of per-pitch-type stats: one column per pitch, one row per stat.

    The header row holds the pitch codes on their pitch colours; the first
    column holds the stat names. Value cells are shaded by the stat's
    percentile through `stat_cmap`; pitch types that are not qualified are
    greyed out instead.

    Args:
        ax: Matplotlib axes to draw on.
        stats: Table with 'ballKind_code', 'qualified', and for every stat
            name a '<stat>' and a '<stat>_pctl' column.
        stat_names: Names of the stats to show, top to bottom.
    """
    ax.set_aspect('equal', adjustable='datalim')

    table = mpl.table.Table(ax)
    n_rows = len(stat_names) + 1
    n_cols = len(stats) + 1

    # Layout options shared by every cell of the table.
    cell_opts = dict(
        width=1/n_cols,
        height=1/n_rows,
        loc='center',
        fontproperties=font,
        edgecolor='white',
    )

    # First column: stat names.
    for row, stat_name in enumerate(stat_names, start=1):
        table.add_cell(row=row, col=0, text=stat_name, **cell_opts)

    for col, pitch in enumerate(stats['ballKind_code'], start=1):
        # Header cell on the pitch colour.
        header_color = ball_kind_code_to_color.get(pitch, 'C0')
        header = table.add_cell(row=0, col=col, text=pitch, facecolor=header_color, **cell_opts)
        header.get_text().set_color(get_text_color_from_color(header_color))

        pitch_row = stats.filter(pl.col('ballKind_code') == pitch)
        qualified = pitch_row['qualified'].item()
        for row, stat_name in enumerate(stat_names, start=1):
            value = pitch_row[stat_name].item()
            percentile = pitch_row[f'{stat_name}_pctl'].item()
            if qualified:
                shade = stat_cmap([0, percentile, 1])[1]
            else:
                shade = 'gainsboro'
            value_cell = table.add_cell(row=row, col=col, text=f'{value:.0%}', facecolor=shade, **cell_opts)
            if not qualified:
                value_cell.get_text().set_color('gray')

    ax.add_artist(table)
292
+
293
+
294
def plot_pitcher_stats(ax, stats, stat_names):
    """Render the pitcher's overall stats as a single row of name/value cell pairs.

    Value cells are shaded by the stat's percentile through `stat_cmap`, or
    greyed out when the pitcher is not qualified.

    Args:
        ax: Matplotlib axes to draw on.
        stats: One-row table with 'qualified', and for every stat name a
            '<stat>' and a '<stat>_pctl' column.
        stat_names: Names of the stats to show, left to right.
    """
    ax.set_aspect('equal', adjustable='datalim')

    table = mpl.table.Table(ax)

    # Each stat takes two equally wide cells: its name and its value.
    cell_opts = dict(
        width=1/(len(stat_names)*2),
        height=1,
        loc='center',
        fontproperties=font,
        edgecolor='white',
    )

    qualified = stats['qualified'].item()

    for position, stat_name in enumerate(stat_names):
        value = stats[stat_name].item()
        percentile = stats[f'{stat_name}_pctl'].item()
        name_col = position*2

        table.add_cell(row=0, col=name_col, text=stat_name, **cell_opts)

        if qualified:
            shade = stat_cmap([0, percentile, 1])[1]
        else:
            shade = 'gainsboro'
        value_cell = table.add_cell(row=0, col=name_col+1, text=f'{value:.0%}', facecolor=shade, **cell_opts)
        if not qualified:
            value_cell.get_text().set_color('gray')

    ax.add_artist(table)
314
+
315
+
316
+ font = load_google_font('Saira Extra Condensed', weight='medium')
317
+
318
+
319
def create_pitcher_overview_card(id, season, dpi=300):
    """Build the one-page overview card for a pitcher's regular season.

    Top to bottom the card shows: name and season, arsenal, usage against
    left- and right-handed hitters, location against each side, velocity
    distributions, a per-pitch stat table, an overall stat table, and credits.

    Args:
        id: Pitcher ID (matched against 'pitId').
        season: Season year; data from Jan 1 to Dec 31 of that year is used.
        dpi: Resolution of the figure. The figure size is 1080/300 x 1350/300
            inches, i.e. 1080 x 1350 pixels at the default dpi of 300.

    Returns:
        The matplotlib Figure. The caller is responsible for closing it.

    Raises:
        ValueError: if there is not enough data to build the card.
    """
    data = get_card_data(id, start_date=date(season, 1, 1), end_date=date(season, 12, 31), game_kind='Regular Season', min_pitches=100)

    # Fail with a clear message BEFORE a figure is allocated: without this the
    # first `.item()` below raises an opaque ValueError and leaks the figure.
    if data.pitcher_stats.is_empty():
        raise ValueError(f'not enough regular season data for pitcher {id} in {season}')

    fig = plt.figure(figsize=(1080/300, 1350/300), dpi=dpi)
    gs = fig.add_gridspec(8, 6, height_ratios=[1, 1, 1.5, 6, 1, 3, 1, 0.5])

    # Title row: name on the left, season and a vertical 'REG' tag on the right.
    title_ax = fig.add_subplot(gs[0, :])
    title_ax.text(x=0, y=0, s=data.pitcher_stats['pitcher_name'].item().upper(), verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=0.95, y=0, s=season, horizontalalignment='right', verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=1, y=0.5, s='REG', horizontalalignment='right', verticalalignment='center', font=font, size=10, rotation='vertical')

    arsenal_ax = fig.add_subplot(gs[1, :])
    plot_arsenal(arsenal_ax, data.pitch_stats['ballKind_code'])

    # Usage: left half of the row vs LHH, right half vs RHH.
    usage_l_ax = fig.add_subplot(gs[2, :3])
    plot_usage(usage_l_ax, data.pitch_stats[['ballKind_code', 'usage_left']])
    usage_l_ax.text(0, 1, 'LHH usage', horizontalalignment='left', verticalalignment='top', linespacing=0.5, color='gray', font=font, size=10, transform=usage_l_ax.transAxes)

    usage_r_ax = fig.add_subplot(gs[2, 3:])
    plot_usage(usage_r_ax, data.pitch_stats[['ballKind_code', 'usage_right']])
    usage_r_ax.text(0, 1, 'RHH usage', horizontalalignment='left', verticalalignment='top', linespacing=0.5, color='gray', font=font, size=10, transform=usage_r_ax.transAxes)

    # Location: same left/right split.
    loc_l_ax = fig.add_subplot(gs[3, :3])
    loc_l_ax.text(0, 1, 'LHH\nloc', verticalalignment='top', horizontalalignment='left', color='gray', font=font, size=10, transform=loc_l_ax.transAxes)
    plot_loc(loc_l_ax, data.left_pitch_shapes)

    loc_r_ax = fig.add_subplot(gs[3, 3:])
    loc_r_ax.text(0, 1, 'RHH\nloc', verticalalignment='top', horizontalalignment='left', color='gray', font=font, size=10, transform=loc_r_ax.transAxes)
    plot_loc(loc_r_ax, data.right_pitch_shapes)

    velo_ax = fig.add_subplot(gs[4, :])
    plot_velo(velo_ax, data.both_pitch_shapes)
    velo_ax.text(0, 1, 'Velo', verticalalignment='top', horizontalalignment='left', color='gray', font=font, size=10, transform=velo_ax.transAxes)

    pitch_stats_ax = fig.add_subplot(gs[5, :])
    plot_pitch_stats(pitch_stats_ax, data.pitch_stats, ['CSW%', 'GB%'])

    pitcher_stats_ax = fig.add_subplot(gs[6, :])
    plot_pitcher_stats(pitcher_stats_ax, data.pitcher_stats, ['CSW%', 'K%', 'BB%', 'GB%'])

    credits_ax = fig.add_subplot(gs[7, :])
    credits_ax.text(x=0, y=0.5, s='Data: SPAIA, Sanspo', verticalalignment='center', font=font, size=7)
    credits_ax.text(x=1, y=0.5, s='@yakyucosmo', horizontalalignment='right', verticalalignment='center', font=font, size=7)

    # Hide axis frames, ticks and tick labels on every panel.
    for ax in [
        title_ax,
        arsenal_ax,
        usage_l_ax, usage_r_ax,
        loc_l_ax, loc_r_ax,
        velo_ax,
        pitch_stats_ax,
        pitcher_stats_ax,
        credits_ax
    ]:
        ax.axis('off')
        ax.tick_params(
            axis='both',
            which='both',
            length=0,
            labelbottom=False,
            labelleft=False
        )

    return fig
395
+ # fig = create_card('1600153', season=2023, dpi=300)
396
+ # plt.show()