patrickramos committed on
Commit
d3fa801
·
1 Parent(s): 56c6bd4

Update player name translation, add daily and weekly leaderboards

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,13 +1,17 @@
1
  import gradio as gr
 
2
 
3
  from data import data_df
4
  from pitcher_overview import create_pitcher_overview
5
  from pitch_leaderboard import create_pitch_leaderboard
 
6
  from css import css
7
 
 
 
8
  updated = '2025-07-21'
9
  limitations = '''**General Limitations**
10
- - Foreign players names are in Hebpurn romanization. Contact me if you need a card for a foreign player.
11
  '''
12
 
13
  if __name__ == '__main__':
@@ -16,6 +20,8 @@ if __name__ == '__main__':
16
  create_pitcher_overview(data_df)
17
  with gr.Tab('Pitch Leaderboard'):
18
  create_pitch_leaderboard()
 
 
19
 
20
  gr.Markdown(f'Last updated: {updated}')
21
  gr.Markdown(limitations)
 
1
  import gradio as gr
2
+ import matplotlib as mpl
3
 
4
  from data import data_df
5
  from pitcher_overview import create_pitcher_overview
6
  from pitch_leaderboard import create_pitch_leaderboard
7
+ from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
8
  from css import css
9
 
10
+ mpl.use('Agg')
11
+
12
  updated = '2025-07-21'
13
  limitations = '''**General Limitations**
14
+ - As new players make their debut, some names may not be translated/transliterated correctly.
15
  '''
16
 
17
  if __name__ == '__main__':
 
20
  create_pitcher_overview(data_df)
21
  with gr.Tab('Pitch Leaderboard'):
22
  create_pitch_leaderboard()
23
+ with gr.Tab('Daily/Weekly Leaderboard'):
24
+ create_daily_weekly_leaderboard_app(data_df)
25
 
26
  gr.Markdown(f'Last updated: {updated}')
27
  gr.Markdown(limitations)
assets/white_insignias/chunichi.png ADDED

Git LFS Details

  • SHA256: 1dab8730d9aecff64e1c22f2064a1758778ccb39ef1781078100214d0b8b9c6f
  • Pointer size: 130 Bytes
  • Size of remote file: 22.8 kB
assets/white_insignias/dena.png ADDED

Git LFS Details

  • SHA256: 39f5cedce6cfd0651c8af14dff107c23353812e60b430e5b0eb8ce72c7302b9b
  • Pointer size: 130 Bytes
  • Size of remote file: 65.9 kB
assets/white_insignias/hanshin.png ADDED

Git LFS Details

  • SHA256: f2b0b2b19d8dd804844f89870063fcda59788aa79bf783c36934cf2450d55e27
  • Pointer size: 130 Bytes
  • Size of remote file: 28.9 kB
assets/white_insignias/hiroshima.png ADDED

Git LFS Details

  • SHA256: ed5b9da7b40fa70813ef95a8abd602cccd022dc3fe3c38414ccf2fe3b584d275
  • Pointer size: 130 Bytes
  • Size of remote file: 51.7 kB
assets/white_insignias/lotte.png ADDED

Git LFS Details

  • SHA256: ecf18099471022b94c7495fa733bebd2646dd0a50de3ef96785173a8cfe3ee0b
  • Pointer size: 130 Bytes
  • Size of remote file: 47.6 kB
assets/white_insignias/nipponham.png ADDED

Git LFS Details

  • SHA256: 4c78ed39f90ccd6455d53158fd171dc4ff5ea94af9c6082d3e19e7356fc5d698
  • Pointer size: 130 Bytes
  • Size of remote file: 13 kB
assets/white_insignias/orix.png ADDED

Git LFS Details

  • SHA256: e98ccfb6b4cc8ec350dd6d1da1e4df52fc3f85b5f74ea4d7f7fe96fd2c179f1b
  • Pointer size: 130 Bytes
  • Size of remote file: 52.3 kB
assets/white_insignias/rakuten.png ADDED

Git LFS Details

  • SHA256: 5dfe430c78c848f027323e8a3a4b47563ac8a15c4b53624ceaa3d46dd97d987c
  • Pointer size: 130 Bytes
  • Size of remote file: 90.8 kB
assets/white_insignias/seibu.png ADDED

Git LFS Details

  • SHA256: 104f44608c11b0c5473fdcf40ee195983dccd9bec7836c5c928086f0af295fea
  • Pointer size: 130 Bytes
  • Size of remote file: 32.2 kB
assets/white_insignias/softbank.png ADDED

Git LFS Details

  • SHA256: 3d96f2c8b737be6a4703d22707bee26a39aea7a37e78b38ab4b3b925e9e36cba
  • Pointer size: 130 Bytes
  • Size of remote file: 39.6 kB
assets/white_insignias/yakult.png ADDED

Git LFS Details

  • SHA256: f71c204e4d4f5bbafd7941ff3a587b303465f6450003de5f3cbc70ba42e42017
  • Pointer size: 130 Bytes
  • Size of remote file: 43.4 kB
assets/white_insignias/yomiuri.png ADDED

Git LFS Details

  • SHA256: e4de837fb1f68aef804cb5722427573acbcc3742edf5bfd100f5b3d7e6f67f37
  • Pointer size: 130 Bytes
  • Size of remote file: 47.2 kB
daily_weekly_leaderboard.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+
4
+ import datetime
5
+
6
+ from plotting import create_daily_weekly_leaderboard
7
+ from data import data_df
8
+
9
+
10
def gr_create_daily_weekly_leaderboards(leaderboard_date, whiff_leaders, velo_leaders, data_df):
    '''
    Render the four leaderboard cards (daily/weekly x whiff/velo) to PNG files.

    leaderboard_date: date the leaderboards are built for
    whiff_leaders: number of leaders requested for the whiff boards
    velo_leaders: number of leaders requested for the velo boards
    data_df: pitch data handed to create_daily_weekly_leaderboard

    Returns the written filenames in the order
    daily whiff, daily velo, weekly whiff, weekly velo.
    '''
    filenames = []
    for time_type in ('daily', 'weekly'):
        for stat, leaders in zip(('whiff', 'velo'), (whiff_leaders, velo_leaders)):
            fig = create_daily_weekly_leaderboard(stat, leaderboard_date, time_type, leaders, data_df)
            filename = f'{stat}_{time_type}.png'
            try:
                # save the figure we were handed instead of relying on pyplot's "current figure"
                fig.savefig(filename, bbox_inches='tight')
            finally:
                # pyplot keeps every figure alive until it is closed;
                # without this each search leaks four figures
                plt.close(fig)
            filenames.append(filename)

    return filenames
21
+
22
+
23
def go_back_day(date):
    '''Return `date` moved one day earlier.'''
    one_day = datetime.timedelta(days=1)
    return date - one_day
25
+
26
+
27
def go_forward_day(date):
    '''Return `date` moved one day later.'''
    one_day = datetime.timedelta(days=1)
    return date + one_day
29
+
30
+
31
def go_back_week(date):
    '''Return `date` moved seven days earlier.'''
    one_week = datetime.timedelta(days=7)
    return date - one_week
33
+
34
+
35
def go_forward_week(date):
    '''Return `date` moved seven days later.'''
    one_week = datetime.timedelta(days=7)
    return date + one_week
37
+
38
+
39
def create_daily_weekly_leaderboard_app(data_df):
    '''
    Build the Daily/Weekly Leaderboards Gradio Blocks app and return it.

    The app has a date picker (defaulting to the latest date in `data_df`),
    the number of whiff/velo leaders to show, a search button, day/week
    navigation buttons, and four image outputs (daily/weekly x whiff/velo).

    data_df: pitch data; its 'date' column is read for the default date and
             the frame itself is passed to the search handler through gr.State
    '''
    # the date picker starts at midnight of the most recent date in the data
    latest = data_df['date'].max()
    initial_date = datetime.datetime(latest.year, latest.month, latest.day)

    with gr.Blocks() as app:
        gr.Markdown('# Daily/Weekly Leaderboards')

        data_state = gr.State(data_df)

        # NOTE(review): the date picker, both counters and the search button are assumed to share one row - confirm
        with gr.Row():
            leaderboard_date = gr.DateTime(initial_date, include_time=False, type='datetime', label='Date')
            whiff_leaders = gr.Number(10, precision=0, minimum=0, label='Whiff Leaders')
            velo_leaders = gr.Number(10, precision=0, minimum=0, label='Velo Leaders')
            search = gr.Button('Search')

        with gr.Row():
            prev_week = gr.Button('Previous Week')
            prev_day = gr.Button('Previous Day')
            next_day = gr.Button('Next Day')
            next_week = gr.Button('Next Week')

        # created in the order daily whiff, daily velo, weekly whiff, weekly velo
        leaderboards = []
        for time_type in ('Daily', 'Weekly'):
            with gr.Row():
                gr.Markdown(f'## {time_type}')
            with gr.Row():
                leaderboards += [
                    gr.Image(label=f'{time_type} {stat} Leaderboard', height=512)
                    for stat in ('Whiff', 'Velo')
                ]

        search.click(
            gr_create_daily_weekly_leaderboards,
            inputs=[leaderboard_date, whiff_leaders, velo_leaders, data_state],
            outputs=leaderboards
        )

        # navigation buttons only shift the selected date; the user still has to press Search
        buttons = (prev_day, next_day, prev_week, next_week)
        handlers = (go_back_day, go_forward_day, go_back_week, go_forward_week)
        for button, handler in zip(buttons, handlers):
            button.click(handler, inputs=leaderboard_date, outputs=leaderboard_date)

    return app
80
+
81
# Running this module directly launches the leaderboard app on its own,
# using the data_df imported from data.
if __name__ == '__main__':
    app = create_daily_weekly_leaderboard_app(data_df)
    app.launch()
data.py CHANGED
@@ -3,6 +3,9 @@ import os
3
  from tqdm.auto import tqdm
4
  import pykakasi
5
  from huggingface_hub import snapshot_download
 
 
 
6
 
7
  from convert import (
8
  aux_global_id_to_code, presult,
@@ -44,44 +47,70 @@ for season in tqdm(SEASONS):
44
  _aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
45
  aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
46
 
47
- players_df = pl.read_parquet(os.path.join(DATA_PATH, 'players.parquet'))
48
- kana_df = pl.read_parquet(os.path.join(DATA_PATH, 'players_kana.parquet'))
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  kks = pykakasi.kakasi()
51
- kana_df = (
52
- kana_df
 
 
53
  .with_columns(
54
- pl.col('name').str.normalize('NFKC'),
55
- (
56
- pl.col('name_kana')
57
- .map_elements(
58
- lambda name: ''.join([word['hepburn'].capitalize() for word in kks.convert(name)]),
59
- return_dtype=pl.String
 
 
 
 
 
 
 
 
 
 
 
 
60
  )
61
- .alias('name_en')
62
  )
 
 
63
  )
64
- .with_columns(pl.col('name_en').str.to_lowercase())
65
  )
66
 
67
- for old_part, new_part in [
68
- ('you', 'yo'),
69
- ('kou', 'ko'),
70
- ('gou', 'go'),
71
- ('shou', 'sho'),
72
- ('jou', 'jo'),
73
- ('rou', 'ro'),
74
- ('ou', 'oh'),
75
- ('shuu', 'shu'),
76
- ('ryuu', 'ryu'),
77
- ('yuu', 'yu'),
78
- ('oo', 'o') # messes with someone whose name ends in koo
79
- ]:
80
- kana_df = kana_df.with_columns(pl.col('name_en').str.replace(old_part, new_part))
81
-
82
- kana_df = kana_df.with_columns(pl.col('name_en').str.to_titlecase())
83
-
84
- players_df = players_df.with_columns(pl.col('playerName').str.normalize('NFKC'))
85
  for old_char, new_char in [
86
  ('崎', '﨑'),
87
  ('高', '髙'),
@@ -91,13 +120,33 @@ for old_char, new_char in [
91
  ]:
92
  players_df = (
93
  players_df.with_columns(
94
- pl.when(~pl.col('playerName').is_in(kana_df['name']))
95
  .then(pl.col('playerName').str.replace(old_char, new_char))
96
  .otherwise('playerName')
97
  )
98
  )
99
 
100
- players_df = players_df.join(kana_df, left_on='playerName', right_on='name', how='left')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  aux_df = (
103
  aux_df
 
3
  from tqdm.auto import tqdm
4
  import pykakasi
5
  from huggingface_hub import snapshot_download
6
+ import numpy as np
7
+
8
+ from string import ascii_letters
9
 
10
  from convert import (
11
  aux_global_id_to_code, presult,
 
47
  _aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
48
  aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
49
 
 
 
50
 
51
def select_name(names):
    '''
    Pick one name out of several candidates for the same player.

    The first name containing an ASCII letter wins (ex. R. マルティネス > マルティネス),
    since the initial helps tell foreign players apart.
    Otherwise the shortest name wins (ex. 大勢 > 翁田 大勢), the first one on ties,
    since players going by a shorter registered name are listed under it.
    '''
    for candidate in names:
        if not set(candidate).isdisjoint(ascii_letters):
            return candidate
    return min(names, key=len)
66
+
67
+ # load player dfs
68
+ players_df = (
69
+ pl.read_parquet('files/players.parquet')
70
+ .with_columns(pl.col('playerName').str.normalize('NFKC').str.replace_all('・', ' '))
71
+ .group_by('playerId').agg(pl.col('playerName').map_elements(select_name, return_dtype=pl.String))
72
+ )
73
+ translated_df = (
74
+ pl.read_parquet('files/players_translated.parquet')
75
+ .with_columns(pl.col('name_jp').str.normalize('NFKC').str.replace_all('・', ' '))
76
+ )
77
+ manual_translated_df = pl.read_parquet('files/players_translated_manual.parquet')
78
+
79
+ # names with no romanization are approximated with kana translation
80
  kks = pykakasi.kakasi()
81
+
82
+ # take names in parentheses when they contain an ASCII character
83
+ translated_df = (
84
+ translated_df
85
  .with_columns(
86
+ pl.when(pl.col('name_jp').str.contains(r'\('))
87
+ .then(pl.col('name_jp').str.extract(r'.*\(', 0).str.strip_chars_end(' ('))
88
+ .otherwise(pl.col('name_jp'))
89
+ .str.replace_all('・', ' ')
90
+ .alias('name_jp')
91
+ )
92
+ .with_columns(pl.col('name_kana').str.normalize('NFKC').str.replace_all('・', ' '))
93
+ .with_columns(pl.col('name_kana').str.extract(r'\(.*\)', 0).str.strip_chars('()').alias('in_parentheses'))
94
+ .with_columns(pl.col('name_kana').str.extract(r'.*\(', 0).str.strip_chars_end('(').alias('before_parentheses'))
95
+ .with_columns(
96
+ pl.when(pl.col('name_en').is_null())
97
+ .then
98
+ (
99
+ pl.when(pl.col('in_parentheses').is_not_null() | pl.col('before_parentheses').is_not_null())
100
+ .then(
101
+ pl.when(pl.col('in_parentheses').map_elements(lambda name: any([char in ascii_letters for char in name]), pl.Boolean))
102
+ .then(pl.col('in_parentheses'))
103
+ .otherwise(pl.col('before_parentheses'))
104
  )
105
+ .otherwise(pl.col('name_kana').map_elements(lambda name: ''.join([word['hepburn'].capitalize() for word in kks.convert(name)]), return_dtype=pl.String))
106
  )
107
+ .otherwise(pl.col('name_en'))
108
+ .alias('name_en')
109
  )
110
+ .with_columns(pl.col('name_en').str.replace_all(',', '').str.to_titlecase())
111
  )
112
 
113
+ # handle inconsistent kanji between sources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  for old_char, new_char in [
115
  ('崎', '﨑'),
116
  ('高', '髙'),
 
120
  ]:
121
  players_df = (
122
  players_df.with_columns(
123
+ pl.when(~pl.col('playerName').is_in(translated_df['name_jp']))
124
  .then(pl.col('playerName').str.replace(old_char, new_char))
125
  .otherwise('playerName')
126
  )
127
  )
128
 
129
+ # merge player dfs
130
+ players_df = (
131
+ players_df
132
+ .join(manual_translated_df.rename({'name_en': 'name_en_manual'}), on='playerId', how='left')
133
+ .join(
134
+ (
135
+ translated_df
136
+ .with_columns(
137
+ pl.when(pl.col('name_jp').str.contains(r'\.') & ~pl.col('name_jp').is_in(players_df.filter(pl.len().over('playerName') == 1)['playerName']))
138
+ .then(pl.col('name_jp').str.strip_chars(ascii_letters+'.'))
139
+ .otherwise('name_jp')
140
+ )
141
+ [['name_jp', 'name_en']]
142
+ ),
143
+ left_on='playerName', right_on='name_jp', how='left'
144
+ )
145
+ .with_columns(pl.coalesce('name_en_manual', 'name_en').alias('name_en'))
146
+ .unique() # remove duplicates from names with multiple matches in other dataframes
147
+ .drop('name_en_manual', 'name_jp')
148
+ # .filter(pl.col('name_en').is_null())
149
+ )
150
 
151
  aux_df = (
152
  aux_df
pitch_leaderboard.py CHANGED
@@ -139,7 +139,7 @@ def create_pitch_leaderboard():
139
  all_teams = gr.Button('Select/Deselect all teams')
140
 
141
  search = gr.Button('Search')
142
- # pin_columns = gr.Checkbox(True, 'Pin columns')
143
  leaderboard = gr.DataFrame(
144
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
145
  column_widths=[125, 75, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
@@ -159,6 +159,11 @@ def create_pitch_leaderboard():
159
  # inputs=pin_columns,
160
  # outputs=leaderboard
161
  # )
 
 
 
 
 
162
 
163
  return app
164
 
 
139
  all_teams = gr.Button('Select/Deselect all teams')
140
 
141
  search = gr.Button('Search')
142
+ pin_columns = gr.Button('Pin columns')
143
  leaderboard = gr.DataFrame(
144
  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
145
  column_widths=[125, 75, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
 
159
  # inputs=pin_columns,
160
  # outputs=leaderboard
161
  # )
162
+ pin_columns.click(
163
+ lambda : gr.update(pinned_columns=None),
164
+ # inputs=pin_columns,
165
+ outputs=leaderboard
166
+ )
167
 
168
  return app
169
 
pitcher_overview.py CHANGED
@@ -10,7 +10,6 @@ notes = '''**Limitations**
10
  - Only supports regular season data
11
 
12
  **To-do**
13
- - Fix names of foreign players
14
  - Add teams insignias
15
  - Measure percentiles per pitcher handedness
16
  - Allow for arbitrary date ranges
 
10
  - Only supports regular season data
11
 
12
  **To-do**
 
13
  - Add teams insignias
14
  - Measure percentiles per pitcher handedness
15
  - Allow for arbitrary date ranges
plotting.py CHANGED
@@ -6,15 +6,15 @@ import polars as pl
6
  from pyfonts import load_google_font
7
  from scipy.stats import gaussian_kde
8
  import numpy as np
 
9
 
10
  from types import SimpleNamespace
 
11
  from datetime import date
 
12
 
13
- from convert import ball_kind_code_to_color, get_text_color_from_color
14
- from stats import get_pitcher_stats
15
-
16
-
17
- mpl.use('Agg')
18
 
19
 
20
  def get_card_data(id, **kwargs):
@@ -243,3 +243,165 @@ def create_pitcher_overview_card(id, season, dpi=300):
243
  return fig
244
  # fig = create_card('1600153', season=2023, dpi=300)
245
  # plt.show()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from pyfonts import load_google_font
7
  from scipy.stats import gaussian_kde
8
  import numpy as np
9
+ from PIL import Image
10
 
11
  from types import SimpleNamespace
12
+ import datetime
13
  from datetime import date
14
+ import os
15
 
16
+ from convert import ball_kind_code_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
17
+ from stats import get_pitcher_stats, filter_data_by_date_and_game_kind
 
 
 
18
 
19
 
20
  def get_card_data(id, **kwargs):
 
243
  return fig
244
  # fig = create_card('1600153', season=2023, dpi=300)
245
  # plt.show()
246
+
247
+ # DAILY/WEEKLY LEADERBOARDS
248
+
249
def get_whiff_leaderboard_data(data, leaders, include_date):
    '''
    Build the whiff leaderboard table.

    Whiffs are summed per pitcher, team and date, then ranked with ties sharing
    the lowest rank (method='min'). The table is cut at the first rank by which
    at least `leaders` rows have been listed, so ties at the cutoff are all kept.
    When there are fewer than `leaders` rows, every row is returned.

    data: frame with pitId, pitcher_team_name_short, date, pitcher_name and whiff columns
          (presumably a polars DataFrame - it is indexed with a column list below)
    leaders: minimum number of rows to list
    include_date: whether to keep the formatted Date column (month.day)

    Returns a frame with Rank, Team, Player, optionally Date, and Whiffs.
    '''
    data = (
        data
        .group_by('pitId', 'pitcher_team_name_short', 'date')
        .agg(
            pl.col('pitcher_name').first(),
            pl.col('whiff').sum().alias('Whiffs')
        )
        .sort(['Whiffs', 'pitcher_name'], descending=[True, False])
        .rename({'pitcher_name': 'Player', 'pitcher_team_name_short': 'Team'})
        .with_columns(
            pl.col('date').dt.to_string('%m.%d').alias('Date'),
            pl.col('Whiffs').rank(descending=True, method='min').alias('Rank')
        )
        [['Rank', 'Team', 'Player'] + (['Date'] if include_date else []) + ['Whiffs']]
    )

    # lowest rank at which the running row count reaches `leaders`
    cutoff = (
        data
        .group_by('Rank').agg(pl.len())
        .sort('Rank')
        .filter(pl.col('len').cum_sum() >= leaders)
        ['Rank'].min()
    )
    # cutoff is None when the table has fewer than `leaders` rows (or none at all);
    # comparing Rank against None would drop every row, so keep the table as is
    if cutoff is not None:
        data = data.filter(pl.col('Rank') <= cutoff)
    return data
274
+
275
+
276
def get_velo_leaderboard_data(data, leaders):
    '''
    Build the velocity leaderboard table, one row per pitch.

    Pitches are ranked by ballSpeed (shown as KPH, with MPH = KPH / 1.609 rounded
    to one decimal) with ties sharing the lowest rank (method='min'). The table is
    cut at the first rank by which at least `leaders` rows have been listed, so
    ties at the cutoff are all kept. When there are fewer than `leaders` rows,
    every row is returned.

    data: frame with ballSpeed, pitcher_name and pitcher_team_name_short columns
          (presumably a polars DataFrame - it is indexed with a column list below)
    leaders: minimum number of rows to list

    Returns a frame with Rank, Team, Player, KPH and MPH.
    '''
    data = (
        data
        .sort(['ballSpeed', 'pitcher_name'], descending=[True, False])
        .rename({'ballSpeed': 'KPH', 'pitcher_name': 'Player', 'pitcher_team_name_short': 'Team'})
        .with_columns(
            (pl.col('KPH') / 1.609).round(1).alias('MPH'),
            pl.col('KPH').rank(descending=True, method='min').alias('Rank')
        )
        [['Rank', 'Team', 'Player', 'KPH', 'MPH']]
    )

    # lowest rank at which the running row count reaches `leaders`
    cutoff = (
        data
        .group_by('Rank').agg(pl.len())
        .sort('Rank')
        .filter(pl.col('len').cum_sum() >= leaders)
        ['Rank'].min()
    )
    # cutoff is None when the table has fewer than `leaders` rows (or none at all);
    # comparing Rank against None would drop every row, so keep the table as is
    if cutoff is not None:
        data = data.filter(pl.col('Rank') <= cutoff)
    return data
294
+
295
+
296
def create_daily_weekly_leaderboard(stat, leaderboard_date, time_type, leaders, data):
    '''
    Draw a daily or weekly whiff/velo leaderboard card and return the matplotlib figure.

    stat: 'velo' or 'whiff'
    leaderboard_date: day of the daily board, or any day inside the week of the weekly board
    time_type: 'daily' or 'weekly'; weekly boards cover the Monday-Sunday week containing leaderboard_date
    leaders: number of leaders, passed on to the leaderboard data helper
    data: pitch data, narrowed here with filter_data_by_date_and_game_kind

    Raises ValueError for an unknown stat or time_type.
    The figure is created through pyplot, so the caller should close it once it is saved.
    '''
    # explicit checks instead of assert: asserts vanish under -O, and an unknown
    # time_type would then silently be treated as weekly
    if stat not in ('velo', 'whiff'):
        raise ValueError(f"stat must be 'velo' or 'whiff', got {stat!r}")
    if time_type not in ('daily', 'weekly'):
        raise ValueError(f"time_type must be 'daily' or 'weekly', got {time_type!r}")

    font = load_google_font('Saira Extra Condensed', weight='medium')
    bold_font = load_google_font('Saira Extra Condensed', weight='bold')
    date_font = load_google_font('Lekton', weight='bold')

    if time_type == 'daily':
        data = filter_data_by_date_and_game_kind(data, start_date=leaderboard_date, end_date=leaderboard_date)
    else:
        # weekday() is 0 on Monday, so these are the bounds of the enclosing Monday-Sunday week
        monday = leaderboard_date - datetime.timedelta(days=leaderboard_date.weekday())
        sunday = leaderboard_date + datetime.timedelta(days=6-leaderboard_date.weekday())
        data = filter_data_by_date_and_game_kind(data, start_date=monday, end_date=sunday)

    if stat == 'velo':
        leaderboard = get_velo_leaderboard_data(data, leaders)
    else:
        leaderboard = get_whiff_leaderboard_data(data, leaders, include_date=time_type == 'weekly')

    # every column after Rank/Team/Player is a stat column; wide ones (5+ characters) get 1.5 width units
    stat_cols = [col for col in leaderboard.columns if col not in ['Rank', 'Team', 'Player']]
    stat_col_lens = [
        1 if max(leaderboard[stat_col].cast(pl.String).str.len_chars().max() or 0, len(stat_col)) < 5 else 1.5
        for stat_col in stat_cols
    ]
    n_rows = len(leaderboard)

    dpi = 300

    # 1080x1350 px card
    fig = plt.figure(figsize=(1080/dpi, 1350/dpi), dpi=dpi)
    # grid rows: title, column headers, then one row per leader (or one blank filler row)
    gs = fig.add_gridspec(
        max(n_rows, 1)+2,
        3+len(stat_cols),
        height_ratios=[1] + ([9/(n_rows+1)] * (n_rows+1) if n_rows else [1, 8]),
        width_ratios=[1, 1, 8-sum(stat_col_lens)] + stat_col_lens
    )

    # index of the first grid row holding leaderboard entries
    data_offset = 2

    axs = []
    def create_and_add_subplot(indexed_gs):
        # track every axes so ticks and frames can be switched off in one pass at the end
        ax = fig.add_subplot(indexed_gs)
        axs.append(ax)
        return ax

    title_ax = create_and_add_subplot(gs[0, :])
    title_ax.text(0, 0.1, f'{time_type.upper()} {stat.upper()} LEADERBOARD', verticalalignment='baseline', font=bold_font, size=15)

    if time_type == 'daily':
        title_ax.text(1, 0.1, leaderboard_date.strftime('%Y.%m.%d (%a)'), verticalalignment='baseline', horizontalalignment='right', font=date_font, size=7)
    else:
        title_ax.text(1, 0.1, monday.strftime('%Y.%m.%d (%a)')+'\n-'+sunday.strftime('%Y.%m.%d (%a)'), verticalalignment='baseline', horizontalalignment='right', font=date_font, size=7)

    rank_ax = create_and_add_subplot(gs[data_offset-1, 0])
    rank_ax.text(0.5, 0, 'RANK', verticalalignment='bottom', horizontalalignment='center', font=bold_font)

    team_ax = create_and_add_subplot(gs[data_offset-1, 1])
    team_ax.text(0.5, 0, 'TEAM', verticalalignment='bottom', horizontalalignment='center', font=bold_font)

    player_ax = create_and_add_subplot(gs[data_offset-1, 2])
    player_ax.text(0, 0, 'PLAYER', verticalalignment='bottom', font=bold_font)

    for col, stat_col in enumerate(stat_cols, start=3):
        stat_ax = create_and_add_subplot(gs[data_offset-1, col])
        stat_ax.text(0.5, 0, stat_col.upper(), verticalalignment='bottom', horizontalalignment='center', font=bold_font)

    # thin black rule under the header row
    midline_ax = create_and_add_subplot(gs[data_offset-1, :])
    midline_ax.add_patch(plt.Rectangle((0, 0), 1, 0.01, color='black'))

    if n_rows == 0:
        # nothing to list: fill the remaining space with an empty axes
        create_and_add_subplot(gs[data_offset:])

    for i, (rank, team, player, *stat_values) in enumerate(leaderboard.iter_rows()):
        grid_row = i + data_offset
        team_color = team_names_short_to_color[team]

        rank_ax = create_and_add_subplot(gs[grid_row, 0])
        rank_ax.text(0.5, 0.5, rank, verticalalignment='center_baseline', horizontalalignment='center', font=font)

        team_ax = create_and_add_subplot(gs[grid_row, 1])
        insignia_path = os.path.join('assets', 'white_insignias', f'{team.lower()}.png')
        # context manager so the image file handle is always released
        with Image.open(insignia_path) as insignia:
            # scale so the longer side is 512 px, keeping the aspect ratio
            w, h = insignia.size
            new_longer_side = 512
            if w > h:
                w, h = (new_longer_side, round(h*new_longer_side/w))
            else:
                w, h = (round(w*new_longer_side/h), new_longer_side)
            # only the last channel is drawn (presumably alpha - confirm against the assets)
            last_channel = np.array(insignia.resize((w, h)))[..., -1]

        # the insignia sits centered on a team-colored disc 1.5x its longer side
        ax_s = 512*1.5
        team_ax.set_xlim(0, ax_s)
        team_ax.set_ylim(0, ax_s)
        circle = plt.Circle((ax_s/2, ax_s/2), radius=ax_s/2, color=team_color, clip_on=False, zorder=1)
        team_ax.add_patch(circle)
        team_ax.imshow(
            last_channel,
            cmap=LinearSegmentedColormap.from_list('tmp', [team_color, 'black' if team in ('Lotte', 'Hanshin') else 'white']),
            extent=((ax_s-w)/2, ax_s-(ax_s-w)/2, (ax_s-h)/2, ax_s-(ax_s-h)/2),
            zorder=2
        )

        player_ax = create_and_add_subplot(gs[grid_row, 2])
        player_ax.text(0.02, 0.5, player.upper(), verticalalignment='center_baseline', font=font, color=get_text_color_from_team(team))
        # team-colored name plate with a clipped bottom-right corner
        player_ax.add_patch(plt.Polygon([(0, 0), (0.98, 0), (1, 0.5), (1, 1), (0, 1)], color=team_color, clip_on=False))

        for col, stat_value in enumerate(stat_values, start=3):
            stat_ax = create_and_add_subplot(gs[grid_row, col])
            stat_ax.text(0.5, 0.5, stat_value, verticalalignment='center_baseline', horizontalalignment='center', font=font)

    for ax in axs:
        ax.axis('off')
        ax.tick_params(
            axis='both',
            which='both',
            length=0,
            labelbottom=False,
            labelleft=False
        )
    return fig
stats.py CHANGED
@@ -35,10 +35,18 @@ def compute_team_games(data):
35
  .rename({'VisitorTeamNameES': 'team'})
36
  ),
37
  on='team',
 
 
 
 
 
 
 
 
38
  )
39
- .with_columns((pl.col('home_games')+pl.col('visitor_games')).alias('games'))
40
  )
41
 
 
42
  return (
43
  data
44
  .drop('home_games', 'visitor_games')
@@ -110,6 +118,51 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
110
  return pitch_stats
111
 
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
114
  source_data = data_df.filter(pl.col('ballKind_code') != '-')
115
 
 
35
  .rename({'VisitorTeamNameES': 'team'})
36
  ),
37
  on='team',
38
+ how='full'
39
+ )
40
+ .fill_null(0)
41
+ .with_columns(
42
+ (pl.col('home_games')+pl.col('visitor_games')).alias('games'),
43
+ pl.when(pl.col('team').is_null())
44
+ .then(pl.col('team_right'))
45
+ .otherwise(pl.col('team')).alias('team')
46
  )
 
47
  )
48
 
49
+
50
  return (
51
  data
52
  .drop('home_games', 'visitor_games')
 
118
  return pitch_stats
119
 
120
 
121
def compute_pitcher_stats(data, min_ip='qualified'):
    '''
    Compute per-pitcher rate stats and percentile columns.

    data: pitch-level frame; rows whose ballKind is '-' are dropped first
    min_ip: 'qualified' to require IP >= the team game count, or a number used as the IP threshold

    Returns one row per (pitId, pitcher_team_name_short) with K%, BB%, CSW%, Whiffs,
    GB%, FB%, LD%, the qualified flag and <stat>_pctl columns for CSW%, K%, BB% and GB%.
    '''
    data = data.filter(pl.col('ballKind') != '-')
    data = (
        # presumably compute_team_games attaches the home_games/visitor_games columns read below - confirm
        compute_team_games(data)
        .with_columns(
            # per pitcher, take the first row's home_games or visitor_games depending on whether half_inning ends in '1'
            pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
            # IP is approximated by the number of distinct innings the pitcher appeared in
            pl.col('inning_code').unique().len().over('pitId').alias('IP') # inaccurate
        )
    )

    if min_ip == 'qualified':
        data = data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
    else:
        data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))

    pitcher_stats = (
        data
        .group_by('pitId', 'pitcher_team_name_short')
        .agg(
            pl.col('pitcher_name').first(),
            # strikeouts and walks are divided by the number of distinct plate appearances
            (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
            (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
            (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
            pl.col('whiff').sum().alias('Whiffs'),
            # share of each batted-ball type among non-null batType values
            pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
            pl.first('qualified')
        )
        # turn the list of {batType, proportion} structs into one column per batType
        .explode('batType')
        .unnest('batType')
        .pivot(on='batType', values='proportion')
        .fill_null(0)
        .with_columns(
            # NOTE(review): G/B/F/P/L look like ground ball, bunt, fly, pop-up and line drive codes - confirm
            (pl.col('G') + pl.col('B')).alias('GB%'),
            (pl.col('F') + pl.col('P')).alias('FB%'),
            pl.col('L').alias('LD%'),
        )
        .drop('G', 'F', 'B', 'P', 'L', 'null')
        .with_columns(
            # rank among qualified pitchers divided by the number of qualified pitchers;
            # BB% is ranked descending so that fewer walks gives a higher percentile
            (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
            for stat in ['CSW%', 'K%', 'BB%', 'GB%']
        )
    )
    return pitcher_stats
164
+
165
+
166
  def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
167
  source_data = data_df.filter(pl.col('ballKind_code') != '-')
168