Spaces:
Running
Running
Commit
·
a8b6a3f
1
Parent(s):
53e0878
Add files
Browse files- app.py +12 -0
- convert.py +259 -0
- data.py +168 -0
- pitcher_overview.py +36 -0
app.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from data import data_df
|
4 |
+
from pitcher_overview import create_pitcher_overview
|
5 |
+
|
6 |
+
|
7 |
+
# Build the top-level Gradio app. `create_pitcher_overview` is called inside
# the `demo` Blocks context with the play-by-play frame imported from `data`.
with gr.Blocks() as demo:
    create_pitcher_overview(data_df)

# Only start the server when this file is executed directly; importing the
# module just defines `demo`.
if __name__ == '__main__':
    demo.launch()
|
convert.py
ADDED
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Maps the numeric `globalId` found in the auxiliary feed's `home` / `visitor`
# structs to a short code (used via `replace_strict` in data.py).
# NOTE(review): the codes look like NPB team/league abbreviations -- confirm
# against the feed documentation.
aux_global_id_to_code = {
    7911: 'G',
    7912: 'S',
    7913: 'DB',
    7914: 'D',
    7915: 'T',
    7916: 'C',
    7917: 'F',
    7918: 'E',
    7919: 'L',
    7920: 'M',
    7921: 'B',
    7922: 'H',
    7925: 'PL',
    7926: 'CL',
}
|
17 |
+
|
18 |
+
# Maps the integer `ballKind` value to a human-readable pitch-type name.
# -1 maps to the placeholder '-'.
ball_kind = {
    -1: '-',
    31: 'Fastball (4-seam)',
    32: 'Slider',
    33: 'Vertical Slider',
    34: 'Slurve',
    35: 'Curve',
    36: 'Slow Curve',
    37: 'Power Curve',
    38: 'Knuckle Curve',
    39: 'Forkball',
    40: 'Splitter',
    41: 'Changeup',
    42: 'Sinker',
    43: 'Screwball',
    44: 'Palmball',
    45: 'Knuckleball',
    46: 'Shootball',
    47: 'Fastball (2-seam)',
    48: 'Fastball (1-seam)',
    49: 'Cutter',
    50: 'Eephus',  # technically "super" eephus but I haven't encountered a normal one yet
    51: 'Hard Sinker',
    52: 'Hard Slider',
}
|
43 |
+
|
44 |
+
# Maps the integer `ballKind` value to a two-letter pitch-type abbreviation.
# Covers the same keys as `ball_kind`; -1 maps to the placeholder '-'.
ball_kind_code = {
    -1: '-',
    31: 'FF',
    32: 'SL',
    33: 'VS',
    34: 'SV',
    35: 'CU',
    36: 'SC',
    37: 'PC',
    38: 'KC',
    39: 'FO',
    40: 'FS',
    41: 'CH',
    42: 'SI',
    43: 'SB',
    44: 'PB',
    45: 'KN',
    46: 'SH',
    47: 'FT',
    48: 'FW',
    49: 'FC',
    50: 'EP',  # technically "super" eephus but I haven't encountered a normal one yet
    51: 'HS',
    52: 'HL',
}
|
69 |
+
|
70 |
+
# Maps a numeric side code to 'l' / 'r'; data.py applies it to the `batLR`
# column (presumably left/right batter handedness -- confirm).
lr = {1: 'l', 2: 'r'}
|
71 |
+
|
72 |
+
# Maps the integer `presult` code to an English result label.
# 0 maps to 'None'; data.py uses presult == 0 to exclude rows from the
# per-plate-appearance pitch count. Some labels appear under two codes
# (e.g. 'Balk' for 120 and 135).
presult = {
    0: 'None',
    101: 'Foul',
    102: 'Single',
    103: 'Hit by pitch',
    104: 'Double',
    105: 'Triple',
    106: 'Home run',
    107: 'Error',
    108: 'Groundout',
    109: 'Flyout',
    110: 'Lineout',
    111: 'Groundout (Double play)',
    112: 'Foul fly',
    113: 'Foul line (?)',
    114: 'Sacrifice bunt',
    115: 'Sacrifice fly',
    116: 'Swinging strike',
    117: 'Looking strike',
    118: 'Ball',
    119: 'Walk',
    120: 'Balk',
    121: 'Batter interference',
    122: 'Catcher interference',
    123: 'Uncaught third strike',
    124: 'Sacrifice hit error',
    125: 'Sacrifice fly, error',
    126: "Fielder's choice",
    128: "Sacrifice fielder's choice",
    129: 'Bunt strikeout',
    130: 'Swinging strikeout',
    131: 'Looking strikeout',
    133: 'Inside-the-park home run',
    134: 'Pitcher delay',
    135: 'Balk',
    139: 'Intentional walk',
    140: 'Groundout (Double play)',
    141: 'Unknown',
}
|
111 |
+
|
112 |
+
# Maps the integer batting-result code to its Japanese label. The trailing
# comments are the author's shorthand notes for each code (hit type, batted
# ball type, fielder position); a '?' marks entries the author was unsure of.
# Codes missing from the sequence are simply not mapped.
bresult = {
    0: '空振り三振',
    1: '単打',  # 1b gb p
    2: '単打',  # 1b gb c
    3: '単打',  # 1b gb 1b
    4: '単打',  # 1b gb 2b
    5: '単打',  # 1b gb 3b? ld
    6: '単打',  # 1b gb ss
    7: '単打',  # 1b gb lf? ld
    8: '単打',  # 1b gb cf
    9: '単打',  # 1b gb rf
    10: '2塁打',  # 2b gb p

    12: '2塁打',  # 2b gb 1b
    13: '2塁打',  # 2b gb 2b
    14: '2塁打',  # 2b gb 3b? ld
    15: '2塁打',  # 2b gb ss
    16: '2塁打',  # 2b gb lf? ld
    17: '2塁打',  # 2b gb cf
    18: '2塁打',  # 2b gb rf

    22: '3塁打',  # 3b gb 2b

    25: '3塁打',  # 3b gb lf? ld
    26: '3塁打',  # 3b gb cf
    27: '3塁打',  # 3b gb rf
    28: '本塁打(ランニング)',  # ihr lf?
    29: '本塁打(ランニング)',  # ihr cf?
    30: '本塁打(ランニング)',  # ihr rf?

    31: '空振り三振',
    32: '振逃げ',
    33: '振逃げ',
    34: '四球',
    35: 'フライ',  # fb cf
    36: '死球',
    37: '打撃妨害',
    38: '守備妨害',

    39: '犠打野選',
    40: 'ゴロ',  # gb p
    41: 'ゴロ',  # gb c
    42: 'ゴロ',  # gb 1b
    43: 'ゴロ',  # gb 2b
    44: 'ゴロ',  # gb 3b
    45: 'ゴロ',  # gb ss
    46: 'ゴロ',  # gb lf
    47: 'ゴロ',  # gb cf
    48: 'ゴロ',  # gb rf

    49: 'フライ',  # fb p
    50: 'フライ',  # fb c
    51: 'フライ',  # fb 1b
    52: 'フライ',  # fb 2b
    53: 'フライ',  # fb 3b
    54: 'フライ',  # fb ss
    55: 'フライ',  # fb lf
    56: 'フライ',  # fb cf
    57: 'フライ',  # fb rf
    58: 'ライナー',  # ld p
    60: 'ライナー',  # ld 1b
    61: 'ライナー',  # ld 2b
    62: 'ライナー',  # ld 3b
    63: 'ライナー',  # ld ss
    64: 'ライナー',  # ld lf

    66: 'ライナー',  # ld rf
    67: '犠打',
    68: '犠打',
    69: '犠打',
    70: '犠打',
    71: '犠打',
    79: '犠飛',  # sac fly lf
    80: '犠飛',  # sac fly cf
    81: '犠飛',  # sac fly rf
    82: '邪飛',
    83: '邪飛',
    84: '邪飛',
    85: '邪飛',
    86: '邪飛',
    87: '邪飛',
    88: '邪飛',

    90: '邪飛',
    91: 'ゴロ(併殺打)',
    92: 'ゴロ(併殺打)',
    93: 'ゴロ(併殺打)',
    94: 'ゴロ(併殺打)',
    95: 'ゴロ(併殺打)',
    96: 'ゴロ(併殺打)',

    100: '失策出塁',
    101: '失策出塁',
    102: '失策出塁',
    103: '失策出塁',
    104: '失策出塁',
    105: '失策出塁',
    106: '失策出塁',
    107: '失策出塁',
    108: '失策出塁',
    109: '野選',
    110: '野選',
    111: '野選',
    112: '野選',
    113: '野選',
    114: '野選',
    115: '犠打失策',
    116: '犠飛失策',

    208: '本塁打',  # hr lf?

    210: '本塁打',  # hr cf?
    212: '本塁打',  # hr rf?

    214: '見送り三振',
    215: '犠打野選',
    216: '犠打野選',

    218: '犠打野選',

    229: '振逃げ',

    234: '邪直',

    236: '邪直',

    241: '3バント失敗',

    258: '規則違反',

    267: '打撃妨害',

    338: '?',
}
|
246 |
+
|
247 |
+
# Maps the integer `GameKindID` to an English competition name. data.py
# derives `GameKindName` from it and then groups the names into a coarser
# 'Regular Season' / 'Postseason' / other bucket.
game_kind = {
    1: 'CL Regular Season',
    2: 'PL Regular Season',
    3: 'Nippon Series',
    4: 'All-Star Game',
    5: 'Spring Training',
    11: 'Farm Championship',
    26: 'Interleague',
    35: 'CL Climax Series First Stage',
    36: 'CL Climax Series Final Stage',
    37: 'PL Climax Series First Stage',
    38: 'PL Climax Series Final Stage',
}
|
data.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import polars as pl
|
2 |
+
from glob import glob
|
3 |
+
import os
|
4 |
+
from tqdm.auto import tqdm
|
5 |
+
|
6 |
+
from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, lr, game_kind
|
7 |
+
|
8 |
+
# Root directory of the collected data; it is expected to contain one
# sub-directory per season (named by year) holding the parquet files read
# below. The location can be overridden with the NPB_DATA_PATH environment
# variable so the app is not tied to one machine's home directory; when the
# variable is unset the original default is used.
DATA_PATH = os.path.expanduser(
    os.environ.get('NPB_DATA_PATH', '~/Documents/npb_data_collector/npb')
)

# Seasons to load. Also imported by pitcher_overview.py to populate the
# season dropdowns.
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
11 |
+
|
12 |
+
# Read every season's parquet files, collecting the frames per file stem so
# each table is concatenated exactly once (instead of re-concatenating a
# growing frame, starting from an empty DataFrame, on every iteration).
_season_frames = {
    'pbp_data': [],
    'pbp_text': [],
    'pbp_aux': [],
    'schedule': [],
    'aux_schedule': [],
}

for season in tqdm(SEASONS):
    _season_dir = os.path.join(DATA_PATH, str(season))
    for _stem, _frames in _season_frames.items():
        _frames.append(pl.read_parquet(os.path.join(_season_dir, f'{_stem}.parquet')))

data_df = pl.concat(_season_frames['pbp_data'])
text_df = pl.concat(_season_frames['pbp_text'])
# The aux play-by-play files are combined with 'diagonal_relaxed', i.e. the
# union of columns with dtype coercion, unlike the strict vertical concat
# used for the other tables.
aux_df = pl.concat(_season_frames['pbp_aux'], how='diagonal_relaxed')
sched_df = pl.concat(_season_frames['schedule'])
aux_sched_df = pl.concat(_season_frames['aux_schedule'])
|
33 |
+
|
34 |
+
|
35 |
+
# sched_df = sched_df.
|
36 |
+
|
37 |
+
# Building blocks for the string keys attached to the aux play-by-play rows.
# They are plain polars expressions, so composing them here is equivalent to
# spelling the full concatenation out at every use site.
# '1' for the top of an inning, '2' otherwise.
_aux_tob_digit = pl.when(pl.col('tob') == 'Top').then(pl.lit('1')).otherwise(pl.lit('2'))
# <gameDate>_<visitor>_<home>_<2-digit inning><tob digit>
_aux_inning_code = (
    pl.col('gameDate') + '_'
    + pl.col('visitor') + '_'
    + pl.col('home') + '_'
    + pl.col('inning').str.zfill(2)
    + _aux_tob_digit
)
# inning code followed by the 2-digit plate-appearance index
_aux_pa_code = _aux_inning_code + pl.col('new_pa_count').cast(pl.String).str.zfill(2)
# plate-appearance code followed by '_' and the pitch count within the PA
_aux_universal_code = _aux_pa_code + '_' + pl.col('pitch').struct.field('count').cast(pl.String)

aux_df = (
    aux_df
    # Drop rows whose `type` is 'RUNNER'.
    .filter(pl.col('type') != 'RUNNER')
    # Attach each game's date from the aux schedule (default inner join).
    .join(aux_sched_df[['gameGlobalId', 'gameDate']], on='gameGlobalId')
    .with_columns(
        # Re-format the date as a YYYYMMDD string.
        pl.col('gameDate').str.to_date().dt.strftime('%Y%m%d'),
        # Replace the home/visitor structs by the short code of their globalId.
        pl.col('home').struct.field('globalId').replace_strict(aux_global_id_to_code).alias('home'),
        pl.col('visitor').struct.field('globalId').replace_strict(aux_global_id_to_code).alias('visitor'),
        _aux_tob_digit.alias('tob_code'),
    )
    .filter(
        # pl.col('pitch').struct.field('count') > 0

        # either one alone should be enough but let's use them together to be safe
        ~((pl.col('code') == 98) & (pl.col('id') == 1))
    )
    .with_columns(
        # Running count of rows whose pitch count is 1, within each
        # (game, inning, tob) group.
        (pl.col('pitch').struct.field('count') == 1)
        .cum_sum()
        .over(['gameGlobalId', 'inning', 'tob'])
        .alias('pa_count')
    )
    .with_columns(
        # Flag every row of a pa_count group that contains one of these codes.
        # NOTE(review): given the alias, these presumably mark intentional
        # walks -- confirm the code meanings against the feed.
        pl.col('code')
        .is_in([6402, 6404, 6406, 6405])
        .any()
        .over(['gameGlobalId', 'inning', 'tob', 'pa_count'])
        .alias('ibb')
    )
    .with_columns(
        # Same running count as pa_count, but the first-pitch flag is only
        # taken from rows that are not flagged `ibb` (it is null for the rest).
        pl.when(~pl.col('ibb'))
        .then(pl.col('pitch').struct.field('count') == 1)
        .cum_sum()
        .over(['gameGlobalId', 'inning', 'tob'])
        .alias('new_pa_count')
    )
    .with_columns(
        # Number of rows sharing a new_pa_count within the half inning, and
        # the largest new_pa_count of the half inning.
        pl.len().over(['gameGlobalId', 'inning', 'tob', 'new_pa_count']).alias('pa_pitches'),
        pl.max('new_pa_count').over(['gameGlobalId', 'inning', 'tob']).alias('inning_pas')
    )
    .with_columns(
        _aux_universal_code.alias('universal_code'),
        _aux_inning_code.alias('inning_code'),
        _aux_pa_code.alias('pa_code')
    )
)
|
88 |
+
|
89 |
+
# Shared prefix of the string keys built below:
# <date>_<visitor team>_<home team>_
_pbp_game_prefix = (
    pl.col('date') + '_' + pl.col('VisitorTeamNameES') + '_' + pl.col('HomeTeamNameES') + '_'
)
_pbp_serial = pl.col('newFiveDigitSerialNumber')

data_df = (
    data_df
    .with_columns(
        *[
            pl.col(col).cast(pl.Int32)
            for col
            in ['gameId', 'ballKind', 'ballSpeed', 'x', 'y', 'presult', 'bresult', 'battedX', 'battedY']
        ],
        pl.col('UpdatedAt').str.to_datetime(),
        # Split the serial number: first 3 characters -> half inning,
        # next 2 characters -> batter index.
        pl.col('fiveDigitSerialNumber').str.slice(offset=0, length=3).alias('half_inning'),
        pl.col('fiveDigitSerialNumber').str.slice(offset=3, length=2).alias('batter'),
    )
    .with_columns(
        # pl.count('ID').over(['gameId', 'fiveDigitSerialNumber']).alias('pa_pitches')
        # Rows with presult == 0 are not counted as pitches of the plate appearance.
        (~pl.col('presult').is_in([0])).sum().over(['gameId', 'fiveDigitSerialNumber']).alias('pa_pitches'),
        # True for every row of a plate appearance that contains presult 139
        # ('Intentional walk' in convert.presult).
        pl.col('presult').is_in([139]).any().over(['gameId', 'fiveDigitSerialNumber']).alias('ibb')
    )
    .filter(
        (pl.col('pa_pitches') > 0)
    )
    .with_columns(
        # No alias is given, so this presumably overwrites `batter` with null
        # on rows flagged `ibb` -- TODO confirm the output column name.
        pl.when(~pl.col('ibb')).then(pl.col('batter'))
    )
    .with_columns(
        # Dense rank of the remaining batter indices within the half inning,
        # rendered as a zero-padded 2-character string.
        pl.when(~pl.col('ibb'))
        .then(pl.col('batter').rank('dense'))
        .over(['gameId', 'half_inning'])
        .cast(pl.String)
        .str.zfill(2)
        .alias('new_batter')
    )
    .with_columns(
        (pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
    )
    .with_columns(
        pl.max('new_batter')
        .cast(pl.Int32)
        .over(['gameId', _pbp_serial.str.slice(offset=0, length=3)])
        .alias('inning_pas')
    )
    .join(sched_df[['GameID', 'HomeTeamNameES', 'VisitorTeamNameES']].rename({'GameID': 'gameId'}), on='gameId')
    .with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
    .with_columns(
        # The alias is applied to the complete concatenation (the original
        # aliased the left operand before appending the pitch count).
        (_pbp_game_prefix + _pbp_serial + '_' + pl.col('atBatBallCount')).alias('universal_code'),
        (_pbp_game_prefix + _pbp_serial.str.slice(offset=0, length=3)).alias('inning_code'),
        (_pbp_game_prefix + _pbp_serial).alias('pa_code')
    )
    # Bring in the aux feed's batting result and its own PA / pitch counts for
    # the same pitch, skipping aux rows flagged `ibb`.
    .join(
        (
            aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches']]
            .rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
        ),
        on='universal_code',
        how='left'
    )
    # Attach the game kind id from the text feed (one row per distinct pair).
    .join(
        text_df[['GameID', 'GameKindID']].with_columns(
            pl.col('GameID').cast(pl.Int32),
            pl.col('GameKindID').cast(pl.Int32),
        ).unique(),
        how='left',
        left_on='gameId',
        right_on='GameID'
    )
    .with_columns(pl.col('GameKindID').replace_strict(game_kind).alias('GameKindName'))
    .with_columns(
        # Keep the aux batting result only where both feeds agree on the
        # number of plate appearances in the inning and pitches in the PA;
        # otherwise it becomes null.
        pl.when((pl.col('inning_pas') == pl.col('aux_inning_pas')) & (pl.col('pa_pitches') == pl.col('aux_pa_pitches')))
        .then('aux_bresult')
        .alias('aux_bresult'),

        # Coordinate transforms: x -> 100 - x, y -> 250 - y.
        pl.col('x').add(-100).mul(-1),
        pl.col('y').neg().add(250),
        # Replace numeric codes by their labels. Both ballKind expressions
        # must stay in this same with_columns call so each reads the
        # original integer column.
        pl.col('presult').replace_strict(presult),
        pl.col('ballKind').replace_strict(ball_kind),
        pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
        pl.col('batLR').replace_strict(lr),

        # Coarse bucket: regular season (incl. interleague), postseason, or
        # the game kind name itself for spring training / all-star games.
        pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
        .then(pl.lit('Regular Season'))
        .when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
        .then(pl.lit('Postseason'))
        .otherwise('GameKindName')
        .alias('coarse_game_kind')
    )
)
|
pitcher_overview.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from data import SEASONS
|
4 |
+
|
5 |
+
def dummy(*inputs):
    """Return the received positional arguments unchanged, as a tuple.

    Used as a placeholder click handler that echoes the input components'
    values back to the same components.
    """
    return inputs
|
7 |
+
|
8 |
+
def adjust_season_end_based_on_season_start(season_start, season_end):
    """Return the season-end value to show after the season start changed.

    The end is raised to the start when it would otherwise precede it. If
    either value is unset (``None``), the current end is returned unchanged
    instead of raising ``TypeError`` from comparing ``None`` with a number.
    """
    if season_start is None or season_end is None:
        return season_end
    return max(season_start, season_end)
|
10 |
+
|
11 |
+
def adjust_season_start_based_on_season_end(season_end, season_start):
    """Return the season-start value to show after the season end changed.

    The start is lowered to the end when it would otherwise follow it. If
    either value is unset (``None``), the current start is returned unchanged
    instead of raising ``TypeError`` from comparing ``None`` with a number.
    """
    if season_start is None or season_end is None:
        return season_start
    return min(season_start, season_end)
|
13 |
+
|
14 |
+
|
15 |
+
def create_pitcher_overview(data_df):
    """Build and return the pitcher-overview Gradio Blocks.

    Args:
        data_df: play-by-play frame; only its ``pitId`` column is read here,
            to populate the name dropdown with the sorted unique ids.

    Returns:
        The ``gr.Blocks`` instance containing the controls.
    """
    with gr.Blocks() as app:
        gr.Markdown('Test')

        name = gr.Dropdown(sorted(data_df['pitId'].unique().to_list()), label='Name')
        season_start = gr.Dropdown(SEASONS, label='Season start')
        season_end = gr.Dropdown(SEASONS, label='Season end')

        # Keep the range consistent: editing one bound adjusts the other so
        # that start <= end.
        season_start.input(
            adjust_season_end_based_on_season_start,
            inputs=[season_start, season_end],
            outputs=season_end,
        )
        season_end.input(
            adjust_season_start_based_on_season_end,
            inputs=[season_end, season_start],
            outputs=season_start,
        )

        game_type = gr.Dropdown(['Spring Training', 'Regular Season', 'Postseason'], label='Game Type')

        generate = gr.Button('Generate')

        # Placeholder wiring: the button echoes the current values back to
        # the same components.
        dummy_io = [name, season_start, season_end, game_type]
        generate.click(dummy, inputs=dummy_io, outputs=dummy_io)

    return app
|
34 |
+
|
35 |
+
if __name__ == '__main__':
    # create_pitcher_overview requires the play-by-play frame; import it here
    # so that merely importing this module does not need `data_df`.
    from data import data_df

    create_pitcher_overview(data_df).launch()
|