Spaces:
Running
Running
Commit
·
a8b6a3f
1
Parent(s):
53e0878
Add files
Browse files
- app.py +12 -0
- convert.py +259 -0
- data.py +168 -0
- pitcher_overview.py +36 -0
app.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
from data import data_df
|
| 4 |
+
from pitcher_overview import create_pitcher_overview
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Top-level Gradio app. The pitcher overview UI is built while `demo` is the
# active Blocks context.
with gr.Blocks() as demo:
    create_pitcher_overview(data_df)

# Start the server only when this file is executed as a script.
if __name__ == '__main__':
    demo.launch()
|
convert.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Maps the numeric team `globalId` found in the aux feed's `home` / `visitor`
# structs to a short team/league code.
# IDs 7923 and 7924 are intentionally absent (not mapped by the original table).
aux_global_id_to_code = {
    7911: 'G',
    7912: 'S',
    7913: 'DB',
    7914: 'D',
    7915: 'T',
    7916: 'C',
    7917: 'F',
    7918: 'E',
    7919: 'L',
    7920: 'M',
    7921: 'B',
    7922: 'H',
    7925: 'PL',
    7926: 'CL',
}
|
| 17 |
+
|
| 18 |
+
# Maps the numeric pitch-type code (the `ballKind` column) to a readable
# English pitch name. -1 is the placeholder for "no pitch type".
ball_kind = {
    -1: '-',
    31: 'Fastball (4-seam)',
    32: 'Slider',
    33: 'Vertical Slider',
    34: 'Slurve',
    35: 'Curve',
    36: 'Slow Curve',
    37: 'Power Curve',
    38: 'Knuckle Curve',
    39: 'Forkball',
    40: 'Splitter',
    41: 'Changeup',
    42: 'Sinker',
    43: 'Screwball',
    44: 'Palmball',
    45: 'Knuckleball',
    46: 'Shootball',
    47: 'Fastball (2-seam)',
    48: 'Fastball (1-seam)',
    49: 'Cutter',
    50: 'Eephus',  # technically "super" eephus but I haven't encountered a normal one yet
    51: 'Hard Sinker',
    52: 'Hard Slider',
}
|
| 43 |
+
|
| 44 |
+
# Maps the numeric pitch-type code (the `ballKind` column) to a two-letter
# abbreviation. Keys mirror `ball_kind`; -1 is the "no pitch type" placeholder.
ball_kind_code = {
    -1: '-',
    31: 'FF',
    32: 'SL',
    33: 'VS',
    34: 'SV',
    35: 'CU',
    36: 'SC',
    37: 'PC',
    38: 'KC',
    39: 'FO',
    40: 'FS',
    41: 'CH',
    42: 'SI',
    43: 'SB',
    44: 'PB',
    45: 'KN',
    46: 'SH',
    47: 'FT',
    48: 'FW',
    49: 'FC',
    50: 'EP',  # technically "super" eephus but I haven't encountered a normal one yet
    51: 'HS',
    52: 'HL',
}
|
| 69 |
+
|
| 70 |
+
# Maps a numeric side code to a one-letter label (applied to the `batLR`
# column in data.py) -- presumably 'l' = left, 'r' = right; TODO confirm.
lr = {1: 'l', 2: 'r'}
|
| 71 |
+
|
| 72 |
+
# Maps the numeric per-pitch result code (the `presult` column) to an English
# label. Gaps in the key sequence (e.g. 127, 132, 136-138) are codes that the
# original table does not map. Two distinct codes map to 'Balk' (120, 135) and
# two to 'Groundout (Double play)' (111, 140).
presult = {
    0: 'None',
    101: 'Foul',
    102: 'Single',
    103: 'Hit by pitch',
    104: 'Double',
    105: 'Triple',
    106: 'Home run',
    107: 'Error',
    108: 'Groundout',
    109: 'Flyout',
    110: 'Lineout',
    111: 'Groundout (Double play)',
    112: 'Foul fly',
    113: 'Foul line (?)',
    114: 'Sacrifice bunt',
    115: 'Sacrifice fly',
    116: 'Swinging strike',
    117: 'Looking strike',
    118: 'Ball',
    119: 'Walk',
    120: 'Balk',
    121: 'Batter interference',
    122: 'Catcher interference',
    123: 'Uncaught third strike',
    124: 'Sacrifice hit error',
    125: 'Sacrifice fly, error',
    126: "Fielder's choice",
    128: "Sacrifice fielder's choice",
    129: 'Bunt strikeout',
    130: 'Swinging strikeout',
    131: 'Looking strikeout',
    133: 'Inside-the-park home run',
    134: 'Pitcher delay',
    135: 'Balk',
    139: 'Intentional walk',
    140: 'Groundout (Double play)',
    141: 'Unknown',
}
|
| 111 |
+
|
| 112 |
+
# Maps a numeric batting-result code to its Japanese result label.
#
# Trailing comments are the original author's shorthand for batted-ball type
# and fielder position (gb = ground ball, fb = fly ball, ld = line drive,
# ihr = inside-the-park home run); entries marked "?" were uncertain.
# Gaps in the key sequence are codes that the table does not map.
#
# Glossary of the labels:
#   単打 single / 2塁打 double / 3塁打 triple / 本塁打 home run
#   本塁打(ランニング) inside-the-park home run
#   空振り三振 swinging strikeout / 見送り三振 looking strikeout
#   振逃げ uncaught third strike / 3バント失敗 failed two-strike bunt
#   四球 walk / 死球 hit by pitch
#   打撃妨害 catcher's interference / 守備妨害 batter's interference
#   ゴロ groundout / ゴロ(併殺打) groundout (double play)
#   フライ flyout / ライナー lineout / 邪飛 foul fly / 邪直 foul liner
#   犠打 sacrifice bunt / 犠飛 sacrifice fly
#   犠打野選 sacrifice fielder's choice / 野選 fielder's choice
#   失策出塁 reached on error / 犠打失策, 犠飛失策 sacrifice + error
#   規則違反 rule violation
bresult = {
    0: '空振り三振',
    1: '単打',  # 1b gb p
    2: '単打',  # 1b gb c
    3: '単打',  # 1b gb 1b
    4: '単打',  # 1b gb 2b
    5: '単打',  # 1b gb 3b? ld
    6: '単打',  # 1b gb ss
    7: '単打',  # 1b gb lf? ld
    8: '単打',  # 1b gb cf
    9: '単打',  # 1b gb rf
    10: '2塁打',  # 2b gb p

    12: '2塁打',  # 2b gb 1b
    13: '2塁打',  # 2b gb 2b
    14: '2塁打',  # 2b gb 3b? ld
    15: '2塁打',  # 2b gb ss
    16: '2塁打',  # 2b gb lf? ld
    17: '2塁打',  # 2b gb cf
    18: '2塁打',  # 2b gb rf

    22: '3塁打',  # 3b gb 2b

    25: '3塁打',  # 3b gb lf? ld
    26: '3塁打',  # 3b gb cf
    27: '3塁打',  # 3b gb rf
    28: '本塁打(ランニング)',  # ihr lf?
    29: '本塁打(ランニング)',  # ihr cf?
    30: '本塁打(ランニング)',  # ihr rf?

    31: '空振り三振',
    32: '振逃げ',
    33: '振逃げ',
    34: '四球',
    35: 'フライ',  # fb cf
    36: '死球',
    37: '打撃妨害',
    38: '守備妨害',

    39: '犠打野選',
    40: 'ゴロ',  # gb p
    41: 'ゴロ',  # gb c
    42: 'ゴロ',  # gb 1b
    43: 'ゴロ',  # gb 2b
    44: 'ゴロ',  # gb 3b
    45: 'ゴロ',  # gb ss
    46: 'ゴロ',  # gb lf
    47: 'ゴロ',  # gb cf
    48: 'ゴロ',  # gb rf

    49: 'フライ',  # fb p
    50: 'フライ',  # fb c
    51: 'フライ',  # fb 1b
    52: 'フライ',  # fb 2b
    53: 'フライ',  # fb 3b
    54: 'フライ',  # fb ss
    55: 'フライ',  # fb lf
    56: 'フライ',  # fb cf
    57: 'フライ',  # fb rf
    58: 'ライナー',  # ld p
    60: 'ライナー',  # ld 1b
    61: 'ライナー',  # ld 2b
    62: 'ライナー',  # ld 3b
    63: 'ライナー',  # ld ss
    64: 'ライナー',  # ld lf

    66: 'ライナー',  # ld rf
    67: '犠打',
    68: '犠打',
    69: '犠打',
    70: '犠打',
    71: '犠打',
    79: '犠飛',  # sac fly lf
    80: '犠飛',  # sac fly cf
    81: '犠飛',  # sac fly rf
    82: '邪飛',
    83: '邪飛',
    84: '邪飛',
    85: '邪飛',
    86: '邪飛',
    87: '邪飛',
    88: '邪飛',

    90: '邪飛',
    91: 'ゴロ(併殺打)',
    92: 'ゴロ(併殺打)',
    93: 'ゴロ(併殺打)',
    94: 'ゴロ(併殺打)',
    95: 'ゴロ(併殺打)',
    96: 'ゴロ(併殺打)',

    100: '失策出塁',
    101: '失策出塁',
    102: '失策出塁',
    103: '失策出塁',
    104: '失策出塁',
    105: '失策出塁',
    106: '失策出塁',
    107: '失策出塁',
    108: '失策出塁',
    109: '野選',
    110: '野選',
    111: '野選',
    112: '野選',
    113: '野選',
    114: '野選',
    115: '犠打失策',
    116: '犠飛失策',

    208: '本塁打',  # hr lf?

    210: '本塁打',  # hr cf?
    212: '本塁打',  # hr rf?

    214: '見送り三振',
    215: '犠打野選',
    216: '犠打野選',

    218: '犠打野選',

    229: '振逃げ',

    234: '邪直',

    236: '邪直',

    241: '3バント失敗',

    258: '規則違反',

    267: '打撃妨害',

    338: '?',
}
|
| 246 |
+
|
| 247 |
+
# Maps the numeric `GameKindID` to an English competition name.
# data.py relies on the exact wording of these labels ('Regular Season',
# 'Interleague', 'Spring Training', 'All-Star Game') when it derives the
# coarser game-type column, so do not reword them casually.
game_kind = {
    1: 'CL Regular Season',
    2: 'PL Regular Season',
    3: 'Nippon Series',
    4: 'All-Star Game',
    5: 'Spring Training',
    11: 'Farm Championship',
    26: 'Interleague',
    35: 'CL Climax Series First Stage',
    36: 'CL Climax Series Final Stage',
    37: 'PL Climax Series First Stage',
    38: 'PL Climax Series Final Stage',
}
|
data.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import polars as pl
|
| 2 |
+
from glob import glob
|
| 3 |
+
import os
|
| 4 |
+
from tqdm.auto import tqdm
|
| 5 |
+
|
| 6 |
+
from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, lr, game_kind
|
| 7 |
+
|
| 8 |
+
# Root directory of the collected data; each season lives in a sub-directory
# named after the year (e.g. <DATA_PATH>/2021/pbp_data.parquet).
DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')

# Seasons to load, listed explicitly (equivalent to range(2021, 2025 + 1)).
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
| 11 |
+
|
| 12 |
+
# Read every per-season parquet file, then build one frame per table.
# Frames are gathered in lists and concatenated once after the loop, rather
# than re-concatenating a growing accumulator on every iteration.
_season_frames = {
    'pbp_data.parquet': [],
    'pbp_text.parquet': [],
    'pbp_aux.parquet': [],
    'schedule.parquet': [],
    'aux_schedule.parquet': [],
}

for season in tqdm(SEASONS):
    _season_dir = os.path.join(DATA_PATH, str(season))
    for _file_name, _frames in _season_frames.items():
        _frames.append(pl.read_parquet(os.path.join(_season_dir, _file_name)))


def _concat_or_empty(frames, how='vertical'):
    """Concatenate `frames`; return an empty DataFrame when there are none."""
    if not frames:
        return pl.DataFrame()
    return pl.concat(frames, how=how)


data_df = _concat_or_empty(_season_frames['pbp_data.parquet'])
text_df = _concat_or_empty(_season_frames['pbp_text.parquet'])
# The aux play-by-play files are combined with 'diagonal_relaxed' (as in the
# original loop), which tolerates differing columns/dtypes between seasons.
aux_df = _concat_or_empty(_season_frames['pbp_aux.parquet'], how='diagonal_relaxed')
sched_df = _concat_or_empty(_season_frames['schedule.parquet'])
aux_sched_df = _concat_or_empty(_season_frames['aux_schedule.parquet'])
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# sched_df = sched_df.
|
| 36 |
+
|
| 37 |
+
# --- aux play-by-play: derive plate-appearance counters and join keys -------
#
# The pipeline below
#   1. drops 'RUNNER' rows and attaches the game date from the aux schedule,
#   2. normalises date / team codes and encodes top/bottom as '1'/'2',
#   3. numbers plate appearances within each half-inning (a pitch whose
#      `pitch.count` is 1 starts a new one),
#   4. flags plate appearances containing one of the codes 6402/6404/6405/6406
#      as `ibb` (presumably intentional-walk events -- TODO confirm) and
#      renumbers plate appearances with those excluded,
#   5. builds the string keys `inning_code`, `pa_code` and `universal_code`
#      that data_df is later joined on.

# Columns identifying one half-inning of one game in the aux feed.
_AUX_HALF_INNING = ['gameGlobalId', 'inning', 'tob']

# Pitch number within the plate appearance.
_aux_pitch_count = pl.col('pitch').struct.field('count')

# <date>_<visitor>_<home>_<2-digit inning><top/bottom code>
_aux_inning_code = (
    pl.col('gameDate') + '_'
    + pl.col('visitor') + '_'
    + pl.col('home') + '_'
    + pl.col('inning').str.zfill(2)
    + pl.col('tob_code')
)
# ... followed by the 2-digit plate-appearance number.
_aux_pa_code = _aux_inning_code + pl.col('new_pa_count').cast(pl.String).str.zfill(2)

aux_df = (
    aux_df
    .filter(pl.col('type') != 'RUNNER')
    .join(aux_sched_df[['gameGlobalId', 'gameDate']], on='gameGlobalId')
    .with_columns(
        pl.col('gameDate').str.to_date().dt.strftime('%Y%m%d'),
        pl.col('home').struct.field('globalId').replace_strict(aux_global_id_to_code).alias('home'),
        pl.col('visitor').struct.field('globalId').replace_strict(aux_global_id_to_code).alias('visitor'),
        pl.when(pl.col('tob') == 'Top').then(pl.lit('1')).otherwise(pl.lit('2')).alias('tob_code'),
    )
    .filter(
        # either one alone should be enough but let's use them together to be safe
        ~((pl.col('code') == 98) & (pl.col('id') == 1))
    )
    .with_columns(
        (_aux_pitch_count == 1).cum_sum().over(_AUX_HALF_INNING).alias('pa_count')
    )
    .with_columns(
        pl.col('code').is_in([6402, 6404, 6406, 6405]).any()
        .over(_AUX_HALF_INNING + ['pa_count'])
        .alias('ibb')
    )
    .with_columns(
        # Same counter as pa_count, but rows of flagged plate appearances
        # contribute null instead of a first-pitch marker.
        pl.when(~pl.col('ibb')).then(_aux_pitch_count == 1).cum_sum()
        .over(_AUX_HALF_INNING)
        .alias('new_pa_count')
    )
    .with_columns(
        pl.len().over(_AUX_HALF_INNING + ['new_pa_count']).alias('pa_pitches'),
        pl.max('new_pa_count').over(_AUX_HALF_INNING).alias('inning_pas'),
    )
    .with_columns(
        (_aux_pa_code + '_' + _aux_pitch_count.cast(pl.String)).alias('universal_code'),
        _aux_inning_code.alias('inning_code'),
        _aux_pa_code.alias('pa_code'),
    )
)
|
| 88 |
+
|
| 89 |
+
# --- main pitch-by-pitch frame: clean up, key, and enrich --------------------
#
# The pipeline below
#   1. casts the numeric columns and splits `fiveDigitSerialNumber` into a
#      3-character half-inning part and a 2-character batter part,
#   2. counts pitches per plate appearance (rows whose presult is 0 are not
#      counted) and flags plate appearances containing presult 139 as `ibb`,
#   3. drops plate appearances with no counted pitch and renumbers batters
#      within each half-inning with the flagged plate appearances excluded,
#   4. builds the `universal_code` / `inning_code` / `pa_code` string keys,
#   5. left-joins the aux batting result and the game kind,
#   6. decodes the coded columns into readable labels.

# Columns identifying one plate appearance in this feed.
_PA_KEY = ['gameId', 'fiveDigitSerialNumber']

# <date>_<visitor>_<home>_ prefix shared by all three string keys.
_game_prefix = (
    pl.col('date') + '_'
    + pl.col('VisitorTeamNameES') + '_'
    + pl.col('HomeTeamNameES') + '_'
)
_new_serial = pl.col('newFiveDigitSerialNumber')

data_df = (
    data_df
    .with_columns(
        *[
            pl.col(col).cast(pl.Int32)
            for col
            in ['gameId', 'ballKind', 'ballSpeed', 'x', 'y', 'presult', 'bresult', 'battedX', 'battedY']
        ],
        pl.col('UpdatedAt').str.to_datetime(),
        pl.col('fiveDigitSerialNumber').str.slice(offset=0, length=3).alias('half_inning'),
        pl.col('fiveDigitSerialNumber').str.slice(offset=3, length=2).alias('batter'),
    )
    .with_columns(
        # Number of rows in the plate appearance whose presult is not 0.
        (~pl.col('presult').is_in([0])).sum().over(_PA_KEY).alias('pa_pitches'),
        pl.col('presult').is_in([139]).any().over(_PA_KEY).alias('ibb'),
    )
    .filter(
        (pl.col('pa_pitches') > 0)
    )
    .with_columns(
        # Overwrites `batter`: null for flagged (ibb) plate appearances.
        pl.when(~pl.col('ibb')).then(pl.col('batter'))
    )
    .with_columns(
        pl.when(~pl.col('ibb')).then(pl.col('batter').rank('dense'))
        .over(['gameId', 'half_inning'])
        .cast(pl.String).str.zfill(2)
        .alias('new_batter')
    )
    .with_columns(
        (pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
    )
    .with_columns(
        pl.max('new_batter').cast(pl.Int32)
        .over(['gameId', _new_serial.str.slice(offset=0, length=3)])
        .alias('inning_pas')
    )
    .join(
        sched_df[['GameID', 'HomeTeamNameES', 'VisitorTeamNameES']].rename({'GameID': 'gameId'}),
        on='gameId',
    )
    .with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
    .with_columns(
        # The alias now wraps the whole expression; the original applied it
        # before appending the '_<atBatBallCount>' suffix.
        (_game_prefix + _new_serial + '_' + pl.col('atBatBallCount')).alias('universal_code'),
        (_game_prefix + _new_serial.str.slice(offset=0, length=3)).alias('inning_code'),
        (_game_prefix + _new_serial).alias('pa_code'),
    )
    .join(
        (
            aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches']]
            .rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
        ),
        on='universal_code',
        how='left',
    )
    .join(
        text_df[['GameID', 'GameKindID']].with_columns(
            pl.col('GameID').cast(pl.Int32),
            pl.col('GameKindID').cast(pl.Int32),
        ).unique(),
        how='left',
        left_on='gameId',
        right_on='GameID',
    )
    .with_columns(pl.col('GameKindID').replace_strict(game_kind).alias('GameKindName'))
    .with_columns(
        # Keep the aux batting result only where both feeds agree on the
        # half-inning's plate-appearance count and this PA's pitch count;
        # otherwise it becomes null. The bare string names a column.
        pl.when((pl.col('inning_pas') == pl.col('aux_inning_pas')) & (pl.col('pa_pitches') == pl.col('aux_pa_pitches')))
        .then('aux_bresult')
        .alias('aux_bresult'),

        # Coordinate transforms: x -> 100 - x, y -> 250 - y.
        pl.col('x').add(-100).mul(-1),
        pl.col('y').neg().add(250),

        # Decode numeric codes into labels (strict: unmapped codes raise).
        pl.col('presult').replace_strict(presult),
        pl.col('ballKind').replace_strict(ball_kind),
        pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
        pl.col('batLR').replace_strict(lr),

        # Collapse the game kind into Regular Season / Postseason, leaving
        # 'Spring Training' and 'All-Star Game' under their own names.
        pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
        .then(pl.lit('Regular Season'))
        .when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
        .then(pl.lit('Postseason'))
        .otherwise('GameKindName')
        .alias('coarse_game_kind'),
    )
)
|
pitcher_overview.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
from data import SEASONS
|
| 4 |
+
|
| 5 |
+
def dummy(*inputs):
    """Return the positional arguments unchanged, as a tuple.

    Placeholder callback: wiring it with identical inputs and outputs echoes
    the current component values back to the same components.
    """
    return inputs
|
| 7 |
+
|
| 8 |
+
def adjust_season_end_based_on_season_start(season_start, season_end):
    """Return the season-end value that keeps ``season_end >= season_start``.

    Args:
        season_start: Currently selected start season, or None if unset.
        season_end: Currently selected end season, or None if unset.

    Returns:
        ``season_end`` raised to ``season_start`` when it would otherwise be
        earlier. If either value is None there is nothing to compare, so
        ``season_end`` is returned unchanged (``max`` would raise TypeError).
    """
    if season_start is None or season_end is None:
        return season_end
    return max(season_start, season_end)
|
| 10 |
+
|
| 11 |
+
def adjust_season_start_based_on_season_end(season_end, season_start):
    """Return the season-start value that keeps ``season_start <= season_end``.

    Args:
        season_end: Currently selected end season, or None if unset.
        season_start: Currently selected start season, or None if unset.

    Returns:
        ``season_start`` lowered to ``season_end`` when it would otherwise be
        later. If either value is None there is nothing to compare, so
        ``season_start`` is returned unchanged (``min`` would raise TypeError).
    """
    if season_start is None or season_end is None:
        return season_start
    return min(season_start, season_end)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def create_pitcher_overview(data_df):
    """Build the pitcher-overview UI and return it as a ``gr.Blocks``.

    Args:
        data_df: Pitch-level DataFrame; its ``pitId`` column supplies the
            choices of the name dropdown.

    Returns:
        The ``gr.Blocks`` container holding the controls.
    """
    with gr.Blocks() as app:
        gr.Markdown('Test')

        name = gr.Dropdown(sorted(data_df['pitId'].unique().to_list()), label='Name')
        season_start = gr.Dropdown(SEASONS, label='Season start')
        season_end = gr.Dropdown(SEASONS, label='Season end')

        # Keep the range consistent: changing one end adjusts the other.
        season_start.input(
            adjust_season_end_based_on_season_start,
            inputs=[season_start, season_end],
            outputs=season_end,
        )
        season_end.input(
            adjust_season_start_based_on_season_end,
            inputs=[season_end, season_start],
            outputs=season_start,
        )

        game_type = gr.Dropdown(['Spring Training', 'Regular Season', 'Postseason'], label='Game Type')

        generate = gr.Button('Generate')

        # Placeholder wiring: the button echoes the current values back.
        dummy_io = [name, season_start, season_end, game_type]
        generate.click(dummy, inputs=dummy_io, outputs=dummy_io)

    return app
|
| 34 |
+
|
| 35 |
+
if __name__ == '__main__':
    # create_pitcher_overview() requires the pitch DataFrame; it is only
    # needed here, for the standalone launch, so import it at this point.
    from data import data_df

    create_pitcher_overview(data_df).launch()
|