Spaces:
Running
Running
Commit
·
0ed953a
1
Parent(s):
a8b6a3f
Add whiff, csw, swing
Browse files
- convert.py +1 -1
- data.py +15 -5
convert.py
CHANGED
@@ -95,7 +95,7 @@ presult = {
|
|
95 |
122: 'Catcher interference',
|
96 |
123: 'Uncaught third strike',
|
97 |
124: 'Sacrifice hit error',
|
98 |
-
125: 'Sacrifice fly
|
99 |
126: "Fielder's choice",
|
100 |
128: "Sacrifice fielder's choice",
|
101 |
129: 'Bunt strikeout',
|
|
|
95 |
122: 'Catcher interference',
|
96 |
123: 'Uncaught third strike',
|
97 |
124: 'Sacrifice hit error',
|
98 |
+
125: 'Sacrifice fly error',
|
99 |
126: "Fielder's choice",
|
100 |
128: "Sacrifice fielder's choice",
|
101 |
129: 'Bunt strikeout',
|
data.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import polars as pl
|
2 |
-
from glob import glob
|
3 |
import os
|
4 |
from tqdm.auto import tqdm
|
5 |
|
@@ -8,6 +7,7 @@ from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, l
|
|
8 |
DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
|
9 |
# SEASONS = list(range(2021, 2025+1))
|
10 |
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
|
|
11 |
|
12 |
data_df = pl.DataFrame()
|
13 |
text_df = pl.DataFrame()
|
@@ -86,8 +86,6 @@ aux_df = (
|
|
86 |
)
|
87 |
)
|
88 |
|
89 |
-
data_df = data_df
|
90 |
-
|
91 |
data_df = (
|
92 |
data_df
|
93 |
.with_columns(
|
@@ -153,16 +151,28 @@ data_df = (
|
|
153 |
|
154 |
pl.col('x').add(-100).mul(-1),
|
155 |
pl.col('y').neg().add(250),
|
156 |
-
pl.col('presult').
|
157 |
pl.col('ballKind').replace_strict(ball_kind),
|
158 |
pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
|
159 |
pl.col('batLR').replace_strict(lr),
|
|
|
160 |
|
161 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
162 |
.then(pl.lit('Regular Season'))
|
163 |
.when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
|
164 |
.then(pl.lit('Postseason'))
|
165 |
.otherwise('GameKindName')
|
166 |
-
.alias('coarse_game_kind')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
)
|
168 |
)
|
|
|
1 |
import polars as pl
|
|
|
2 |
import os
|
3 |
from tqdm.auto import tqdm
|
4 |
|
|
|
7 |
DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
|
8 |
# SEASONS = list(range(2021, 2025+1))
|
9 |
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
10 |
+
# SEASONS = [2024]
|
11 |
|
12 |
data_df = pl.DataFrame()
|
13 |
text_df = pl.DataFrame()
|
|
|
86 |
)
|
87 |
)
|
88 |
|
|
|
|
|
89 |
data_df = (
|
90 |
data_df
|
91 |
.with_columns(
|
|
|
151 |
|
152 |
pl.col('x').add(-100).mul(-1),
|
153 |
pl.col('y').neg().add(250),
|
154 |
+
pl.col('presult').alias('presult_id'),
|
155 |
pl.col('ballKind').replace_strict(ball_kind),
|
156 |
pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
|
157 |
pl.col('batLR').replace_strict(lr),
|
158 |
+
pl.col('date').str.to_date('%Y%m%d'),
|
159 |
|
160 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
161 |
.then(pl.lit('Regular Season'))
|
162 |
.when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
|
163 |
.then(pl.lit('Postseason'))
|
164 |
.otherwise('GameKindName')
|
165 |
+
.alias('coarse_game_kind'),
|
166 |
+
)
|
167 |
+
.with_columns(
|
168 |
+
pl.col('presult_id').replace_strict(presult).alias('presult')
|
169 |
+
)
|
170 |
+
.with_columns(
|
171 |
+
pl.col('presult').is_in(['None', 'Balk', 'Batter interference', 'Catcher interference', 'Pitcher delay', 'Intentional walk', 'Unknown']).not_().alias('pitch'),
|
172 |
+
pl.col('presult').is_in(['Swinging strike', 'Swinging strikeout']).alias('whiff'),
|
173 |
+
)
|
174 |
+
.with_columns(
|
175 |
+
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
176 |
+
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
177 |
)
|
178 |
)
|