patrickramos committed on
Commit
0ed953a
·
1 Parent(s): a8b6a3f

Add whiff, csw, swing

Browse files
Files changed (2) hide show
  1. convert.py +1 -1
  2. data.py +15 -5
convert.py CHANGED
@@ -95,7 +95,7 @@ presult = {
95
  122: 'Catcher interference',
96
  123: 'Uncaught third strike',
97
  124: 'Sacrifice hit error',
98
- 125: 'Sacrifice fly, error',
99
  126: "Fielder's choice",
100
  128: "Sacrifice fielder's choice",
101
  129: 'Bunt strikeout',
 
95
  122: 'Catcher interference',
96
  123: 'Uncaught third strike',
97
  124: 'Sacrifice hit error',
98
+ 125: 'Sacrifice fly error',
99
  126: "Fielder's choice",
100
  128: "Sacrifice fielder's choice",
101
  129: 'Bunt strikeout',
data.py CHANGED
@@ -1,5 +1,4 @@
1
  import polars as pl
2
- from glob import glob
3
  import os
4
  from tqdm.auto import tqdm
5
 
@@ -8,6 +7,7 @@ from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, l
8
  DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
9
  # SEASONS = list(range(2021, 2025+1))
10
  SEASONS = [2021, 2022, 2023, 2024, 2025]
 
11
 
12
  data_df = pl.DataFrame()
13
  text_df = pl.DataFrame()
@@ -86,8 +86,6 @@ aux_df = (
86
  )
87
  )
88
 
89
- data_df = data_df
90
-
91
  data_df = (
92
  data_df
93
  .with_columns(
@@ -153,16 +151,28 @@ data_df = (
153
 
154
  pl.col('x').add(-100).mul(-1),
155
  pl.col('y').neg().add(250),
156
- pl.col('presult').replace_strict(presult),
157
  pl.col('ballKind').replace_strict(ball_kind),
158
  pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
159
  pl.col('batLR').replace_strict(lr),
 
160
 
161
  pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
162
  .then(pl.lit('Regular Season'))
163
  .when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
164
  .then(pl.lit('Postseason'))
165
  .otherwise('GameKindName')
166
- .alias('coarse_game_kind')
 
 
 
 
 
 
 
 
 
 
 
167
  )
168
  )
 
1
  import polars as pl
 
2
  import os
3
  from tqdm.auto import tqdm
4
 
 
7
  DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
8
  # SEASONS = list(range(2021, 2025+1))
9
  SEASONS = [2021, 2022, 2023, 2024, 2025]
10
+ # SEASONS = [2024]
11
 
12
  data_df = pl.DataFrame()
13
  text_df = pl.DataFrame()
 
86
  )
87
  )
88
 
 
 
89
  data_df = (
90
  data_df
91
  .with_columns(
 
151
 
152
  pl.col('x').add(-100).mul(-1),
153
  pl.col('y').neg().add(250),
154
+ pl.col('presult').alias('presult_id'),
155
  pl.col('ballKind').replace_strict(ball_kind),
156
  pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
157
  pl.col('batLR').replace_strict(lr),
158
+ pl.col('date').str.to_date('%Y%m%d'),
159
 
160
  pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
161
  .then(pl.lit('Regular Season'))
162
  .when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
163
  .then(pl.lit('Postseason'))
164
  .otherwise('GameKindName')
165
+ .alias('coarse_game_kind'),
166
+ )
167
+ .with_columns(
168
+ pl.col('presult_id').replace_strict(presult).alias('presult')
169
+ )
170
+ .with_columns(
171
+ pl.col('presult').is_in(['None', 'Balk', 'Batter interference', 'Catcher interference', 'Pitcher delay', 'Intentional walk', 'Unknown']).not_().alias('pitch'),
172
+ pl.col('presult').is_in(['Swinging strike', 'Swinging strikeout']).alias('whiff'),
173
+ )
174
+ .with_columns(
175
+ (pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
176
+ (pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
177
  )
178
  )