AlvaroMros committed on
Commit
1999f4d
·
1 Parent(s): 2f2b5dd

Add grappling features to fighter stats and ML pipeline

Browse files

Introduces takedown accuracy and submission attempts per minute as new features in both the model and the preprocessing pipeline. Pins dependency versions in requirements.txt and adds new dependencies for data processing and visualization. Extends the fighter history stats calculation to include these grappling metrics, giving the model a richer set of input features.

requirements.txt CHANGED
@@ -1,8 +1,12 @@
1
- requests
2
- beautifulsoup4
3
- pandas
4
- scikit-learn
5
- lazypredict
6
- tqdm
7
- xgboost
8
- lightgbm
 
 
 
 
 
1
+ joblib==1.4.2
2
+ pandas==2.2.2
3
+ requests==2.31.0
4
+ beautifulsoup4==4.12.3
5
+ lxml==5.2.1
6
+ scikit-learn==1.5.0
7
+ xgboost==2.0.3
8
+ lightgbm==4.3.0
9
+ gradio==4.31.5
10
+ gradio_client==0.16.4
11
+ matplotlib==3.9.0
12
+ seaborn==0.13.2
src/predict/models.py CHANGED
@@ -144,6 +144,8 @@ class BaseMLModel(BaseModel):
144
  'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
145
  'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
146
  'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
 
 
147
  }
148
 
149
  feature_vector = pd.DataFrame([features]).fillna(0)
 
144
  'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
145
  'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
146
  'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
147
+ 'takedown_accuracy_last_5_diff': f1_hist_stats['takedown_accuracy_last_n'] - f2_hist_stats['takedown_accuracy_last_n'],
148
+ 'sub_attempts_per_min_last_5_diff': f1_hist_stats['sub_attempts_per_min_last_n'] - f2_hist_stats['sub_attempts_per_min_last_n'],
149
  }
150
 
151
  feature_vector = pd.DataFrame([features]).fillna(0)
src/predict/preprocess.py CHANGED
@@ -38,6 +38,16 @@ def _parse_striking_stats(stat_str):
38
  except (ValueError, TypeError, AttributeError):
39
  return 0, 0
40
 
 
 
 
 
 
 
 
 
 
 
41
  def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history, fighters_df, n=5):
42
  """
43
  Calculates performance statistics for a fighter based on their last n fights.
@@ -52,16 +62,21 @@ def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history
52
  'avg_opp_elo_last_n': 1500, # Assume average ELO for first opponent
53
  'ko_percent_last_n': 0,
54
  'sig_str_landed_per_min_last_n': 0,
 
 
55
  }
56
 
57
  stats = {
58
  'wins': 0, 'ko_wins': 0, 'total_time_secs': 0,
59
- 'sig_str_landed': 0, 'opponent_elos': []
 
60
  }
61
 
62
  for fight in last_n_fights:
63
  is_fighter_1 = (fight['fighter_1'] == fighter_name)
64
  opponent_name = fight['fighter_2'] if is_fighter_1 else fight['fighter_1']
 
 
65
 
66
  if fight['winner'] == fighter_name:
67
  stats['wins'] += 1
@@ -74,18 +89,28 @@ def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history
74
 
75
  stats['total_time_secs'] += _parse_round_time_to_seconds(fight['round'], fight['time'])
76
 
77
- sig_str_stat = fight.get(f'f1_sig_str' if is_fighter_1 else 'f2_sig_str', '0 of 0')
78
  landed, _ = _parse_striking_stats(sig_str_stat)
79
  stats['sig_str_landed'] += landed
80
 
 
 
 
 
 
 
 
81
  # Final calculations
82
  avg_opp_elo = sum(stats['opponent_elos']) / len(stats['opponent_elos']) if stats['opponent_elos'] else 1500
 
83
 
84
  return {
85
  'wins_last_n': stats['wins'],
86
  'avg_opp_elo_last_n': avg_opp_elo,
87
  'ko_percent_last_n': (stats['ko_wins'] / stats['wins']) if stats['wins'] > 0 else 0,
88
- 'sig_str_landed_per_min_last_n': (stats['sig_str_landed'] * 60 / stats['total_time_secs']) if stats['total_time_secs'] > 0 else 0,
 
 
89
  }
90
 
91
  def preprocess_for_ml(fights_to_process, fighters_csv_path):
@@ -174,6 +199,9 @@ def preprocess_for_ml(fights_to_process, fighters_csv_path):
174
  'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
175
  'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
176
  'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
 
 
 
177
  }
178
  feature_list.append(features_win)
179
  target_list.append(1) # 1 represents a win
 
38
  except (ValueError, TypeError, AttributeError):
39
  return 0, 0
40
 
41
+ def _to_int_safe(val):
42
+ """Safely converts a value to an integer, returning 0 if it's invalid or empty."""
43
+ if pd.isna(val):
44
+ return 0
45
+ try:
46
+ # handle strings with whitespace or empty strings
47
+ return int(str(val).strip() or 0)
48
+ except (ValueError, TypeError):
49
+ return 0
50
+
51
  def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history, fighters_df, n=5):
52
  """
53
  Calculates performance statistics for a fighter based on their last n fights.
 
62
  'avg_opp_elo_last_n': 1500, # Assume average ELO for first opponent
63
  'ko_percent_last_n': 0,
64
  'sig_str_landed_per_min_last_n': 0,
65
+ 'takedown_accuracy_last_n': 0,
66
+ 'sub_attempts_per_min_last_n': 0,
67
  }
68
 
69
  stats = {
70
  'wins': 0, 'ko_wins': 0, 'total_time_secs': 0,
71
+ 'sig_str_landed': 0, 'opponent_elos': [],
72
+ 'td_landed': 0, 'td_attempted': 0, 'sub_attempts': 0
73
  }
74
 
75
  for fight in last_n_fights:
76
  is_fighter_1 = (fight['fighter_1'] == fighter_name)
77
  opponent_name = fight['fighter_2'] if is_fighter_1 else fight['fighter_1']
78
+
79
+ f_prefix = 'f1' if is_fighter_1 else 'f2'
80
 
81
  if fight['winner'] == fighter_name:
82
  stats['wins'] += 1
 
89
 
90
  stats['total_time_secs'] += _parse_round_time_to_seconds(fight['round'], fight['time'])
91
 
92
+ sig_str_stat = fight.get(f'{f_prefix}_sig_str', '0 of 0')
93
  landed, _ = _parse_striking_stats(sig_str_stat)
94
  stats['sig_str_landed'] += landed
95
 
96
+ td_stat = fight.get(f'{f_prefix}_td', '0 of 0')
97
+ td_landed, td_attempted = _parse_striking_stats(td_stat) # Can reuse this parser
98
+ stats['td_landed'] += td_landed
99
+ stats['td_attempted'] += td_attempted
100
+
101
+ stats['sub_attempts'] += _to_int_safe(fight.get(f'{f_prefix}_sub_att'))
102
+
103
  # Final calculations
104
  avg_opp_elo = sum(stats['opponent_elos']) / len(stats['opponent_elos']) if stats['opponent_elos'] else 1500
105
+ total_minutes = stats['total_time_secs'] / 60 if stats['total_time_secs'] > 0 else 0
106
 
107
  return {
108
  'wins_last_n': stats['wins'],
109
  'avg_opp_elo_last_n': avg_opp_elo,
110
  'ko_percent_last_n': (stats['ko_wins'] / stats['wins']) if stats['wins'] > 0 else 0,
111
+ 'sig_str_landed_per_min_last_n': (stats['sig_str_landed'] / total_minutes) if total_minutes > 0 else 0,
112
+ 'takedown_accuracy_last_n': (stats['td_landed'] / stats['td_attempted']) if stats['td_attempted'] > 0 else 0,
113
+ 'sub_attempts_per_min_last_n': (stats['sub_attempts'] / total_minutes) if total_minutes > 0 else 0,
114
  }
115
 
116
  def preprocess_for_ml(fights_to_process, fighters_csv_path):
 
199
  'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
200
  'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
201
  'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
202
+ # Grappling features
203
+ 'takedown_accuracy_last_5_diff': f1_hist_stats['takedown_accuracy_last_n'] - f2_hist_stats['takedown_accuracy_last_n'],
204
+ 'sub_attempts_per_min_last_5_diff': f1_hist_stats['sub_attempts_per_min_last_n'] - f2_hist_stats['sub_attempts_per_min_last_n'],
205
  }
206
  feature_list.append(features_win)
207
  target_list.append(1) # 1 represents a win