Spaces:
Running
Running
Commit
·
1999f4d
1
Parent(s):
2f2b5dd
Add grappling features to fighter stats and ML pipeline
Browse files — Introduces takedown accuracy and submission attempts per minute as new features in both the model and preprocessing pipeline. Updates requirements.txt with pinned versions and adds new dependencies for data processing and visualization. Enhances fighter history stats calculation to include grappling metrics, improving model input feature richness.
- requirements.txt +12 -8
- src/predict/models.py +2 -0
- src/predict/preprocess.py +31 -3
requirements.txt
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
xgboost
|
| 8 |
-
lightgbm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
joblib==1.4.2
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
requests==2.31.0
|
| 4 |
+
beautifulsoup4==4.12.3
|
| 5 |
+
lxml==5.2.1
|
| 6 |
+
scikit-learn==1.5.0
|
| 7 |
+
xgboost==2.0.3
|
| 8 |
+
lightgbm==4.3.0
|
| 9 |
+
gradio==4.31.5
|
| 10 |
+
gradio_client==0.16.4
|
| 11 |
+
matplotlib==3.9.0
|
| 12 |
+
seaborn==0.13.2
|
src/predict/models.py
CHANGED
|
@@ -144,6 +144,8 @@ class BaseMLModel(BaseModel):
|
|
| 144 |
'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
|
| 145 |
'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
|
| 146 |
'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
|
|
|
|
|
|
|
| 147 |
}
|
| 148 |
|
| 149 |
feature_vector = pd.DataFrame([features]).fillna(0)
|
|
|
|
| 144 |
'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
|
| 145 |
'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
|
| 146 |
'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
|
| 147 |
+
'takedown_accuracy_last_5_diff': f1_hist_stats['takedown_accuracy_last_n'] - f2_hist_stats['takedown_accuracy_last_n'],
|
| 148 |
+
'sub_attempts_per_min_last_5_diff': f1_hist_stats['sub_attempts_per_min_last_n'] - f2_hist_stats['sub_attempts_per_min_last_n'],
|
| 149 |
}
|
| 150 |
|
| 151 |
feature_vector = pd.DataFrame([features]).fillna(0)
|
src/predict/preprocess.py
CHANGED
|
@@ -38,6 +38,16 @@ def _parse_striking_stats(stat_str):
|
|
| 38 |
except (ValueError, TypeError, AttributeError):
|
| 39 |
return 0, 0
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history, fighters_df, n=5):
|
| 42 |
"""
|
| 43 |
Calculates performance statistics for a fighter based on their last n fights.
|
|
@@ -52,16 +62,21 @@ def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history
|
|
| 52 |
'avg_opp_elo_last_n': 1500, # Assume average ELO for first opponent
|
| 53 |
'ko_percent_last_n': 0,
|
| 54 |
'sig_str_landed_per_min_last_n': 0,
|
|
|
|
|
|
|
| 55 |
}
|
| 56 |
|
| 57 |
stats = {
|
| 58 |
'wins': 0, 'ko_wins': 0, 'total_time_secs': 0,
|
| 59 |
-
'sig_str_landed': 0, 'opponent_elos': []
|
|
|
|
| 60 |
}
|
| 61 |
|
| 62 |
for fight in last_n_fights:
|
| 63 |
is_fighter_1 = (fight['fighter_1'] == fighter_name)
|
| 64 |
opponent_name = fight['fighter_2'] if is_fighter_1 else fight['fighter_1']
|
|
|
|
|
|
|
| 65 |
|
| 66 |
if fight['winner'] == fighter_name:
|
| 67 |
stats['wins'] += 1
|
|
@@ -74,18 +89,28 @@ def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history
|
|
| 74 |
|
| 75 |
stats['total_time_secs'] += _parse_round_time_to_seconds(fight['round'], fight['time'])
|
| 76 |
|
| 77 |
-
sig_str_stat = fight.get(f'
|
| 78 |
landed, _ = _parse_striking_stats(sig_str_stat)
|
| 79 |
stats['sig_str_landed'] += landed
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# Final calculations
|
| 82 |
avg_opp_elo = sum(stats['opponent_elos']) / len(stats['opponent_elos']) if stats['opponent_elos'] else 1500
|
|
|
|
| 83 |
|
| 84 |
return {
|
| 85 |
'wins_last_n': stats['wins'],
|
| 86 |
'avg_opp_elo_last_n': avg_opp_elo,
|
| 87 |
'ko_percent_last_n': (stats['ko_wins'] / stats['wins']) if stats['wins'] > 0 else 0,
|
| 88 |
-
'sig_str_landed_per_min_last_n': (stats['sig_str_landed']
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
|
| 91 |
def preprocess_for_ml(fights_to_process, fighters_csv_path):
|
|
@@ -174,6 +199,9 @@ def preprocess_for_ml(fights_to_process, fighters_csv_path):
|
|
| 174 |
'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
|
| 175 |
'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
|
| 176 |
'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
|
|
|
|
|
|
|
|
|
|
| 177 |
}
|
| 178 |
feature_list.append(features_win)
|
| 179 |
target_list.append(1) # 1 represents a win
|
|
|
|
| 38 |
except (ValueError, TypeError, AttributeError):
|
| 39 |
return 0, 0
|
| 40 |
|
| 41 |
+
def _to_int_safe(val):
|
| 42 |
+
"""Safely converts a value to an integer, returning 0 if it's invalid or empty."""
|
| 43 |
+
if pd.isna(val):
|
| 44 |
+
return 0
|
| 45 |
+
try:
|
| 46 |
+
# handle strings with whitespace or empty strings
|
| 47 |
+
return int(str(val).strip() or 0)
|
| 48 |
+
except (ValueError, TypeError):
|
| 49 |
+
return 0
|
| 50 |
+
|
| 51 |
def _get_fighter_history_stats(fighter_name, current_fight_date, fighter_history, fighters_df, n=5):
|
| 52 |
"""
|
| 53 |
Calculates performance statistics for a fighter based on their last n fights.
|
|
|
|
| 62 |
'avg_opp_elo_last_n': 1500, # Assume average ELO for first opponent
|
| 63 |
'ko_percent_last_n': 0,
|
| 64 |
'sig_str_landed_per_min_last_n': 0,
|
| 65 |
+
'takedown_accuracy_last_n': 0,
|
| 66 |
+
'sub_attempts_per_min_last_n': 0,
|
| 67 |
}
|
| 68 |
|
| 69 |
stats = {
|
| 70 |
'wins': 0, 'ko_wins': 0, 'total_time_secs': 0,
|
| 71 |
+
'sig_str_landed': 0, 'opponent_elos': [],
|
| 72 |
+
'td_landed': 0, 'td_attempted': 0, 'sub_attempts': 0
|
| 73 |
}
|
| 74 |
|
| 75 |
for fight in last_n_fights:
|
| 76 |
is_fighter_1 = (fight['fighter_1'] == fighter_name)
|
| 77 |
opponent_name = fight['fighter_2'] if is_fighter_1 else fight['fighter_1']
|
| 78 |
+
|
| 79 |
+
f_prefix = 'f1' if is_fighter_1 else 'f2'
|
| 80 |
|
| 81 |
if fight['winner'] == fighter_name:
|
| 82 |
stats['wins'] += 1
|
|
|
|
| 89 |
|
| 90 |
stats['total_time_secs'] += _parse_round_time_to_seconds(fight['round'], fight['time'])
|
| 91 |
|
| 92 |
+
sig_str_stat = fight.get(f'{f_prefix}_sig_str', '0 of 0')
|
| 93 |
landed, _ = _parse_striking_stats(sig_str_stat)
|
| 94 |
stats['sig_str_landed'] += landed
|
| 95 |
|
| 96 |
+
td_stat = fight.get(f'{f_prefix}_td', '0 of 0')
|
| 97 |
+
td_landed, td_attempted = _parse_striking_stats(td_stat) # Can reuse this parser
|
| 98 |
+
stats['td_landed'] += td_landed
|
| 99 |
+
stats['td_attempted'] += td_attempted
|
| 100 |
+
|
| 101 |
+
stats['sub_attempts'] += _to_int_safe(fight.get(f'{f_prefix}_sub_att'))
|
| 102 |
+
|
| 103 |
# Final calculations
|
| 104 |
avg_opp_elo = sum(stats['opponent_elos']) / len(stats['opponent_elos']) if stats['opponent_elos'] else 1500
|
| 105 |
+
total_minutes = stats['total_time_secs'] / 60 if stats['total_time_secs'] > 0 else 0
|
| 106 |
|
| 107 |
return {
|
| 108 |
'wins_last_n': stats['wins'],
|
| 109 |
'avg_opp_elo_last_n': avg_opp_elo,
|
| 110 |
'ko_percent_last_n': (stats['ko_wins'] / stats['wins']) if stats['wins'] > 0 else 0,
|
| 111 |
+
'sig_str_landed_per_min_last_n': (stats['sig_str_landed'] / total_minutes) if total_minutes > 0 else 0,
|
| 112 |
+
'takedown_accuracy_last_n': (stats['td_landed'] / stats['td_attempted']) if stats['td_attempted'] > 0 else 0,
|
| 113 |
+
'sub_attempts_per_min_last_n': (stats['sub_attempts'] / total_minutes) if total_minutes > 0 else 0,
|
| 114 |
}
|
| 115 |
|
| 116 |
def preprocess_for_ml(fights_to_process, fighters_csv_path):
|
|
|
|
| 199 |
'avg_opp_elo_last_5_diff': f1_hist_stats['avg_opp_elo_last_n'] - f2_hist_stats['avg_opp_elo_last_n'],
|
| 200 |
'ko_percent_last_5_diff': f1_hist_stats['ko_percent_last_n'] - f2_hist_stats['ko_percent_last_n'],
|
| 201 |
'sig_str_landed_per_min_last_5_diff': f1_hist_stats['sig_str_landed_per_min_last_n'] - f2_hist_stats['sig_str_landed_per_min_last_n'],
|
| 202 |
+
# Grappling features
|
| 203 |
+
'takedown_accuracy_last_5_diff': f1_hist_stats['takedown_accuracy_last_n'] - f2_hist_stats['takedown_accuracy_last_n'],
|
| 204 |
+
'sub_attempts_per_min_last_5_diff': f1_hist_stats['sub_attempts_per_min_last_n'] - f2_hist_stats['sub_attempts_per_min_last_n'],
|
| 205 |
}
|
| 206 |
feature_list.append(features_win)
|
| 207 |
target_list.append(1) # 1 represents a win
|