AlvaroMros committed on
Commit
f3ecc65
·
1 Parent(s): 596c26a

Add win probability to fight predictions

Browse files

Updated prediction logic in models and pipeline to return both the predicted winner and the associated win probability. Adjusted app.py and predict_new.py to display the probability alongside the winner. This provides more informative prediction results for users and improves model evaluation reporting.

.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  *__pycache__/
2
- example_event.html
 
 
1
  *__pycache__/
2
+ example_event.html
3
+ web/
app.py CHANGED
@@ -54,10 +54,12 @@ def predict_fight(model_name, fighter1_name, fighter2_name):
54
  'event_date': datetime.now().strftime('%B %d, %Y')
55
  }
56
 
57
- predicted_winner = model.predict(fight)
58
 
59
- if predicted_winner:
60
- return f"Predicted Winner: {predicted_winner}"
 
 
61
  else:
62
  return "Could not make a prediction. Is one of the fighters new or not in the dataset?"
63
 
 
54
  'event_date': datetime.now().strftime('%B %d, %Y')
55
  }
56
 
57
+ prediction_result = model.predict(fight)
58
 
59
+ if prediction_result and prediction_result.get('winner'):
60
+ winner = prediction_result['winner']
61
+ prob = prediction_result['probability']
62
+ return f"Predicted Winner: {winner} ({prob:.1%})"
63
  else:
64
  return "Could not make a prediction. Is one of the fighters new or not in the dataset?"
65
 
src/predict/models.py CHANGED
@@ -54,19 +54,24 @@ class EloBaselineModel(BaseModel):
54
  self.fighters_df = self.fighters_df.drop_duplicates(subset=['full_name']).set_index('full_name')
55
 
56
  def predict(self, fight):
57
- """Predicts the winner based on who has the higher ELO score."""
58
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
59
 
60
  try:
61
  f1_elo = self.fighters_df.loc[f1_name, 'elo']
62
  f2_elo = self.fighters_df.loc[f2_name, 'elo']
63
 
64
- return f1_name if f1_elo > f2_elo else f2_name
 
 
 
 
 
 
 
65
  except KeyError as e:
66
- # If a fighter isn't found, we can't make a prediction.
67
- # Returning None or a default is a design choice.
68
  print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
69
- return None
70
 
71
  class BaseMLModel(BaseModel):
72
  """
@@ -112,14 +117,14 @@ class BaseMLModel(BaseModel):
112
 
113
  def predict(self, fight):
114
  """
115
- Predicts the outcome of a single fight by generating its feature vector.
116
  """
117
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
118
  fight_date = pd.to_datetime(fight['event_date'])
119
 
120
  if f1_name not in self.fighters_df.index or f2_name not in self.fighters_df.index:
121
  print(f"Warning: Fighter not found. Skipping prediction for {f1_name} vs {f2_name}")
122
- return None
123
 
124
  f1_stats = self.fighters_df.loc[f1_name]
125
  f2_stats = self.fighters_df.loc[f2_name]
@@ -149,8 +154,15 @@ class BaseMLModel(BaseModel):
149
  }
150
 
151
  feature_vector = pd.DataFrame([features]).fillna(0)
152
- prediction = self.model.predict(feature_vector)[0]
153
- return f1_name if prediction == 1 else f2_name
 
 
 
 
 
 
 
154
 
155
  class LogisticRegressionModel(BaseMLModel):
156
  """A thin wrapper for scikit-learn's LogisticRegression."""
 
54
  self.fighters_df = self.fighters_df.drop_duplicates(subset=['full_name']).set_index('full_name')
55
 
56
  def predict(self, fight):
57
+ """Predicts the winner based on ELO and calculates win probability."""
58
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
59
 
60
  try:
61
  f1_elo = self.fighters_df.loc[f1_name, 'elo']
62
  f2_elo = self.fighters_df.loc[f2_name, 'elo']
63
 
64
+ # Calculate win probability for fighter 1 using the ELO formula
65
+ prob_f1_wins = 1 / (1 + 10**((f2_elo - f1_elo) / 400))
66
+
67
+ if prob_f1_wins >= 0.5:
68
+ return {'winner': f1_name, 'probability': prob_f1_wins}
69
+ else:
70
+ return {'winner': f2_name, 'probability': 1 - prob_f1_wins}
71
+
72
  except KeyError as e:
 
 
73
  print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
74
+ return {'winner': None, 'probability': None}
75
 
76
  class BaseMLModel(BaseModel):
77
  """
 
117
 
118
  def predict(self, fight):
119
  """
120
+ Predicts the outcome of a single fight, returning the winner and probability.
121
  """
122
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
123
  fight_date = pd.to_datetime(fight['event_date'])
124
 
125
  if f1_name not in self.fighters_df.index or f2_name not in self.fighters_df.index:
126
  print(f"Warning: Fighter not found. Skipping prediction for {f1_name} vs {f2_name}")
127
+ return {'winner': None, 'probability': None}
128
 
129
  f1_stats = self.fighters_df.loc[f1_name]
130
  f2_stats = self.fighters_df.loc[f2_name]
 
154
  }
155
 
156
  feature_vector = pd.DataFrame([features]).fillna(0)
157
+
158
+ # Use predict_proba to get probabilities for each class
159
+ probabilities = self.model.predict_proba(feature_vector)[0]
160
+ prob_f1_wins = probabilities[1] # Probability of class '1' (fighter 1 wins)
161
+
162
+ if prob_f1_wins >= 0.5:
163
+ return {'winner': f1_name, 'probability': prob_f1_wins}
164
+ else:
165
+ return {'winner': f2_name, 'probability': 1 - prob_f1_wins}
166
 
167
  class LogisticRegressionModel(BaseMLModel):
168
  """A thin wrapper for scikit-learn's LogisticRegression."""
src/predict/pipeline.py CHANGED
@@ -66,8 +66,10 @@ class PredictionPipeline:
66
  actual_winner = fight['winner']
67
  event_name = fight.get('event_name', 'Unknown Event')
68
 
69
- predicted_winner = model.predict(fight)
70
-
 
 
71
  is_correct = (predicted_winner == actual_winner)
72
  if is_correct:
73
  correct_predictions += 1
@@ -76,6 +78,7 @@ class PredictionPipeline:
76
  'fight': f"{f1_name} vs. {f2_name}",
77
  'event': event_name,
78
  'predicted_winner': predicted_winner,
 
79
  'actual_winner': actual_winner,
80
  'is_correct': is_correct
81
  })
 
66
  actual_winner = fight['winner']
67
  event_name = fight.get('event_name', 'Unknown Event')
68
 
69
+ prediction_result = model.predict(fight)
70
+ predicted_winner = prediction_result.get('winner')
71
+ probability = prediction_result.get('probability')
72
+
73
  is_correct = (predicted_winner == actual_winner)
74
  if is_correct:
75
  correct_predictions += 1
 
78
  'fight': f"{f1_name} vs. {f2_name}",
79
  'event': event_name,
80
  'predicted_winner': predicted_winner,
81
+ 'probability': f"{probability:.1%}" if probability is not None else "N/A",
82
  'actual_winner': actual_winner,
83
  'is_correct': is_correct
84
  })
src/predict/predict_new.py CHANGED
@@ -31,10 +31,12 @@ def predict_new_fight(fighter1_name, fighter2_name, model_path):
31
 
32
  # 3. Make the prediction
33
  print(f"\nPredicting winner for: {fighter1_name} vs. {fighter2_name}")
34
- predicted_winner = model.predict(fight)
35
 
36
- if predicted_winner:
37
- print(f"\n---> Predicted Winner: {predicted_winner} <---")
 
 
38
  else:
39
  print("\nCould not make a prediction. One of the fighters may not be in the dataset.")
40
 
 
31
 
32
  # 3. Make the prediction
33
  print(f"\nPredicting winner for: {fighter1_name} vs. {fighter2_name}")
34
+ prediction_result = model.predict(fight)
35
 
36
+ if prediction_result and prediction_result.get('winner'):
37
+ winner = prediction_result['winner']
38
+ prob = prediction_result['probability']
39
+ print(f"\n---> Predicted Winner: {winner} ({prob:.1%}) <---")
40
  else:
41
  print("\nCould not make a prediction. One of the fighters may not be in the dataset.")
42