Spaces:

AlvaroMros
/

ufc-predictor

Sleeping

AlvaroMros committed on Jul 5

Commit

f3ecc65

1 Parent(s): 596c26a

Add win probability to fight predictions

Updated prediction logic in models and pipeline to return both the predicted winner and the associated win probability. Adjusted app.py and predict_new.py to display the probability alongside the winner. This provides more informative prediction results for users and improves model evaluation reporting.

Files changed (5) hide show

.gitignore +2 -1
app.py +5 -3
src/predict/models.py +21 -9
src/predict/pipeline.py +5 -2
src/predict/predict_new.py +5 -3

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 *__pycache__/
-example_event.html

 *__pycache__/
+example_event.html
+web/

app.py CHANGED Viewed

@@ -54,10 +54,12 @@ def predict_fight(model_name, fighter1_name, fighter2_name):
             'event_date': datetime.now().strftime('%B %d, %Y')
         }
-        predicted_winner = model.predict(fight)
-        if predicted_winner:
-            return f"Predicted Winner: {predicted_winner}"
         else:
             return "Could not make a prediction. Is one of the fighters new or not in the dataset?"

             'event_date': datetime.now().strftime('%B %d, %Y')
         }
+        prediction_result = model.predict(fight)
+        if prediction_result and prediction_result.get('winner'):
+            winner = prediction_result['winner']
+            prob = prediction_result['probability']
+            return f"Predicted Winner: {winner} ({prob:.1%})"
         else:
             return "Could not make a prediction. Is one of the fighters new or not in the dataset?"

src/predict/models.py CHANGED Viewed

@@ -54,19 +54,24 @@ class EloBaselineModel(BaseModel):
         self.fighters_df = self.fighters_df.drop_duplicates(subset=['full_name']).set_index('full_name')
     def predict(self, fight):
-        """Predicts the winner based on who has the higher ELO score."""
         f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
         try:
             f1_elo = self.fighters_df.loc[f1_name, 'elo']
             f2_elo = self.fighters_df.loc[f2_name, 'elo']
-            return f1_name if f1_elo > f2_elo else f2_name
         except KeyError as e:
-            # If a fighter isn't found, we can't make a prediction.
-            # Returning None or a default is a design choice.
             print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
-            return None
 class BaseMLModel(BaseModel):
     """
@@ -112,14 +117,14 @@ class BaseMLModel(BaseModel):
     def predict(self, fight):
         """
-        Predicts the outcome of a single fight by generating its feature vector.
         """
         f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
         fight_date = pd.to_datetime(fight['event_date'])
         if f1_name not in self.fighters_df.index or f2_name not in self.fighters_df.index:
             print(f"Warning: Fighter not found. Skipping prediction for {f1_name} vs {f2_name}")
-            return None
         f1_stats = self.fighters_df.loc[f1_name]
         f2_stats = self.fighters_df.loc[f2_name]
@@ -149,8 +154,15 @@ class BaseMLModel(BaseModel):
         }
         feature_vector = pd.DataFrame([features]).fillna(0)
-        prediction = self.model.predict(feature_vector)[0]
-        return f1_name if prediction == 1 else f2_name
 class LogisticRegressionModel(BaseMLModel):
     """A thin wrapper for scikit-learn's LogisticRegression."""

         self.fighters_df = self.fighters_df.drop_duplicates(subset=['full_name']).set_index('full_name')
     def predict(self, fight):
+        """Predicts the winner based on ELO and calculates win probability."""
         f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
         try:
             f1_elo = self.fighters_df.loc[f1_name, 'elo']
             f2_elo = self.fighters_df.loc[f2_name, 'elo']
+            # Calculate win probability for fighter 1 using the ELO formula
+            prob_f1_wins = 1 / (1 + 10**((f2_elo - f1_elo) / 400))
+            if prob_f1_wins >= 0.5:
+                return {'winner': f1_name, 'probability': prob_f1_wins}
+            else:
+                return {'winner': f2_name, 'probability': 1 - prob_f1_wins}
         except KeyError as e:
             print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
+            return {'winner': None, 'probability': None}
 class BaseMLModel(BaseModel):
     """
     def predict(self, fight):
         """
+        Predicts the outcome of a single fight, returning the winner and probability.
         """
         f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
         fight_date = pd.to_datetime(fight['event_date'])
         if f1_name not in self.fighters_df.index or f2_name not in self.fighters_df.index:
             print(f"Warning: Fighter not found. Skipping prediction for {f1_name} vs {f2_name}")
+            return {'winner': None, 'probability': None}
         f1_stats = self.fighters_df.loc[f1_name]
         f2_stats = self.fighters_df.loc[f2_name]
         }
         feature_vector = pd.DataFrame([features]).fillna(0)
+        # Use predict_proba to get probabilities for each class
+        probabilities = self.model.predict_proba(feature_vector)[0]
+        prob_f1_wins = probabilities[1]  # Probability of class '1' (fighter 1 wins)
+        if prob_f1_wins >= 0.5:
+            return {'winner': f1_name, 'probability': prob_f1_wins}
+        else:
+            return {'winner': f2_name, 'probability': 1 - prob_f1_wins}
 class LogisticRegressionModel(BaseMLModel):
     """A thin wrapper for scikit-learn's LogisticRegression."""

src/predict/pipeline.py CHANGED Viewed

@@ -66,8 +66,10 @@ class PredictionPipeline:
                 actual_winner = fight['winner']
                 event_name = fight.get('event_name', 'Unknown Event')
-                predicted_winner = model.predict(fight)
                 is_correct = (predicted_winner == actual_winner)
                 if is_correct:
                     correct_predictions += 1
@@ -76,6 +78,7 @@ class PredictionPipeline:
                     'fight': f"{f1_name} vs. {f2_name}",
                     'event': event_name,
                     'predicted_winner': predicted_winner,
                     'actual_winner': actual_winner,
                     'is_correct': is_correct
                 })

                 actual_winner = fight['winner']
                 event_name = fight.get('event_name', 'Unknown Event')
+                prediction_result = model.predict(fight)
+                predicted_winner = prediction_result.get('winner')
+                probability = prediction_result.get('probability')
                 is_correct = (predicted_winner == actual_winner)
                 if is_correct:
                     correct_predictions += 1
                     'fight': f"{f1_name} vs. {f2_name}",
                     'event': event_name,
                     'predicted_winner': predicted_winner,
+                    'probability': f"{probability:.1%}" if probability is not None else "N/A",
                     'actual_winner': actual_winner,
                     'is_correct': is_correct
                 })

src/predict/predict_new.py CHANGED Viewed

@@ -31,10 +31,12 @@ def predict_new_fight(fighter1_name, fighter2_name, model_path):
     # 3. Make the prediction
     print(f"\nPredicting winner for: {fighter1_name} vs. {fighter2_name}")
-    predicted_winner = model.predict(fight)
-    if predicted_winner:
-        print(f"\n---> Predicted Winner: {predicted_winner} <---")
     else:
         print("\nCould not make a prediction. One of the fighters may not be in the dataset.")

     # 3. Make the prediction
     print(f"\nPredicting winner for: {fighter1_name} vs. {fighter2_name}")
+    prediction_result = model.predict(fight)
+    if prediction_result and prediction_result.get('winner'):
+        winner = prediction_result['winner']
+        prob = prediction_result['probability']
+        print(f"\n---> Predicted Winner: {winner} ({prob:.1%}) <---")
     else:
         print("\nCould not make a prediction. One of the fighters may not be in the dataset.")