Spaces:
Sleeping
Sleeping
File size: 5,136 Bytes
e012a04 bf7e729 2aed0aa 9678fdb 7fcaffe e012a04 7fcaffe e012a04 7fcaffe e012a04 bf7e729 7fcaffe e012a04 7fcaffe e012a04 7fcaffe e012a04 7fcaffe bf7e729 7fcaffe e012a04 bf7e729 7fcaffe bf7e729 7fcaffe f3ecc65 7fcaffe bf7e729 7fcaffe bf7e729 2aed0aa 7fcaffe 2aed0aa 7fcaffe 2aed0aa bf7e729 7fcaffe bf7e729 7fcaffe bf7e729 7fcaffe bf7e729 7fcaffe bf7e729 7fcaffe 2aed0aa 7fcaffe 2aed0aa 7fcaffe 2aed0aa 7fcaffe 2aed0aa 7fcaffe 2aed0aa 7fcaffe 2aed0aa 7fcaffe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
from abc import ABC, abstractmethod
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from ..analysis.elo import process_fights_for_elo, INITIAL_ELO
from ..config import FIGHTERS_CSV_PATH
from .preprocess import preprocess_for_ml
class BaseModel(ABC):
    """Common interface that every fight-prediction model implements.

    Subclasses must provide ``train`` and ``predict``; the base class only
    records the model's display name and offers a helper for building the
    prediction result dictionary.
    """

    def __init__(self):
        # The concrete subclass name doubles as the human-readable model name.
        self.model_name = type(self).__name__

    @abstractmethod
    def train(self, train_fights):
        """Fit the model on a collection of historical fights."""

    @abstractmethod
    def predict(self, fight):
        """Return a prediction for one fight."""

    def _format_prediction(self, winner, probability):
        """Build the result dictionary shared by all models.

        Returns:
            A dict with the keys ``'winner'`` and ``'probability'`` holding
            the two arguments unchanged.
        """
        return dict(winner=winner, probability=probability)
class EloBaselineModel(BaseModel):
    """Baseline predictor that picks the fighter with the higher ELO rating."""

    def train(self, train_fights):
        """Build the per-fighter ELO table from historical fights.

        Loads the fighters CSV, indexes it by ``"<first_name> <last_name>"``
        (keeping the first row for each duplicated name), and attaches an
        ``elo`` column computed by ``process_fights_for_elo``. Fighters that
        receive no rating are assigned ``INITIAL_ELO``.
        """
        print(f"--- Training {self.model_name} ---")

        # Fighter roster, keyed by full name.
        self.fighters_df = pd.read_csv(FIGHTERS_CSV_PATH)
        first_names = self.fighters_df['first_name']
        last_names = self.fighters_df['last_name']
        self.fighters_df['full_name'] = first_names + ' ' + last_names
        deduplicated = self.fighters_df.drop_duplicates(subset=['full_name'])
        self.fighters_df = deduplicated.set_index('full_name')

        # Ratings are aligned to the roster index on assignment.
        # NOTE(review): presumably a mapping of full name -> rating; confirm
        # against process_fights_for_elo.
        elo_ratings = process_fights_for_elo(train_fights)
        self.fighters_df['elo'] = pd.Series(elo_ratings)
        self.fighters_df['elo'] = self.fighters_df['elo'].fillna(INITIAL_ELO)
        print("ELO ratings calculated for all fighters.")

    def predict(self, fight):
        """Pick the winner of ``fight`` from the stored ELO ratings.

        Args:
            fight: Mapping with ``'fighter_1'`` and ``'fighter_2'`` names.

        Returns:
            The formatted prediction holding the favoured fighter and their
            win probability (always >= 0.5 for finite ratings), or a
            prediction with ``None`` values when a rating lookup raises
            ``KeyError``.
        """
        f1_name = fight['fighter_1']
        f2_name = fight['fighter_2']
        try:
            rating_1 = self.fighters_df.loc[f1_name, 'elo']
            rating_2 = self.fighters_df.loc[f2_name, 'elo']

            # Standard ELO expected-score formula for fighter 1.
            exponent = (rating_2 - rating_1) / 400
            prob_f1_wins = 1 / (1 + 10**exponent)

            if prob_f1_wins >= 0.5:
                return self._format_prediction(f1_name, prob_f1_wins)
            return self._format_prediction(f2_name, 1 - prob_f1_wins)
        except KeyError as missing:
            print(f"Warning: Could not find ELO for fighter {missing}. Skipping prediction.")
            return self._format_prediction(None, None)
class BaseMLModel(BaseModel):
    """Shared train/predict plumbing for estimator-backed models.

    Wraps an estimator object on which ``fit`` and ``predict_proba`` are
    called; features are produced by ``preprocess_for_ml``.
    """

    def __init__(self, model):
        """Store the estimator to wrap.

        Raises:
            ValueError: If ``model`` is ``None``.
        """
        super().__init__()
        if model is None:
            raise ValueError("A model must be provided.")
        self.model = model

    def train(self, train_fights):
        """Preprocess ``train_fights`` and fit the wrapped estimator on them."""
        print(f"--- Training {self.model_name} ---")

        features, labels, _ = preprocess_for_ml(train_fights, FIGHTERS_CSV_PATH)
        sample_count = features.shape[0]
        print(f"Fitting model on {sample_count} samples...")
        self.model.fit(features, labels)
        print("Model training complete.")

    def predict(self, fight):
        """Predict the outcome of a single fight with the fitted estimator.

        Args:
            fight: Mapping with ``'fighter_1'`` and ``'fighter_2'`` names.

        Returns:
            The formatted prediction holding the favoured fighter and their
            probability, or a prediction with ``None`` values when
            preprocessing yields no rows or the prediction step raises.
        """
        # The preprocessing helper takes a list of fights, so wrap the one.
        features, _, _ = preprocess_for_ml([fight], FIGHTERS_CSV_PATH)
        if features.empty:
            print(f"Warning: Could not process fight data for {fight['fighter_1']} vs {fight['fighter_2']}")
            return self._format_prediction(None, None)

        try:
            # First row, column 1 of the probability matrix.
            # NOTE(review): presumably the "fighter_1 wins" class; confirm
            # the label encoding used by preprocess_for_ml.
            prob_f1_wins = self.model.predict_proba(features)[0][1]
            if prob_f1_wins >= 0.5:
                winner, probability = fight['fighter_1'], prob_f1_wins
            else:
                winner, probability = fight['fighter_2'], 1 - prob_f1_wins
            return self._format_prediction(winner, probability)
        except Exception as err:
            # Best-effort: report the failure and return an empty prediction.
            print(f"Error making prediction: {err}")
            return self._format_prediction(None, None)
# Concrete ML model implementations
class LogisticRegressionModel(BaseMLModel):
    """Fight predictor backed by scikit-learn's ``LogisticRegression``."""

    def __init__(self):
        # Seed fixed at 42.
        estimator = LogisticRegression(random_state=42)
        super().__init__(estimator)
class SVCModel(BaseMLModel):
    """Fight predictor backed by scikit-learn's ``SVC``."""

    def __init__(self):
        # probability=True is required for SVC to expose predict_proba.
        estimator = SVC(probability=True, random_state=42)
        super().__init__(estimator)
class RandomForestModel(BaseMLModel):
    """Fight predictor backed by scikit-learn's ``RandomForestClassifier``."""

    def __init__(self):
        # 100 trees, seed fixed at 42.
        estimator = RandomForestClassifier(n_estimators=100, random_state=42)
        super().__init__(estimator)
class BernoulliNBModel(BaseMLModel):
    """Fight predictor backed by scikit-learn's ``BernoulliNB``."""

    def __init__(self):
        # Library defaults are used as-is.
        estimator = BernoulliNB()
        super().__init__(estimator)
class XGBoostModel(BaseMLModel):
    """Fight predictor backed by XGBoost's ``XGBClassifier``."""

    def __init__(self):
        # Seed fixed at 42.
        estimator = XGBClassifier(random_state=42)
        super().__init__(estimator)
class LGBMModel(BaseMLModel):
    """Fight predictor backed by LightGBM's ``LGBMClassifier``."""

    def __init__(self):
        # Seed fixed at 42.
        estimator = LGBMClassifier(random_state=42)
        super().__init__(estimator)