AlvaroMros's picture
(CAREFUL!!!!!!) Refactor argument parsing and prediction pipeline
7fcaffe
raw
history blame
5.14 kB
from abc import ABC, abstractmethod
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from ..analysis.elo import process_fights_for_elo, INITIAL_ELO
from ..config import FIGHTERS_CSV_PATH
from .preprocess import preprocess_for_ml
class BaseModel(ABC):
    """Common interface shared by every fight-prediction model.

    Concrete subclasses must implement :meth:`train` and :meth:`predict`;
    the class cannot be instantiated until both are provided.
    """

    def __init__(self):
        # Label used in log output; taken from the concrete subclass name.
        self.model_name = type(self).__name__

    @abstractmethod
    def train(self, train_fights):
        """Fit the model on historical fight data."""

    @abstractmethod
    def predict(self, fight):
        """Return the predicted outcome of a single fight."""

    def _format_prediction(self, winner, probability):
        """Bundle a winner and its probability into the standard result dict.

        Returns:
            A dict with exactly two keys, ``'winner'`` and ``'probability'``.
        """
        return dict(winner=winner, probability=probability)
class EloBaselineModel(BaseModel):
    """Baseline predictor that favours the fighter with the higher ELO rating."""

    def train(self, train_fights):
        """Build the fighter table and attach an ELO rating to every fighter.

        Reads the fighters CSV at ``FIGHTERS_CSV_PATH``, indexes it by full
        name, and stores the result on ``self.fighters_df`` with an ``'elo'``
        column. Fighters that receive no rating from
        ``process_fights_for_elo`` are given ``INITIAL_ELO``.

        Args:
            train_fights: Historical fights, passed unchanged to
                ``process_fights_for_elo``.
        """
        print(f"--- Training {self.model_name} ---")

        # Load and prepare fighter data.
        fighters = pd.read_csv(FIGHTERS_CSV_PATH)

        # A missing first or last name would make the concatenation NaN and
        # collapse every single-name fighter into one unusable NaN index row.
        # Treat missing parts as empty and strip the leftover padding space.
        first = fighters['first_name'].fillna('')
        last = fighters['last_name'].fillna('')
        fighters['full_name'] = (first + ' ' + last).str.strip()
        fighters = fighters.drop_duplicates(subset=['full_name']).set_index('full_name')

        # Calculate ELO ratings; the Series is aligned on the full-name index
        # (presumably the result maps fighter name -> rating; verify in elo.py).
        elo_ratings = process_fights_for_elo(train_fights)
        fighters['elo'] = pd.Series(elo_ratings)
        fighters['elo'] = fighters['elo'].fillna(INITIAL_ELO)

        # Publish the table only once it is complete.
        self.fighters_df = fighters
        print("ELO ratings calculated for all fighters.")

    def predict(self, fight):
        """Predict the winner of ``fight`` from the stored ELO ratings.

        Args:
            fight: Mapping with ``'fighter_1'`` and ``'fighter_2'`` name keys.

        Returns:
            The standard prediction dict. ``'probability'`` is the predicted
            winner's win probability (at least 0.5). Both values are ``None``
            when either fighter is missing from the fighter table.
        """
        f1_name, f2_name = fight['fighter_1'], fight['fighter_2']

        # Only the lookups can raise KeyError, so keep the try body minimal.
        try:
            f1_elo = self.fighters_df.loc[f1_name, 'elo']
            f2_elo = self.fighters_df.loc[f2_name, 'elo']
        except KeyError as e:
            print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
            return self._format_prediction(None, None)

        # Calculate win probability using the ELO expected-score formula.
        prob_f1_wins = 1 / (1 + 10 ** ((f2_elo - f1_elo) / 400))
        if prob_f1_wins >= 0.5:
            return self._format_prediction(f1_name, prob_f1_wins)
        return self._format_prediction(f2_name, 1 - prob_f1_wins)
class BaseMLModel(BaseModel):
    """Shared train/predict plumbing for estimators with ``fit`` and ``predict_proba``."""

    def __init__(self, model):
        """Wrap ``model``; raise ``ValueError`` when it is ``None``."""
        super().__init__()
        if model is None:
            raise ValueError("A model must be provided.")
        self.model = model

    def train(self, train_fights):
        """Preprocess ``train_fights`` and fit the wrapped estimator on the result."""
        print(f"--- Training {self.model_name} ---")

        # Third element of the preprocessing result is not needed for fitting.
        features, labels, _ = preprocess_for_ml(train_fights, FIGHTERS_CSV_PATH)
        print(f"Fitting model on {features.shape[0]} samples...")
        self.model.fit(features, labels)
        print("Model training complete.")

    def predict(self, fight):
        """Predict the outcome of one fight with the wrapped estimator.

        Returns the standard prediction dict; both values are ``None`` when
        preprocessing yields no rows or the estimator raises.
        """
        # Run the single fight through the same preprocessing used in training.
        features, _, _ = preprocess_for_ml([fight], FIGHTERS_CSV_PATH)
        if features.empty:
            print(f"Warning: Could not process fight data for {fight['fighter_1']} vs {fight['fighter_2']}")
            return self._format_prediction(None, None)

        try:
            # Column 1 of the first row is read as P(fighter_1 wins)
            # (assumes label 1 means fighter_1 won -- TODO confirm in preprocess).
            p_first = self.model.predict_proba(features)[0][1]
            if p_first >= 0.5:
                return self._format_prediction(fight['fighter_1'], p_first)
            return self._format_prediction(fight['fighter_2'], 1 - p_first)
        except Exception as e:
            # Best-effort: report the failure and return an empty prediction.
            print(f"Error making prediction: {e}")
            return self._format_prediction(None, None)
# Concrete ML model implementations
class LogisticRegressionModel(BaseMLModel):
    """``BaseMLModel`` backed by a seeded ``LogisticRegression``."""

    def __init__(self):
        estimator = LogisticRegression(random_state=42)
        super().__init__(estimator)
class SVCModel(BaseMLModel):
    """``BaseMLModel`` backed by a seeded ``SVC`` with probability estimates on."""

    def __init__(self):
        # probability=True is required so the estimator exposes predict_proba.
        estimator = SVC(probability=True, random_state=42)
        super().__init__(estimator)
class RandomForestModel(BaseMLModel):
    """``BaseMLModel`` backed by a seeded 100-tree ``RandomForestClassifier``."""

    def __init__(self):
        estimator = RandomForestClassifier(n_estimators=100, random_state=42)
        super().__init__(estimator)
class BernoulliNBModel(BaseMLModel):
    """``BaseMLModel`` backed by a default-configured ``BernoulliNB``."""

    def __init__(self):
        estimator = BernoulliNB()
        super().__init__(estimator)
class XGBoostModel(BaseMLModel):
    """``BaseMLModel`` backed by a seeded ``XGBClassifier``."""

    def __init__(self):
        estimator = XGBClassifier(random_state=42)
        super().__init__(estimator)
class LGBMModel(BaseMLModel):
    """``BaseMLModel`` backed by a seeded ``LGBMClassifier``."""

    def __init__(self):
        estimator = LGBMClassifier(random_state=42)
        super().__init__(estimator)