File size: 5,136 Bytes
e012a04
bf7e729
 
2aed0aa
 
 
 
 
9678fdb
 
7fcaffe
e012a04
 
7fcaffe
 
 
 
 
e012a04
 
7fcaffe
e012a04
 
 
bf7e729
7fcaffe
e012a04
 
7fcaffe
 
 
e012a04
7fcaffe
 
 
e012a04
7fcaffe
 
 
 
bf7e729
 
 
7fcaffe
 
 
 
 
 
 
e012a04
bf7e729
7fcaffe
bf7e729
 
 
 
 
 
7fcaffe
f3ecc65
7fcaffe
 
 
 
 
 
bf7e729
 
7fcaffe
bf7e729
2aed0aa
7fcaffe
 
2aed0aa
7fcaffe
2aed0aa
 
 
bf7e729
 
7fcaffe
 
bf7e729
7fcaffe
bf7e729
 
 
 
 
 
7fcaffe
 
 
bf7e729
7fcaffe
 
 
bf7e729
7fcaffe
 
 
 
 
 
 
 
 
 
 
2aed0aa
7fcaffe
2aed0aa
 
7fcaffe
2aed0aa
 
 
7fcaffe
2aed0aa
 
 
7fcaffe
2aed0aa
 
 
7fcaffe
 
 
 
 
2aed0aa
 
 
7fcaffe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from abc import ABC, abstractmethod
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from ..analysis.elo import process_fights_for_elo, INITIAL_ELO
from ..config import FIGHTERS_CSV_PATH
from .preprocess import preprocess_for_ml

class BaseModel(ABC):
    """Common interface implemented by every prediction model.

    Subclasses must provide ``train`` and ``predict``; the base class only
    records a display name and offers a helper for shaping results.
    """

    def __init__(self):
        # The concrete subclass name doubles as the model's display name.
        self.model_name = type(self).__name__

    @abstractmethod
    def train(self, train_fights):
        """Fit the model on historical fight data."""

    @abstractmethod
    def predict(self, fight):
        """Return the prediction for one fight."""

    def _format_prediction(self, winner, probability):
        """Package a result as a dict with ``winner`` and ``probability`` keys."""
        return dict(winner=winner, probability=probability)

class EloBaselineModel(BaseModel):
    """Baseline predictor that favours the fighter with the higher ELO rating."""

    def train(self, train_fights):
        """Build the per-fighter ELO table from the training fights.

        Reads the fighters CSV, indexes it by "first last" full name (one row
        per name), and attaches an ``elo`` column computed by
        ``process_fights_for_elo``. Fighters without a computed rating get
        ``INITIAL_ELO``.
        """
        print(f"--- Training {self.model_name} ---")

        # Fighter roster, keyed by full name with duplicate names collapsed
        self.fighters_df = pd.read_csv(FIGHTERS_CSV_PATH)
        full_names = self.fighters_df['first_name'] + ' ' + self.fighters_df['last_name']
        self.fighters_df['full_name'] = full_names
        unique_fighters = self.fighters_df.drop_duplicates(subset=['full_name'])
        self.fighters_df = unique_fighters.set_index('full_name')

        # Ratings are aligned onto the roster by name; gaps fall back to the default
        computed_ratings = pd.Series(process_fights_for_elo(train_fights))
        self.fighters_df['elo'] = computed_ratings
        filled_ratings = self.fighters_df['elo'].fillna(INITIAL_ELO)
        self.fighters_df['elo'] = filled_ratings

        print("ELO ratings calculated for all fighters.")

    def predict(self, fight):
        """Pick the winner of ``fight`` from the stored ELO ratings.

        Returns the formatted prediction for the favoured fighter, or a
        prediction with ``None`` winner and probability when either fighter
        is missing from the rating table.
        """
        f1_name = fight['fighter_1']
        f2_name = fight['fighter_2']

        try:
            # Look up fighter 1 first, then fighter 2
            f1_elo, f2_elo = (
                self.fighters_df.loc[name, 'elo'] for name in (f1_name, f2_name)
            )

            # Standard ELO expected-score formula for fighter 1
            rating_gap = f2_elo - f1_elo
            prob_f1_wins = 1 / (1 + 10**(rating_gap / 400))

            if prob_f1_wins >= 0.5:
                return self._format_prediction(f1_name, prob_f1_wins)
            return self._format_prediction(f2_name, 1 - prob_f1_wins)

        except KeyError as e:
            print(f"Warning: Could not find ELO for fighter {e}. Skipping prediction.")
            return self._format_prediction(None, None)

class BaseMLModel(BaseModel):
    """Shared train/predict plumbing for estimator-backed models.

    Wraps an estimator object that is used through ``fit`` and
    ``predict_proba``; features are produced by ``preprocess_for_ml``.
    """

    def __init__(self, model):
        """Store the estimator to wrap.

        Raises:
            ValueError: if ``model`` is None.
        """
        super().__init__()
        if model is None:
            raise ValueError("A model must be provided.")
        self.model = model

    def train(self, train_fights):
        """Preprocess ``train_fights`` and fit the wrapped estimator on the result."""
        print(f"--- Training {self.model_name} ---")

        features, labels, _ = preprocess_for_ml(train_fights, FIGHTERS_CSV_PATH)
        print(f"Fitting model on {features.shape[0]} samples...")
        self.model.fit(features, labels)
        print("Model training complete.")

    def predict(self, fight):
        """Predict the outcome of a single fight with the wrapped estimator.

        Returns a prediction with ``None`` winner and probability when the
        fight yields no features or when the estimator raises.
        """
        # The preprocessor takes a list of fights, so wrap the single fight
        features, _, _ = preprocess_for_ml([fight], FIGHTERS_CSV_PATH)

        if features.empty:
            print(f"Warning: Could not process fight data for {fight['fighter_1']} vs {fight['fighter_2']}")
            return self._format_prediction(None, None)

        try:
            # Column 1 of the first row is treated as P(fighter_1 wins)
            prob_f1_wins = self.model.predict_proba(features)[0][1]
            if prob_f1_wins >= 0.5:
                picked, confidence = fight['fighter_1'], prob_f1_wins
            else:
                picked, confidence = fight['fighter_2'], 1 - prob_f1_wins

            return self._format_prediction(picked, confidence)

        except Exception as e:
            print(f"Error making prediction: {e}")
            return self._format_prediction(None, None)

# Concrete ML model implementations
class LogisticRegressionModel(BaseMLModel):
    """ML model backed by ``LogisticRegression`` with a fixed random seed."""

    def __init__(self):
        estimator = LogisticRegression(random_state=42)
        super().__init__(estimator)

class SVCModel(BaseMLModel):
    """ML model backed by ``SVC`` with probability estimates enabled."""

    def __init__(self):
        estimator = SVC(probability=True, random_state=42)
        super().__init__(estimator)

class RandomForestModel(BaseMLModel):
    """ML model backed by a 100-tree ``RandomForestClassifier`` with a fixed seed."""

    def __init__(self):
        estimator = RandomForestClassifier(n_estimators=100, random_state=42)
        super().__init__(estimator)

class BernoulliNBModel(BaseMLModel):
    """ML model backed by ``BernoulliNB`` with its default settings."""

    def __init__(self):
        estimator = BernoulliNB()
        super().__init__(estimator)

class XGBoostModel(BaseMLModel):
    """ML model backed by ``XGBClassifier`` with a fixed random seed."""

    def __init__(self):
        estimator = XGBClassifier(random_state=42)
        super().__init__(estimator)

class LGBMModel(BaseMLModel):
    """ML model backed by ``LGBMClassifier`` with a fixed random seed."""

    def __init__(self):
        estimator = LGBMClassifier(random_state=42)
        super().__init__(estimator)