Spaces:
Running
Running
Commit
·
3994c21
1
Parent(s):
2181ea6
Add model update pipeline and improve CLI options
Browse files
Introduces a new 'update' pipeline option to retrain models only if new data is detected or models are missing, without running evaluation. Refactors model instantiation into a shared MODELS_TO_RUN list, updates CLI help and documentation, and implements the update_models_if_new_data method in PredictionPipeline.
- README.md +6 -2
- src/main.py +15 -2
- src/predict/main.py +14 -1
- src/predict/pipeline.py +21 -0
README.md
CHANGED
|
@@ -50,13 +50,17 @@ python -m src.main --pipeline predict --force-retrain
|
|
| 50 |
```
|
| 51 |
Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
```bash
|
| 56 |
python -m src.main --pipeline all --scrape-mode update
|
| 57 |
```
|
| 58 |
Runs scraping (update mode), analysis, and prediction in sequence.
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
## Model Performance
|
| 61 |
|
| 62 |
The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
|
|
|
|
| 50 |
```
|
| 51 |
Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
|
| 52 |
|
| 53 |
+
#### 2.1 Complete Pipeline
|
|
|
|
| 54 |
```bash
|
| 55 |
python -m src.main --pipeline all --scrape-mode update
|
| 56 |
```
|
| 57 |
Runs scraping (update mode), analysis, and prediction in sequence.
|
| 58 |
|
| 59 |
+
#### 2.2 Update Models
|
| 60 |
+
```bash
|
| 61 |
+
python -m src.main --pipeline update
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
## Model Performance
|
| 65 |
|
| 66 |
The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
|
src/main.py
CHANGED
|
@@ -15,8 +15,8 @@ def main():
|
|
| 15 |
'--pipeline',
|
| 16 |
type=str,
|
| 17 |
default='scrape',
|
| 18 |
-
choices=['scrape', 'analysis', 'predict', 'all'],
|
| 19 |
-
help="Pipeline to run: 'scrape', 'analysis', 'predict', or 'all'"
|
| 20 |
)
|
| 21 |
parser.add_argument(
|
| 22 |
'--scrape-mode',
|
|
@@ -69,6 +69,19 @@ def main():
|
|
| 69 |
print("\n=== Running ELO Analysis ===")
|
| 70 |
from analysis.elo import main as elo_main
|
| 71 |
elo_main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
if args.pipeline in ['predict', 'all']:
|
| 74 |
print("\n=== Running Prediction Pipeline ===")
|
|
|
|
| 15 |
'--pipeline',
|
| 16 |
type=str,
|
| 17 |
default='scrape',
|
| 18 |
+
choices=['scrape', 'analysis', 'predict', 'update', 'all'],
|
| 19 |
+
help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
|
| 20 |
)
|
| 21 |
parser.add_argument(
|
| 22 |
'--scrape-mode',
|
|
|
|
| 69 |
print("\n=== Running ELO Analysis ===")
|
| 70 |
from analysis.elo import main as elo_main
|
| 71 |
elo_main()
|
| 72 |
+
|
| 73 |
+
if args.pipeline == 'update':
|
| 74 |
+
print("\n=== Running Model Update Pipeline ===")
|
| 75 |
+
try:
|
| 76 |
+
from src.predict.main import MODELS_TO_RUN
|
| 77 |
+
from src.predict.pipeline import PredictionPipeline
|
| 78 |
+
except ImportError:
|
| 79 |
+
print("Fatal: Could not import prediction modules.")
|
| 80 |
+
print("Please ensure your project structure and python path are correct.")
|
| 81 |
+
return
|
| 82 |
+
|
| 83 |
+
pipeline = PredictionPipeline(models=MODELS_TO_RUN)
|
| 84 |
+
pipeline.update_models_if_new_data()
|
| 85 |
|
| 86 |
if args.pipeline in ['predict', 'all']:
|
| 87 |
print("\n=== Running Prediction Pipeline ===")
|
src/predict/main.py
CHANGED
|
@@ -12,6 +12,19 @@ from src.predict.models import (
|
|
| 12 |
LGBMModel
|
| 13 |
)
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def main():
|
| 16 |
"""
|
| 17 |
Main entry point to run the prediction pipeline.
|
|
@@ -70,7 +83,7 @@ def main():
|
|
| 70 |
# --- End of Model Definition ---
|
| 71 |
|
| 72 |
pipeline = PredictionPipeline(
|
| 73 |
-
models=
|
| 74 |
use_existing_models=use_existing_models,
|
| 75 |
force_retrain=force_retrain
|
| 76 |
)
|
|
|
|
| 12 |
LGBMModel
|
| 13 |
)
|
| 14 |
|
| 15 |
+
# --- Define Models to Run ---
|
| 16 |
+
# Instantiate all the models you want to evaluate here.
|
| 17 |
+
MODELS_TO_RUN = [
|
| 18 |
+
EloBaselineModel(),
|
| 19 |
+
LogisticRegressionModel(),
|
| 20 |
+
XGBoostModel(),
|
| 21 |
+
SVCModel(),
|
| 22 |
+
RandomForestModel(),
|
| 23 |
+
BernoulliNBModel(),
|
| 24 |
+
LGBMModel(),
|
| 25 |
+
]
|
| 26 |
+
# --- End of Model Definition ---
|
| 27 |
+
|
| 28 |
def main():
|
| 29 |
"""
|
| 30 |
Main entry point to run the prediction pipeline.
|
|
|
|
| 83 |
# --- End of Model Definition ---
|
| 84 |
|
| 85 |
pipeline = PredictionPipeline(
|
| 86 |
+
models=MODELS_TO_RUN,
|
| 87 |
use_existing_models=use_existing_models,
|
| 88 |
force_retrain=force_retrain
|
| 89 |
)
|
src/predict/pipeline.py
CHANGED
|
@@ -232,6 +232,27 @@ class PredictionPipeline:
|
|
| 232 |
if should_retrain:
|
| 233 |
self._train_and_save_models()
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
def _train_and_save_models(self):
|
| 236 |
"""Trains all models on the full dataset and saves them."""
|
| 237 |
print("\n\n--- Training and Saving All Models on Full Dataset ---")
|
|
|
|
| 232 |
if should_retrain:
|
| 233 |
self._train_and_save_models()
|
| 234 |
|
| 235 |
+
def update_models_if_new_data(self):
|
| 236 |
+
"""
|
| 237 |
+
Checks for new data and retrains/saves all models on the full dataset if needed.
|
| 238 |
+
This does not run any evaluation.
|
| 239 |
+
"""
|
| 240 |
+
print("\n--- Checking for Model Updates ---")
|
| 241 |
+
|
| 242 |
+
# Check if any model files are missing or invalid
|
| 243 |
+
missing_models = [m for m in self.models if not self._model_exists(m)]
|
| 244 |
+
has_new_data = self._has_new_data_since_last_training()
|
| 245 |
+
|
| 246 |
+
if missing_models:
|
| 247 |
+
missing_names = [m.__class__.__name__ for m in missing_models]
|
| 248 |
+
print(f"Missing or invalid model files found for: {missing_names}.")
|
| 249 |
+
self._train_and_save_models()
|
| 250 |
+
elif has_new_data:
|
| 251 |
+
print("New data detected, retraining all models...")
|
| 252 |
+
self._train_and_save_models()
|
| 253 |
+
else:
|
| 254 |
+
print("No new data detected. Models are already up-to-date.")
|
| 255 |
+
|
| 256 |
def _train_and_save_models(self):
|
| 257 |
"""Trains all models on the full dataset and saves them."""
|
| 258 |
print("\n\n--- Training and Saving All Models on Full Dataset ---")
|