# NOTE: page-header residue from extraction ("Spaces: Sleeping"); not part of the program.
import argparse | |
import sys | |
import os | |
def _run_with_argv(entry_point, argv):
    """Call ``entry_point()`` with ``sys.argv`` temporarily replaced by *argv*.

    The sub-pipeline entry points are called without arguments, so their
    options are handed over through ``sys.argv``. The previous ``sys.argv``
    is restored even if the entry point raises.
    """
    saved_argv = sys.argv
    sys.argv = argv
    try:
        entry_point()
    finally:
        sys.argv = saved_argv


def main(argv=None):
    """Main entry point for the UFC data pipeline.

    Supports scraping, analysis, model-update, and prediction workflows,
    selected with ``--pipeline``. With ``--pipeline all`` the scraping,
    analysis, and prediction stages run in that order (the ``update`` stage
    only runs when it is selected explicitly).

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default) argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid command-line arguments (raised by argparse),
            or with status 1 when ``--pipeline update`` cannot import the
            prediction modules.
    """
    parser = argparse.ArgumentParser(description="UFC Data Pipeline")
    parser.add_argument(
        '--pipeline',
        type=str,
        default='scrape',
        choices=['scrape', 'analysis', 'predict', 'update', 'all'],
        help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
    )
    parser.add_argument(
        '--scrape-mode',
        type=str,
        default='full',
        choices=['full', 'update'],
        help="Scraping mode: 'full' (complete scraping) or 'update' (latest events only)"
    )
    parser.add_argument(
        '--num-events',
        type=int,
        default=5,
        help="Number of latest events to scrape in update mode (default: 5)"
    )

    # Model management arguments for prediction pipeline.
    # NOTE: '--use-existing-models' is store_true with default=True, so its
    # value is always True; it is accepted for command-line compatibility and
    # the only way to opt out is '--no-use-existing-models'.
    parser.add_argument(
        '--use-existing-models',
        action='store_true',
        default=True,
        help="Use existing saved models if available and no new data (default: True)."
    )
    parser.add_argument(
        '--no-use-existing-models',
        action='store_true',
        default=False,
        help="Force retrain all models from scratch, ignoring existing saved models."
    )
    parser.add_argument(
        '--force-retrain',
        action='store_true',
        default=False,
        help="Force retrain all models even if no new data is available."
    )

    # argv=None keeps the original behavior of parsing sys.argv[1:].
    args = parser.parse_args(argv)

    if args.pipeline in ['scrape', 'all']:
        print("=== Running Scraping Pipeline ===")
        from src.scrape.main import main as scrape_main

        # Pass the scraping options to scrape.main through sys.argv.
        _run_with_argv(
            scrape_main,
            ['scrape_main', '--mode', args.scrape_mode, '--num-events', str(args.num_events)],
        )

    if args.pipeline in ['analysis', 'all']:
        print("\n=== Running ELO Analysis ===")
        from src.analysis.elo import main as elo_main

        elo_main()

    if args.pipeline == 'update':
        print("\n=== Running Model Update Pipeline ===")
        try:
            from src.predict.main import MODELS_TO_RUN
            from src.predict.pipeline import PredictionPipeline
        except ImportError as exc:
            # A failed import is fatal: report it on stderr together with the
            # underlying cause and exit non-zero so callers can detect it.
            print("Fatal: Could not import prediction modules.", file=sys.stderr)
            print("Please ensure your project structure and python path are correct.",
                  file=sys.stderr)
            print("Underlying error:", exc, file=sys.stderr)
            sys.exit(1)

        pipeline = PredictionPipeline(models=MODELS_TO_RUN)
        pipeline.update_models_if_new_data()

    if args.pipeline in ['predict', 'all']:
        print("\n=== Running Prediction Pipeline ===")
        from src.predict.main import main as predict_main

        # Pass the model management options to predict.main through sys.argv.
        predict_args = ['predict_main']
        if args.no_use_existing_models:
            # The opt-out flag takes precedence over the always-true default.
            predict_args.append('--no-use-existing-models')
        else:
            predict_args.append('--use-existing-models')
        if args.force_retrain:
            predict_args.append('--force-retrain')

        _run_with_argv(predict_main, predict_args)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()