File size: 3,530 Bytes
ffd453e
5271c2e
 
7fcaffe
 
ffd453e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fcaffe
 
ffd453e
 
 
 
 
 
 
 
 
 
5271c2e
 
ffd453e
 
 
 
3994c21
ffd453e
 
 
 
 
 
 
 
 
 
 
5271c2e
 
ffd453e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5271c2e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import argparse
import sys
import os

def _build_parser():
    """Build the argument parser for the pipeline command line.

    Returns:
        An ``argparse.ArgumentParser`` exposing the pipeline selector, the
        scraping options and the model-management flags.
    """
    parser = argparse.ArgumentParser(description="UFC Data Pipeline")
    parser.add_argument(
        '--pipeline',
        type=str,
        default='scrape',
        choices=['scrape', 'analysis', 'predict', 'update', 'all'],
        help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
    )
    parser.add_argument(
        '--scrape-mode',
        type=str,
        default='full',
        choices=['full', 'update'],
        help="Scraping mode: 'full' (complete scraping) or 'update' (latest events only)"
    )
    parser.add_argument(
        '--num-events',
        type=int,
        default=5,
        help="Number of latest events to scrape in update mode (default: 5)"
    )
    # Model management arguments for prediction pipeline
    parser.add_argument(
        '--use-existing-models',
        action='store_true',
        default=True,
        help="Use existing saved models if available and no new data (default: True)."
    )
    parser.add_argument(
        '--no-use-existing-models',
        action='store_true',
        default=False,
        help="Force retrain all models from scratch, ignoring existing saved models."
    )
    parser.add_argument(
        '--force-retrain',
        action='store_true',
        default=False,
        help="Force retrain all models even if no new data is available."
    )
    return parser


def _run_with_argv(entry_point, argv):
    """Call ``entry_point()`` with ``sys.argv`` temporarily replaced by ``argv``.

    The sub-pipeline entry points are called without arguments, so their
    options are handed over through ``sys.argv``. The previous value is
    restored even if the entry point raises.
    """
    saved_argv = sys.argv
    sys.argv = argv
    try:
        return entry_point()
    finally:
        sys.argv = saved_argv


def main(argv=None):
    """
    Main entry point for the UFC data pipeline.
    Supports scraping, analysis, prediction and model-update workflows.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None``, argparse reads ``sys.argv[1:]``.

    Returns:
        0 when the selected stages ran to completion, 1 when the modules
        required by ``--pipeline update`` could not be imported.
    """
    args = _build_parser().parse_args(argv)

    # Stage modules are imported inside their branch so that only the
    # selected stage's modules are loaded.
    if args.pipeline in ('scrape', 'all'):
        print("=== Running Scraping Pipeline ===")
        from src.scrape.main import main as scrape_main

        _run_with_argv(
            scrape_main,
            ['scrape_main', '--mode', args.scrape_mode, '--num-events', str(args.num_events)],
        )

    if args.pipeline in ('analysis', 'all'):
        print("\n=== Running ELO Analysis ===")
        from src.analysis.elo import main as elo_main
        elo_main()

    if args.pipeline == 'update':
        print("\n=== Running Model Update Pipeline ===")
        try:
            from src.predict.main import MODELS_TO_RUN
            from src.predict.pipeline import PredictionPipeline
        except ImportError as exc:
            print("Fatal: Could not import prediction modules.")
            print("Please ensure your project structure and python path are correct.")
            # Show the underlying error so the missing module can be identified.
            print("Import error: {}".format(exc))
            # Report the failure through the exit status instead of exiting 0.
            return 1

        pipeline = PredictionPipeline(models=MODELS_TO_RUN)
        pipeline.update_models_if_new_data()

    if args.pipeline in ('predict', 'all'):
        print("\n=== Running Prediction Pipeline ===")
        from src.predict.main import main as predict_main

        # Forward the model management flags to predict.main.
        predict_argv = ['predict_main']

        # --no-use-existing-models takes precedence when both flags are set.
        if args.no_use_existing_models:
            predict_argv.append('--no-use-existing-models')
        elif args.use_existing_models:
            predict_argv.append('--use-existing-models')

        if args.force_retrain:
            predict_argv.append('--force-retrain')

        _run_with_argv(predict_main, predict_argv)

    return 0


if __name__ == '__main__':
    # Propagate main()'s status code to the shell.
    sys.exit(main())