Alvaro committed on
Commit
d48eef6
·
1 Parent(s): c17726e
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  *__pycache__/
 
2
  example_event.html
 
1
  *__pycache__/
2
+ output/
3
  example_event.html
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'src' directory a package.
src/analysis/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'analysis' directory a package.
src/analysis/elo.py CHANGED
@@ -1,10 +1,13 @@
1
  import csv
2
  import os
3
  from datetime import datetime
 
 
 
4
 
5
  # --- ELO Configuration ---
6
  INITIAL_ELO = 1500
7
- K_FACTOR = 32
8
  # --- End Configuration ---
9
 
10
  def calculate_expected_score(rating1, rating2):
@@ -27,7 +30,7 @@ def update_elo_draw(elo1, elo2):
27
 
28
  return elo1 + change1, elo2 + change2
29
 
30
- def process_fights_for_elo(fights_csv_path='output/ufc_fights.csv'):
31
  """
32
  Processes all fights chronologically to calculate final ELO scores for all fighters.
33
  """
@@ -70,7 +73,7 @@ def process_fights_for_elo(fights_csv_path='output/ufc_fights.csv'):
70
 
71
  return elos
72
 
73
- def add_elo_to_fighters_csv(elos, fighters_csv_path='output/ufc_fighters.csv'):
74
  """
75
  Adds the final ELO scores as a new column to the fighters CSV data.
76
  """
 
1
  import csv
2
  import os
3
  from datetime import datetime
4
+ import sys
5
+
6
+ from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
7
 
8
  # --- ELO Configuration ---
9
  INITIAL_ELO = 1500
10
+ K_FACTOR = 40
11
  # --- End Configuration ---
12
 
13
  def calculate_expected_score(rating1, rating2):
 
30
 
31
  return elo1 + change1, elo2 + change2
32
 
33
+ def process_fights_for_elo(fights_csv_path=FIGHTS_CSV_PATH):
34
  """
35
  Processes all fights chronologically to calculate final ELO scores for all fighters.
36
  """
 
73
 
74
  return elos
75
 
76
+ def add_elo_to_fighters_csv(elos, fighters_csv_path=FIGHTERS_CSV_PATH):
77
  """
78
  Adds the final ELO scores as a new column to the fighters CSV data.
79
  """
src/analyze.py CHANGED
@@ -1,25 +1,15 @@
1
- import json
2
  import pandas as pd
3
 
4
- ufc_events = json.load(open('output/ufc_fights.json'))
5
- ufc_events_csv = pd.read_csv('output/ufc_fights.csv')
6
- ufc_fighters_csv = pd.read_csv('output/ufc_fighters.csv')
7
-
8
-
9
- unique_fighters = set()
10
-
11
- for event in ufc_events:
12
- for fight in event['fights']:
13
- unique_fighters.add(fight['fighter_1'])
14
- unique_fighters.add(fight['fighter_2'])
15
-
16
- unique_fighters_csv=set()
17
- for fight in ufc_events_csv['fighter_1']:
18
- unique_fighters_csv.add(fight)
19
- for fight in ufc_events_csv['fighter_2']:
20
- unique_fighters_csv.add(fight)
21
-
22
- print(len(unique_fighters))
23
- print(len(unique_fighters_csv))
24
 
 
 
 
 
 
 
 
25
 
 
 
 
 
1
  import pandas as pd
2
 
3
# Load the fight and fighter tables from the output directory.
ufc_fights = pd.read_csv('output/ufc_fights.csv')
ufc_fighters = pd.read_csv('output/ufc_fighters.csv')

print(f"Number of fighters registered in UFC: {len(ufc_fighters)}")

# Collect every name that appears in either fighter column of the fights table.
unique_fighters = set(ufc_fights['fighter_1'])
unique_fighters.update(ufc_fights['fighter_2'])
print(f"Number of fighters who have at least one fight: {len(unique_fighters)}")

# Show the 20 rows with the largest 'elo' value, highest first.
highest_elo_fighters = ufc_fighters.sort_values(by='elo', ascending=False).head(20)
print(highest_elo_fighters)
src/main.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+
2
+
3
+ # Run scrape.main
4
+ # Run analysis.elo to add elo
src/predict/__init__.py ADDED
File without changes
src/predict/predict.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+ import sys
4
+ from datetime import datetime
5
+ from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
6
+
7
def load_fighters_data(fighters_csv_path=None):
    """Load fighter ELO ratings from the fighters CSV, keyed by full name.

    Args:
        fighters_csv_path: Optional path to the fighters CSV. When omitted,
            the module-level ``FIGHTERS_CSV_PATH`` is used (looked up at
            call time).

    Returns:
        A dict mapping ``"<first_name> <last_name>"`` (stripped of
        surrounding whitespace) to ``{'elo': float}``, or ``None`` when the
        file does not exist.

    Raises:
        KeyError: If a row lacks the ``first_name`` or ``last_name`` column.
        ValueError: If a non-blank ``elo`` cell is not a valid number.
    """
    if fighters_csv_path is None:
        fighters_csv_path = FIGHTERS_CSV_PATH

    if not os.path.exists(fighters_csv_path):
        print(f"Error: Fighter data not found at '{fighters_csv_path}'.")
        print("Please run the ELO analysis first ('python -m src.analysis.elo').")
        return None

    default_elo = 1500.0
    fighters = {}
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(fighters_csv_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            full_name = f"{row['first_name']} {row['last_name']}".strip()
            # A missing column, a short row (None) or a blank cell all mean
            # "no rating yet": fall back to the default instead of letting
            # float('') / float(None) crash the whole load.
            raw_elo = (row.get('elo') or '').strip()
            elo = float(raw_elo) if raw_elo else default_elo
            fighters[full_name] = {'elo': elo}
    return fighters
21
+
22
def load_fights_data(fights_csv_path=None):
    """Load all fights from the fights CSV, sorted oldest first.

    Args:
        fights_csv_path: Optional path to the fights CSV. When omitted, the
            module-level ``FIGHTS_CSV_PATH`` is used (looked up at call
            time).

    Returns:
        A list of row dicts (one per fight) ordered by ``event_date``
        ascending, or ``None`` when the file does not exist.

    Raises:
        KeyError: If a row lacks the ``event_date`` column.
        ValueError: If an ``event_date`` does not match ``'%B %d, %Y'``
            (e.g. ``"March 05, 2022"``).
    """
    if fights_csv_path is None:
        fights_csv_path = FIGHTS_CSV_PATH

    if not os.path.exists(fights_csv_path):
        print(f"Error: Fights data not found at '{fights_csv_path}'.")
        return None

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(fights_csv_path, 'r', encoding='utf-8', newline='') as f:
        fights = list(csv.DictReader(f))

    def _event_date(fight):
        # Strip stray padding so it does not make strptime reject the date.
        return datetime.strptime(fight['event_date'].strip(), '%B %d, %Y')

    # Sort fights chronologically to ensure a proper train/test split later.
    fights.sort(key=_event_date)
    return fights
34
+
35
def run_elo_baseline_model(fights, fighters):
    """
    Score the naive "higher ELO wins" baseline against recorded results.

    Each fight dict must provide 'fighter_1', 'fighter_2' and 'winner'.
    Fights whose winner is "Draw", "NC" or empty are ignored, as are fights
    where either fighter has no (truthy) entry in `fighters`. When both
    ratings are equal the pick falls to fighter_2.

    Returns a tuple (accuracy_percent, fights_evaluated); accuracy is 0 when
    nothing was evaluated.
    """
    undecided = ("Draw", "NC", "")
    hits = 0
    evaluated = 0

    for bout in fights:
        name_a = bout['fighter_1']
        name_b = bout['fighter_2']
        winner = bout['winner']

        # No decisive result to compare a prediction against.
        if winner in undecided:
            continue

        record_a = fighters.get(name_a)
        record_b = fighters.get(name_b)
        # Both fighters need a record, otherwise there is nothing to compare.
        if not (record_a and record_b):
            continue

        rating_a = record_a.get('elo', 1500)
        rating_b = record_b.get('elo', 1500)

        # Strictly higher rating picks fighter_1; anything else picks fighter_2.
        if rating_a > rating_b:
            pick = name_a
        else:
            pick = name_b

        evaluated += 1
        if pick == winner:
            hits += 1

    if evaluated > 0:
        accuracy = (hits / evaluated) * 100
    else:
        accuracy = 0
    return accuracy, evaluated
70
+
71
def main():
    """
    Entry point for the prediction pipeline.

    Loads the fighter and fight data, stops early if either load comes back
    empty or missing, then runs the ELO baseline and prints its evaluation.
    """
    print("--- Starting ML Prediction Pipeline ---")

    # Load data
    fighters_data = load_fighters_data()
    fights_data = load_fights_data()

    # Both datasets are required; the loaders return None on a missing file.
    if not (fighters_data and fights_data):
        print("Aborting pipeline due to missing data.")
        return

    # Run baseline model
    print("\nRunning Baseline Model (Predicting winner by highest ELO)...")
    accuracy, total_fights = run_elo_baseline_model(fights_data, fighters_data)

    report_lines = (
        "\n--- Baseline Model Evaluation ---",
        f"Total Fights Evaluated: {total_fights}",
        f"Model Accuracy: {accuracy:.2f}%",
        "---------------------------------",
    )
    for line in report_lines:
        print(line)


if __name__ == '__main__':
    main()
src/scrape/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'scrape' directory a package.
src/scrape/config.py CHANGED
@@ -1,13 +1,9 @@
1
  import os
2
 
3
- # --- Directory Paths ---
4
  OUTPUT_DIR = 'output'
5
 
6
- # --- File Paths ---
7
- # JSON files (temporary)
8
  EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
9
  FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
10
 
11
- # CSV files (final output)
12
  FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
13
  FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
 
1
  import os
2
 
 
3
  OUTPUT_DIR = 'output'
4
 
 
 
5
  EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
6
  FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
7
 
 
8
  FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
9
  FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')