Spaces:
Sleeping
Sleeping
Alvaro
committed on
Commit
·
d48eef6
1
Parent(s):
c17726e
Baseline
Browse files- .gitignore +1 -0
- src/__init__.py +1 -0
- src/analysis/__init__.py +1 -0
- src/analysis/elo.py +6 -3
- src/analyze.py +11 -21
- src/main.py +4 -0
- src/predict/__init__.py +0 -0
- src/predict/predict.py +95 -0
- src/scrape/__init__.py +1 -0
- src/scrape/config.py +0 -4
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
*__pycache__/
|
|
|
2 |
example_event.html
|
|
|
1 |
*__pycache__/
|
2 |
+
output/
|
3 |
example_event.html
|
src/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file makes the 'src' directory a package.
|
src/analysis/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file makes the 'analysis' directory a package.
|
src/analysis/elo.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
import csv
|
2 |
import os
|
3 |
from datetime import datetime
|
|
|
|
|
|
|
4 |
|
5 |
# --- ELO Configuration ---
|
6 |
INITIAL_ELO = 1500
|
7 |
-
K_FACTOR =
|
8 |
# --- End Configuration ---
|
9 |
|
10 |
def calculate_expected_score(rating1, rating2):
|
@@ -27,7 +30,7 @@ def update_elo_draw(elo1, elo2):
|
|
27 |
|
28 |
return elo1 + change1, elo2 + change2
|
29 |
|
30 |
-
def process_fights_for_elo(fights_csv_path=
|
31 |
"""
|
32 |
Processes all fights chronologically to calculate final ELO scores for all fighters.
|
33 |
"""
|
@@ -70,7 +73,7 @@ def process_fights_for_elo(fights_csv_path='output/ufc_fights.csv'):
|
|
70 |
|
71 |
return elos
|
72 |
|
73 |
-
def add_elo_to_fighters_csv(elos, fighters_csv_path=
|
74 |
"""
|
75 |
Adds the final ELO scores as a new column to the fighters CSV data.
|
76 |
"""
|
|
|
1 |
import csv
|
2 |
import os
|
3 |
from datetime import datetime
|
4 |
+
import sys
|
5 |
+
|
6 |
+
from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
|
7 |
|
8 |
# --- ELO Configuration ---
|
9 |
INITIAL_ELO = 1500
|
10 |
+
K_FACTOR = 40
|
11 |
# --- End Configuration ---
|
12 |
|
13 |
def calculate_expected_score(rating1, rating2):
|
|
|
30 |
|
31 |
return elo1 + change1, elo2 + change2
|
32 |
|
33 |
+
def process_fights_for_elo(fights_csv_path=FIGHTS_CSV_PATH):
|
34 |
"""
|
35 |
Processes all fights chronologically to calculate final ELO scores for all fighters.
|
36 |
"""
|
|
|
73 |
|
74 |
return elos
|
75 |
|
76 |
+
def add_elo_to_fighters_csv(elos, fighters_csv_path=FIGHTERS_CSV_PATH):
|
77 |
"""
|
78 |
Adds the final ELO scores as a new column to the fighters CSV data.
|
79 |
"""
|
src/analyze.py
CHANGED
@@ -1,25 +1,15 @@
|
|
1 |
-
import json
|
2 |
import pandas as pd
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
ufc_fighters_csv = pd.read_csv('output/ufc_fighters.csv')
|
7 |
-
|
8 |
-
|
9 |
-
unique_fighters = set()
|
10 |
-
|
11 |
-
for event in ufc_events:
|
12 |
-
for fight in event['fights']:
|
13 |
-
unique_fighters.add(fight['fighter_1'])
|
14 |
-
unique_fighters.add(fight['fighter_2'])
|
15 |
-
|
16 |
-
unique_fighters_csv=set()
|
17 |
-
for fight in ufc_events_csv['fighter_1']:
|
18 |
-
unique_fighters_csv.add(fight)
|
19 |
-
for fight in ufc_events_csv['fighter_2']:
|
20 |
-
unique_fighters_csv.add(fight)
|
21 |
-
|
22 |
-
print(len(unique_fighters))
|
23 |
-
print(len(unique_fighters_csv))
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
|
|
|
|
|
|
|
1 |
import pandas as pd

# Load the scraped fight and fighter tables from the output directory.
ufc_fights = pd.read_csv('output/ufc_fights.csv')
ufc_fighters = pd.read_csv('output/ufc_fighters.csv')

print(f"Number of fighters registered in UFC: {len(ufc_fighters)}")

# A fighter counts as active if the name appears in either corner of any fight.
unique_fighters = set(ufc_fights['fighter_1']).union(ufc_fights['fighter_2'])
print(f"Number of fighters who have at least one fight: {len(unique_fighters)}")

# Show the 20 fighters with the highest ELO rating.
highest_elo_fighters = (
    ufc_fighters
    .sort_values(by='elo', ascending=False)
    .head(20)
)
print(highest_elo_fighters)
|
src/main.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
# Run scrape.main
|
4 |
+
# Run analysis.elo to add the final ELO scores to the fighters CSV
|
src/predict/__init__.py
ADDED
File without changes
|
src/predict/predict.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
from datetime import datetime
|
5 |
+
from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
|
6 |
+
|
7 |
+
def load_fighters_data(default_elo=1500.0):
    """Load fighter ELO ratings from the fighters CSV into a dictionary.

    Reads the file at ``FIGHTERS_CSV_PATH`` and keys each fighter by
    ``"<first_name> <last_name>"`` (whitespace-stripped).

    Args:
        default_elo: Rating used when a row has no usable ``elo`` value
            (column absent, cell empty, or text that ``float()`` rejects).

    Returns:
        A dict mapping full name to ``{'elo': float}``, or ``None`` when the
        CSV file does not exist.
    """
    if not os.path.exists(FIGHTERS_CSV_PATH):
        print(f"Error: Fighter data not found at '{FIGHTERS_CSV_PATH}'.")
        print("Please run the ELO analysis first ('python -m src.analysis.elo').")
        return None

    fighters = {}
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(FIGHTERS_CSV_PATH, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            full_name = f"{row['first_name']} {row['last_name']}".strip()
            # A .get() default only applies when the column is absent. An
            # empty cell arrives as '' and a short row as None, both of which
            # float() rejects, so fall back explicitly.
            try:
                elo = float(row.get('elo'))
            except (TypeError, ValueError):
                elo = float(default_elo)
            fighters[full_name] = {'elo': elo}
    return fighters
|
21 |
+
|
22 |
+
def load_fights_data():
    """Read every fight from the fights CSV, ordered from oldest to newest.

    Returns:
        A list of row dicts sorted by ``event_date`` (parsed with the
        ``'%B %d, %Y'`` format), or ``None`` when the file at
        ``FIGHTS_CSV_PATH`` does not exist.
    """
    if not os.path.exists(FIGHTS_CSV_PATH):
        print(f"Error: Fights data not found at '{FIGHTS_CSV_PATH}'.")
        return None

    def event_day(record):
        return datetime.strptime(record['event_date'], '%B %d, %Y')

    with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as csv_file:
        rows = list(csv.DictReader(csv_file))

    # Chronological order is needed for a proper train/test split later on.
    return sorted(rows, key=event_day)
|
34 |
+
|
35 |
+
def run_elo_baseline_model(fights, fighters):
    """Score a baseline that always picks the fighter with the higher ELO.

    Args:
        fights: Iterable of fight rows; each needs ``fighter_1``,
            ``fighter_2`` and ``winner`` keys.
        fighters: Mapping of fighter name to a dict holding an ``'elo'``
            value (1500 is assumed when that key is absent).

    Returns:
        ``(accuracy, total_predictions)``, where accuracy is a percentage and
        is ``0.0`` when no fight could be evaluated.

    A fight is left out of the evaluation when its result is a draw, a no
    contest or blank, when either fighter is missing from ``fighters``, or
    when both ratings are equal.
    """
    correct_predictions = 0
    total_predictions = 0

    for fight in fights:
        fighter1_name = fight['fighter_1']
        fighter2_name = fight['fighter_2']
        actual_winner = fight['winner']

        # Skip fights that are draws or no contests.
        if actual_winner in ("Draw", "NC", ""):
            continue

        fighter1 = fighters.get(fighter1_name)
        fighter2 = fighters.get(fighter2_name)
        if not fighter1 or not fighter2:
            continue  # Skip if fighter data is missing.

        elo1 = fighter1.get('elo', 1500)
        elo2 = fighter2.get('elo', 1500)

        # With equal ratings the model has no preference. Defaulting to one
        # corner would make the accuracy depend on CSV column order, so the
        # fight is not scored at all.
        if elo1 == elo2:
            continue

        predicted_winner = fighter1_name if elo1 > elo2 else fighter2_name
        if predicted_winner == actual_winner:
            correct_predictions += 1
        total_predictions += 1

    if total_predictions == 0:
        return 0.0, 0
    accuracy = (correct_predictions / total_predictions) * 100
    return accuracy, total_predictions
|
70 |
+
|
71 |
+
def main():
    """Load the CSV data, evaluate the ELO baseline, and print a short report."""
    print("--- Starting ML Prediction Pipeline ---")

    # Both loaders are always called so each can report its own missing file.
    fighters = load_fighters_data()
    fights = load_fights_data()

    if not (fighters and fights):
        print("Aborting pipeline due to missing data.")
        return

    print("\nRunning Baseline Model (Predicting winner by highest ELO)...")
    accuracy, total_fights = run_elo_baseline_model(fights, fighters)

    report_lines = (
        "\n--- Baseline Model Evaluation ---",
        f"Total Fights Evaluated: {total_fights}",
        f"Model Accuracy: {accuracy:.2f}%",
        "---------------------------------",
    )
    for line in report_lines:
        print(line)


if __name__ == '__main__':
    main()
|
src/scrape/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file makes the 'scrape' directory a package.
|
src/scrape/config.py
CHANGED
@@ -1,13 +1,9 @@
|
|
1 |
import os
|
2 |
|
3 |
-
# --- Directory Paths ---
|
4 |
OUTPUT_DIR = 'output'
|
5 |
|
6 |
-
# --- File Paths ---
|
7 |
-
# JSON files (temporary)
|
8 |
EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
|
9 |
FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
|
10 |
|
11 |
-
# CSV files (final output)
|
12 |
FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
|
13 |
FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
|
|
|
1 |
import os
|
2 |
|
|
|
3 |
OUTPUT_DIR = 'output'
|
4 |
|
|
|
|
|
5 |
EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
|
6 |
FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
|
7 |
|
|
|
8 |
FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
|
9 |
FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
|