Spaces:

AlvaroMros
/

ufc-predictor

Sleeping

App Files Files Community

Alvaro committed on Jul 3

Commit

d48eef6

1 Parent(s): c17726e

Baseline

Browse files

Files changed (10) hide show

.gitignore +1 -0
src/__init__.py +1 -0
src/analysis/__init__.py +1 -0
src/analysis/elo.py +6 -3
src/analyze.py +11 -21
src/main.py +4 -0
src/predict/__init__.py +0 -0
src/predict/predict.py +95 -0
src/scrape/__init__.py +1 -0
src/scrape/config.py +0 -4

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 *__pycache__/
 example_event.html

 *__pycache__/
+output/
 example_event.html

src/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # This file makes the 'src' directory a package.

src/analysis/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # This file makes the 'analysis' directory a package.

src/analysis/elo.py CHANGED Viewed

@@ -1,10 +1,13 @@
 import csv
 import os
 from datetime import datetime
 # --- ELO Configuration ---
 INITIAL_ELO = 1500
-K_FACTOR = 32
 # --- End Configuration ---
 def calculate_expected_score(rating1, rating2):
@@ -27,7 +30,7 @@ def update_elo_draw(elo1, elo2):
     return elo1 + change1, elo2 + change2
-def process_fights_for_elo(fights_csv_path='output/ufc_fights.csv'):
     """
     Processes all fights chronologically to calculate final ELO scores for all fighters.
     """
@@ -70,7 +73,7 @@ def process_fights_for_elo(fights_csv_path='output/ufc_fights.csv'):
     return elos
-def add_elo_to_fighters_csv(elos, fighters_csv_path='output/ufc_fighters.csv'):
     """
     Adds the final ELO scores as a new column to the fighters CSV data.
     """

 import csv
 import os
 from datetime import datetime
+import sys
+from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
 # --- ELO Configuration ---
 INITIAL_ELO = 1500
+K_FACTOR = 40
 # --- End Configuration ---
 def calculate_expected_score(rating1, rating2):
     return elo1 + change1, elo2 + change2
+def process_fights_for_elo(fights_csv_path=FIGHTS_CSV_PATH):
     """
     Processes all fights chronologically to calculate final ELO scores for all fighters.
     """
     return elos
+def add_elo_to_fighters_csv(elos, fighters_csv_path=FIGHTERS_CSV_PATH):
     """
     Adds the final ELO scores as a new column to the fighters CSV data.
     """

src/analyze.py CHANGED Viewed

@@ -1,25 +1,15 @@
-import json
 import pandas as pd
-ufc_events = json.load(open('output/ufc_fights.json'))
-ufc_events_csv = pd.read_csv('output/ufc_fights.csv')
-ufc_fighters_csv = pd.read_csv('output/ufc_fighters.csv')
-unique_fighters = set()
-for event in ufc_events:
-    for fight in event['fights']:
-        unique_fighters.add(fight['fighter_1'])
-        unique_fighters.add(fight['fighter_2'])
-unique_fighters_csv=set()
-for fight in ufc_events_csv['fighter_1']:
-    unique_fighters_csv.add(fight)
-for fight in ufc_events_csv['fighter_2']:
-    unique_fighters_csv.add(fight)
-print(len(unique_fighters))
-print(len(unique_fighters_csv))

 import pandas as pd
+# Load the fight and fighter tables from the output directory.
+ufc_fights = pd.read_csv('output/ufc_fights.csv')
+ufc_fighters = pd.read_csv('output/ufc_fighters.csv')
+print(f"Number of fighters registered in UFC: {len(ufc_fighters)}")
+# Gather every name that appears in either fighter column of the fights table.
+unique_fighters = set(ufc_fights['fighter_1'])
+unique_fighters.update(ufc_fights['fighter_2'])
+print(f"Number of fighters who have at least one fight: {len(unique_fighters)}")
+# Show the 20 fighter rows with the largest 'elo' value, highest first.
+highest_elo_fighters = ufc_fighters.sort_values(by='elo', ascending=False).head(20)
+print(highest_elo_fighters)

src/main.py ADDED Viewed

	@@ -0,0 +1,4 @@


1	+
2	+
3	+ # Run scrape.main
4	+ # Run analysis.elo to add elo

src/predict/__init__.py ADDED Viewed

File without changes

src/predict/predict.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import csv
+import os
+import sys
+from datetime import datetime
+from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
+def load_fighters_data():
+    """Load fighter ELO ratings from the fighters CSV into a dictionary.
+    Reads FIGHTERS_CSV_PATH and keys each row by "<first_name> <last_name>"
+    with surrounding whitespace stripped. A missing, empty, or whitespace-only
+    'elo' cell falls back to 1500. Rows that share a full name overwrite
+    earlier ones.
+    Returns:
+        dict mapping full name -> {'elo': float}, or None (after printing an
+        error message) when the CSV file does not exist.
+    Raises:
+        ValueError: if an 'elo' cell contains non-numeric text.
+    """
+    if not os.path.exists(FIGHTERS_CSV_PATH):
+        print(f"Error: Fighter data not found at '{FIGHTERS_CSV_PATH}'.")
+        print("Please run the ELO analysis first ('python -m src.analysis.elo').")
+        return None
+    default_elo = 1500.0
+    fighters = {}
+    # newline='' lets the csv module handle line endings inside quoted fields.
+    with open(FIGHTERS_CSV_PATH, 'r', encoding='utf-8', newline='') as f:
+        for row in csv.DictReader(f):
+            full_name = f"{row['first_name']} {row['last_name']}".strip()
+            # DictReader yields '' for a blank cell and None for a short row;
+            # dict.get's default only covers an absent column, so normalise
+            # both cases here instead of letting float() raise.
+            raw_elo = (row.get('elo') or '').strip()
+            fighters[full_name] = {'elo': float(raw_elo) if raw_elo else default_elo}
+    return fighters
+def load_fights_data():
+    """Read all fight rows from FIGHTS_CSV_PATH, ordered oldest event first.
+    Each row is a dict as produced by csv.DictReader. The 'event_date' column
+    is parsed with the format '%B %d, %Y' to obtain the sort key.
+    Returns:
+        list of row dicts sorted by event date, or None (after printing an
+        error message) when the CSV file does not exist.
+    """
+    if not os.path.exists(FIGHTS_CSV_PATH):
+        print(f"Error: Fights data not found at '{FIGHTS_CSV_PATH}'.")
+        return None
+    def event_day(fight_row):
+        # Sort key: the row's event date as a datetime.
+        return datetime.strptime(fight_row['event_date'], '%B %d, %Y')
+    with open(FIGHTS_CSV_PATH, 'r', encoding='utf-8') as f:
+        rows = [row for row in csv.DictReader(f)]
+    # Chronological order so a later train/test split follows time.
+    return sorted(rows, key=event_day)
+def run_elo_baseline_model(fights, fighters):
+    """
+    Score the "higher ELO wins" baseline against recorded fight outcomes.
+    Args:
+        fights: iterable of dicts with 'fighter_1', 'fighter_2' and 'winner' keys.
+        fighters: dict mapping fighter name -> dict holding an 'elo' value.
+    Returns:
+        (accuracy, total_predictions): accuracy is a percentage, or 0 when no
+        fight could be evaluated.
+    Fights whose winner is "Draw", "NC" or "" are skipped, as are fights where
+    either fighter has no (or an empty) entry in `fighters`. When both ratings
+    are equal, fighter_2 is the predicted winner.
+    NOTE(review): the ratings passed in look like final ELOs computed from the
+    same fights being scored, so this accuracy may be optimistic - confirm.
+    """
+    unscored_outcomes = ("Draw", "NC", "")
+    hits = 0
+    evaluated = 0
+    for bout in fights:
+        name_a = bout['fighter_1']
+        name_b = bout['fighter_2']
+        winner = bout['winner']
+        if winner in unscored_outcomes:
+            continue
+        record_a = fighters.get(name_a)
+        record_b = fighters.get(name_b)
+        if not (record_a and record_b):
+            continue  # no usable rating data for at least one fighter
+        rating_a = record_a.get('elo', 1500)
+        rating_b = record_b.get('elo', 1500)
+        # Strictly higher rating picks fighter_1; otherwise fighter_2.
+        if rating_a > rating_b:
+            pick = name_a
+        else:
+            pick = name_b
+        evaluated += 1
+        if pick == winner:
+            hits += 1
+    if evaluated > 0:
+        return (hits / evaluated) * 100, evaluated
+    return 0, evaluated
+def main():
+    """
+    Run the prediction pipeline: load both CSVs, score the ELO baseline, print a report.
+    Prints an abort message and returns early when either loader gives back
+    None or an empty result.
+    """
+    print("--- Starting ML Prediction Pipeline ---")
+    # Both loaders run before the check, so each can report its own missing file.
+    fighters_data = load_fighters_data()
+    fights_data = load_fights_data()
+    if not (fighters_data and fights_data):
+        print("Aborting pipeline due to missing data.")
+        return
+    print("\nRunning Baseline Model (Predicting winner by highest ELO)...")
+    accuracy, total_fights = run_elo_baseline_model(fights_data, fighters_data)
+    report_lines = (
+        "\n--- Baseline Model Evaluation ---",
+        f"Total Fights Evaluated: {total_fights}",
+        f"Model Accuracy: {accuracy:.2f}%",
+        "---------------------------------",
+    )
+    for line in report_lines:
+        print(line)
+if __name__ == '__main__':
+    main()

src/scrape/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # This file makes the 'scrape' directory a package.

src/scrape/config.py CHANGED Viewed

@@ -1,13 +1,9 @@
 import os
-# --- Directory Paths ---
 OUTPUT_DIR = 'output'
-# --- File Paths ---
-# JSON files (temporary)
 EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
 FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
-# CSV files (final output)
 FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
 FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')

 import os
 OUTPUT_DIR = 'output'
 EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
 FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
 FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
 FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')