|
import pandas as pd
|
|
import streamlit as st
|
|
from config.config import EXCEL_PATH
|
|
|
|
|
|
def clean_dataframe(df):
|
|
|
|
categories = df.iloc[0]
|
|
|
|
sub_categories = df.iloc[2]
|
|
|
|
|
|
new_columns = {}
|
|
current_category = None
|
|
|
|
|
|
for i, (cat, subcat) in enumerate(zip(categories, sub_categories)):
|
|
if pd.notna(cat):
|
|
current_category = cat
|
|
if pd.notna(subcat) and current_category is not None:
|
|
new_name = f"{current_category}_{subcat}"
|
|
new_columns[df.columns[i]] = new_name
|
|
|
|
|
|
new_columns[df.columns[0]] = 'name'
|
|
|
|
|
|
df = df.rename(columns=new_columns)
|
|
|
|
|
|
df = df.iloc[3:].reset_index(drop=True)
|
|
|
|
|
|
df = df.dropna(subset=['name']).reset_index(drop=True)
|
|
|
|
|
|
df['numero'] = df['name'].str.extract(r'^(\d+)').fillna('')
|
|
df['name'] = df['name'].str.replace(r'^\d+\s*-\s*', '', regex=True).str.strip()
|
|
|
|
|
|
columns_to_keep = ['numero', 'name'] + [col for col in df.columns if '_' in str(col)]
|
|
df = df[columns_to_keep]
|
|
|
|
return df
|
|
|
|
@st.cache_data
|
|
def load_data():
|
|
"""Charge et prépare les données depuis le fichier Excel"""
|
|
try:
|
|
if not EXCEL_PATH.exists():
|
|
st.error(f"Le fichier Excel n'a pas été trouvé à l'emplacement : {EXCEL_PATH}")
|
|
st.info("Veuillez placer votre fichier stats.xlsx dans le dossier 'data'")
|
|
return None
|
|
|
|
all_sheets = pd.read_excel(EXCEL_PATH, sheet_name=None)
|
|
clean_data = []
|
|
|
|
for ville, df in all_sheets.items():
|
|
if ville != "Promedio partidos":
|
|
clean_df = clean_dataframe(df)
|
|
if not clean_df.empty:
|
|
clean_df['Ville'] = ville
|
|
clean_data.append(clean_df)
|
|
|
|
|
|
|
|
if not clean_data:
|
|
st.warning("Aucune donnée trouvée")
|
|
return None
|
|
|
|
return pd.concat(clean_data, ignore_index=True)
|
|
|
|
except Exception as e:
|
|
st.error(f"Erreur lors du chargement des données: {str(e)}")
|
|
return None
|
|
|
|
|