# BiotechU2 / app.py
# C2MV's picture
# Update app.py
# 78bc459 verified
import os
os.system("pip install --upgrade gradio")
from pydantic import BaseModel, ConfigDict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.integrate import odeint
from scipy.optimize import curve_fit
from sklearn.metrics import mean_squared_error
import gradio as gr
import io
from PIL import Image
import tempfile
class YourModel(BaseModel):
    """Pydantic base model that allows fields of arbitrary (non-pydantic) types."""

    # Pydantic v2 style configuration. The file already imports ConfigDict
    # (a v2-only name), and the nested ``class Config`` form is deprecated there.
    model_config = ConfigDict(arbitrary_types_allowed=True)
class BioprocessModel:
def __init__(self, model_type='logistic', maxfev=50000):
self.params = {}
self.r2 = {}
self.rmse = {}
self.datax = []
self.datas = []
self.datap = []
self.dataxp = []
self.datasp = []
self.datapp = []
self.datax_std = []
self.datas_std = []
self.datap_std = []
self.biomass_model = None
self.biomass_diff = None # Store the differential equation function
self.model_type = model_type
self.maxfev = maxfev
self.time = None
@staticmethod
def logistic(time, xo, xm, um):
# xo: initial biomass, xm: max biomass, um: max specific growth rate
if xm == 0 or (xo / xm == 1 and np.any(um * time > 0)):
return np.full_like(time, np.nan)
# Add a small epsilon to prevent division by zero or log of zero in edge cases
term_exp = np.exp(um * time)
denominator = (1 - (xo / xm) * (1 - term_exp))
denominator = np.where(denominator == 0, 1e-9, denominator) # Avoid division by zero
# Ensure xo/xm is not 1 if (1-exp(um*time)) is also 0 (i.e. um*time = 0)
# This is usually handled by xo < xm constraint in fitting
return (xo * term_exp) / denominator
@staticmethod
def gompertz(time, xm, um, lag):
# xm: max biomass, um: max specific growth rate, lag: lag time
if xm == 0:
return np.full_like(time, np.nan)
# Add small epsilon to prevent log(0) if exp_term becomes very large negative
exp_term = (um * np.e / xm) * (lag - time) + 1
# Clamp large negative values in exp_term to avoid overflow in np.exp(-np.exp(exp_term))
exp_term_clipped = np.clip(exp_term, -np.inf, 700) # exp(709) is around max float
return xm * np.exp(-np.exp(exp_term_clipped))
@staticmethod
def moser(time, Xm, um, Ks):
# Xm: max biomass, um: max specific growth rate, Ks: Monod constant (here acting as time shift)
# This is a simplified form, not the substrate-dependent Moser.
return Xm * (1 - np.exp(-um * (time - Ks)))
@staticmethod
def baranyi(time, X0, Xm, um, lag):
# X0: initial biomass, Xm: max biomass, um: max specific growth rate, lag: lag time
# Ensure parameters are valid to prevent math errors
if X0 <= 0 or Xm <= X0 or um <= 0: # lag can be 0
return np.full_like(time, np.nan)
# Adjustment function A(t)
# Using h0 = um for simplicity in A(t) calculation
# A_t = t + (1/um) * np.log(np.exp(-um*t) + np.exp(-um*lag) - np.exp(-um*(t+lag)))
# Argument of log in A(t):
log_arg_A = np.exp(-um * t) + np.exp(-um * lag) - np.exp(-um * (t + lag))
log_arg_A = np.where(log_arg_A <= 1e-9, 1e-9, log_arg_A) # Prevent log(0 or negative)
A_t = t + (1 / um) * np.log(log_arg_A)
# Main Baranyi equation part
exp_um_At = np.exp(um * A_t)
# Clamp large values to prevent overflow if Xm/X0 is large
exp_um_At_clipped = np.clip(exp_um_At, -np.inf, 700)
numerator = (Xm / X0) * exp_um_At_clipped
denominator = (Xm / X0 - 1) + exp_um_At_clipped
denominator = np.where(denominator == 0, 1e-9, denominator) # Avoid division by zero
return X0 * (numerator / denominator)
@staticmethod
def logistic_diff(X, t, params):
# params for logistic_diff: [xo, xm, um] (xo is not used in diff eq, but passed for consistency)
_, xm, um = params
if xm == 0: return 0
return um * X * (1 - X / xm)
@staticmethod
def gompertz_diff(X, t, params):
# params for gompertz_diff: [xm, um, lag]
xm, um, lag = params
if xm == 0: return 0
# This is d(Gompertz)/dt
# Gompertz: xm * exp(-exp( (um*e/xm)*(lag-t)+1 ))
# Let k = (um*e/xm)
# Let u = (k*(lag-t)+1)
# dX/dt = X * (-exp(u)) * k * (-1) = X * k * exp(u)
k_val = um * np.e / xm
u_val = k_val * (lag - t) + 1
u_val_clipped = np.clip(u_val, -np.inf, 700)
return X * k_val * np.exp(u_val_clipped)
@staticmethod
def moser_diff(X, t, params):
# params for moser_diff: [Xm, um, Ks]
Xm, um, _ = params # Ks is not directly in this simplified dX/dt
return um * (Xm - X)
# No differential form for Baranyi in this version due to complexity.
def substrate(self, time, so, p, q, biomass_params_list):
    """Evaluate substrate from the biomass curve.

    Computes ``S(t) = so - p*(X(t) - X0) - q * integral(X dt)``, where the
    integral runs from the first time point. It is accumulated with the
    trapezoid rule and is exactly zero at the first point, so ``S(time[0])``
    equals ``so`` when ``X(time[0]) == X0``. The original rectangle rule
    with a prepended step made the first value already include ``q*X*dt``
    and made results depend on grid spacing.

    Args:
        time: 1-D sequence of time points; converted to a float array.
        so: Initial substrate.
        p: Coefficient of the biomass-increase term.
        q: Coefficient of the integrated-biomass term.
        biomass_params_list: Positional parameters for ``self.biomass_model``.

    Returns:
        Array of substrate values, or an array of NaN when no biomass model
        or parameters are available or the biomass curve contains NaN.
    """
    time = np.asarray(time, dtype=float)
    if self.biomass_model is None or not biomass_params_list:
        return np.full_like(time, np.nan)
    X_t = self.biomass_model(time, *biomass_params_list)
    if np.any(np.isnan(X_t)):
        return np.full_like(time, np.nan)

    # Cumulative trapezoid integral of X over time, starting at 0.
    integral_X = np.zeros_like(X_t, dtype=float)
    if time.size > 1:
        integral_X[1:] = np.cumsum(0.5 * (X_t[1:] + X_t[:-1]) * np.diff(time))

    # Determine X0 (initial biomass) from the fitted parameters.
    if self.model_type == 'logistic' or self.model_type == 'baranyi':
        X0 = biomass_params_list[0]  # xo / X0 is the first parameter
    elif self.model_type == 'gompertz':
        X0 = self.gompertz(0, *biomass_params_list)
    elif self.model_type == 'moser':
        X0 = self.moser(0, *biomass_params_list)
    else:
        X0 = X_t[0]  # fallback
    X0 = X0 if not np.isnan(X0) else (biomass_params_list[0] if biomass_params_list else 0)
    return so - p * (X_t - X0) - q * integral_X
def product(self, time, po, alpha, beta, biomass_params_list):
    """Evaluate product from the biomass curve.

    Computes ``P(t) = po + alpha*(X(t) - X0) + beta * integral(X dt)``, where
    the integral runs from the first time point. It is accumulated with the
    trapezoid rule and is exactly zero at the first point, so ``P(time[0])``
    equals ``po`` when ``X(time[0]) == X0``. The original rectangle rule
    with a prepended step made the first value already include
    ``beta*X*dt`` and made results depend on grid spacing.

    Args:
        time: 1-D sequence of time points; converted to a float array.
        po: Initial product.
        alpha: Coefficient of the biomass-increase term.
        beta: Coefficient of the integrated-biomass term.
        biomass_params_list: Positional parameters for ``self.biomass_model``.

    Returns:
        Array of product values, or an array of NaN when no biomass model
        or parameters are available or the biomass curve contains NaN.
    """
    time = np.asarray(time, dtype=float)
    if self.biomass_model is None or not biomass_params_list:
        return np.full_like(time, np.nan)
    X_t = self.biomass_model(time, *biomass_params_list)
    if np.any(np.isnan(X_t)):
        return np.full_like(time, np.nan)

    # Cumulative trapezoid integral of X over time, starting at 0.
    integral_X = np.zeros_like(X_t, dtype=float)
    if time.size > 1:
        integral_X[1:] = np.cumsum(0.5 * (X_t[1:] + X_t[:-1]) * np.diff(time))

    # Determine X0 (initial biomass) from the fitted parameters.
    if self.model_type == 'logistic' or self.model_type == 'baranyi':
        X0 = biomass_params_list[0]  # xo / X0 is the first parameter
    elif self.model_type == 'gompertz':
        X0 = self.gompertz(0, *biomass_params_list)
    elif self.model_type == 'moser':
        X0 = self.moser(0, *biomass_params_list)
    else:
        X0 = X_t[0]  # fallback
    X0 = X0 if not np.isnan(X0) else (biomass_params_list[0] if biomass_params_list else 0)
    return po + alpha * (X_t - X0) + beta * integral_X
def process_data(self, df):
    """Extract time, biomass, substrate and product replicates from ``df``.

    Columns are selected by their second label (``col[1]``): 'Tiempo',
    'Biomasa', 'Sustrato', 'Producto'. For each variable, replicates whose
    NaN-dropped length equals the NaN-dropped time length are stacked. The
    stack, its mean and its sample standard deviation (``ddof=1``) are
    appended to the matching lists on ``self``. When no replicate qualifies,
    an empty array is appended to each of the three lists instead.

    Args:
        df: DataFrame whose column labels are indexable, with the variable
            name at position 1.

    Raises:
        ValueError: If no column is labelled 'Tiempo'.
    """
    def columns_labelled(label):
        return [col for col in df.columns if col[1] == label]

    time_columns = columns_labelled('Tiempo')
    if not time_columns:
        raise ValueError("La columna 'Tiempo' no se encuentra en el DataFrame.")
    time = df[time_columns[0]].dropna().values  # no NaNs in the time vector
    expected_len = len(time)

    targets = (
        ('Biomasa', self.datax, self.dataxp, self.datax_std),
        ('Sustrato', self.datas, self.datasp, self.datas_std),
        ('Producto', self.datap, self.datapp, self.datap_std),
    )
    for label, raw_store, mean_store, std_store in targets:
        replicates = [df[col].dropna().values for col in columns_labelled(label)]
        # Keep only replicates aligned with the time vector.
        aligned = [rep for rep in replicates if len(rep) == expected_len]
        if aligned:
            stacked = np.array(aligned)
            raw_store.append(stacked)
            mean_store.append(np.mean(stacked, axis=0))
            std_store.append(np.std(stacked, axis=0, ddof=1))
        else:
            for store in (raw_store, mean_store, std_store):
                store.append(np.array([]))

    self.time = time
def fit_model(self):
    """Select the biomass model callables matching ``self.model_type``.

    Sets ``self.biomass_model`` to the closed-form method of the same name
    and ``self.biomass_diff`` to its ``*_diff`` counterpart. 'baranyi' has
    no differential form here, so ``biomass_diff`` becomes ``None``.

    Raises:
        ValueError: If ``self.model_type`` is not one of 'logistic',
            'gompertz', 'moser' or 'baranyi'.
    """
    kind = self.model_type
    if kind not in ('logistic', 'gompertz', 'moser', 'baranyi'):
        raise ValueError(f"Modelo de biomasa desconocido: {self.model_type}")
    self.biomass_model = getattr(self, kind)
    # No ODE form for Baranyi in this version.
    self.biomass_diff = None if kind == 'baranyi' else getattr(self, kind + '_diff')
def fit_biomass(self, time, biomass):
    """Fit the biomass model selected by ``self.model_type`` with ``curve_fit``.

    On success the fitted parameters are stored in ``self.params['biomass']``
    and the goodness-of-fit metrics in ``self.r2['biomass']`` and
    ``self.rmse['biomass']``.

    Args:
        time: Time points; converted to a 1-D float array.
        biomass: Biomass measurements, same length as ``time``.

    Returns:
        The model prediction at the (NaN-filtered) time points, or ``None``
        when the inputs are unusable or the fit fails. Failures are printed,
        not raised.
    """
    # Ensure time and biomass are 1D arrays of the same length and numeric
    time = np.asarray(time, dtype=float)
    biomass = np.asarray(biomass, dtype=float)
    if len(time) != len(biomass):
        print("Error: Tiempo y biomasa deben tener la misma longitud.")
        return None
    if np.any(np.isnan(time)) or np.any(np.isnan(biomass)):
        # The message is informational only: execution continues with the
        # rows where both values are present.
        print("Error: Tiempo o biomasa contienen NaNs.")
        # Attempt to remove NaNs consistently
        valid_indices = ~np.isnan(time) & ~np.isnan(biomass)
        time = time[valid_indices]
        biomass = biomass[valid_indices]
        if len(time) < 3: # Need at least 3 points for 3-param models
            print("No hay suficientes datos válidos después de remover NaNs.")
            return None
    try:
        # A constant series cannot constrain any of the growth models.
        if len(np.unique(biomass)) < 2 :
            print(f"Biomasa constante para {self.model_type}, no se puede ajustar el modelo.")
            self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan
            return None
        popt = None # Initialize popt
        if self.model_type == 'logistic':
            # Initial guesses: Xo from the first point, Xm slightly above the maximum.
            xo_guess = biomass[0] if biomass[0] > 1e-6 else 1e-3
            xm_guess = max(biomass) * 1.1 if max(biomass) > xo_guess else xo_guess * 2
            if xm_guess <= xo_guess: xm_guess = xo_guess + 1e-3
            p0 = [xo_guess, xm_guess, 0.1]
            # Bounds order matches the parameter order (xo, xm, um).
            bounds = ([1e-9, biomass[0] if biomass[0]>1e-9 else 1e-9, 1e-9], [max(biomass)*0.99 if max(biomass)>0 else 1, np.inf, np.inf])
            # Ensure xo_guess is within bounds[0][0] and bounds[1][0]
            p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0])
            popt, _ = curve_fit(self.logistic, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9)
            if popt[1] <= popt[0]: # xm <= xo
                print(f"Advertencia: En modelo logístico, Xm ({popt[1]:.2f}) no es mayor que Xo ({popt[0]:.2f}). Ajuste puede no ser válido.")
            self.params['biomass'] = {'Xo': popt[0], 'Xm': popt[1], 'um': popt[2]}
            y_pred = self.logistic(time, *popt)
        elif self.model_type == 'gompertz':
            xm_guess = max(biomass) if max(biomass) > 0 else 1.0
            um_guess = 0.1
            # A simple lag estimate: first time at which biomass exceeds
            # min + 10% of (max - min)
            min_bio = min(biomass)
            lag_thresh = min_bio + 0.1 * (max(biomass) - min_bio)
            lag_indices = np.where(biomass > lag_thresh)[0]
            lag_guess = time[lag_indices[0]] if len(lag_indices) > 0 else time[0]
            p0 = [xm_guess, um_guess, lag_guess]
            # Bounds order matches the parameter order (xm, um, lag).
            bounds = ([min(biomass) if min(biomass)>1e-9 else 1e-9, 1e-9, 0],
                      [np.inf, np.inf, max(time) if len(time)>0 else 100])
            popt, _ = curve_fit(self.gompertz, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9)
            self.params['biomass'] = {'Xm': popt[0], 'um': popt[1], 'lag': popt[2]}
            y_pred = self.gompertz(time, *popt)
        elif self.model_type == 'moser':
            Xm_guess = max(biomass) if max(biomass) > 0 else 1.0
            um_guess = 0.1
            Ks_guess = time[0]
            p0 = [Xm_guess, um_guess, Ks_guess]
            # Bounds order matches the parameter order (Xm, um, Ks).
            bounds = ([min(biomass) if min(biomass)>1e-9 else 1e-9, 1e-9, -max(time) if len(time)>0 else -100], # Ks can be negative
                      [np.inf, np.inf, max(time) if len(time)>0 else 100])
            popt, _ = curve_fit(self.moser, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9)
            self.params['biomass'] = {'Xm': popt[0], 'um': popt[1], 'Ks': popt[2]}
            y_pred = self.moser(time, *popt)
        elif self.model_type == 'baranyi':
            X0_guess = biomass[0] if biomass[0] > 1e-6 else 1e-3
            Xm_guess = max(biomass) if max(biomass) > X0_guess else X0_guess * 2
            if Xm_guess <= X0_guess: Xm_guess = X0_guess + 1e-3 # Ensure Xm > X0
            um_guess = 0.1
            # Lag estimate: first time at which biomass exceeds
            # X0_guess + 10% of (Xm_guess - X0_guess).
            min_bio = X0_guess
            lag_thresh = min_bio + 0.1 * (Xm_guess - min_bio)
            lag_indices = np.where(biomass > lag_thresh)[0]
            lag_guess = time[lag_indices[0]] if len(lag_indices) > 0 and time[lag_indices[0]] > 0 else (time[0] if time[0] > 1e-9 else 1e-9) # lag must be >0 for some A(t) forms
            if lag_guess <= 0: lag_guess = 1e-9 # Ensure lag is positive for Baranyi A(t) log
            p0 = [X0_guess, Xm_guess, um_guess, lag_guess]
            bounds = (
                [1e-9, biomass[0] if biomass[0]>1e-9 else 1e-9, 1e-9, 1e-9], # X0, Xm, um, lag > 0
                [max(biomass)*0.99 if max(biomass)>0 else 1, np.inf, np.inf, max(time) if len(time)>0 else 100]
            )
            p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0]) # Clip X0_guess
            p0[3] = np.clip(p0[3], bounds[0][3], bounds[1][3]) # Clip lag_guess
            popt, _ = curve_fit(self.baranyi, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9)
            if popt[1] <= popt[0]: # Xm <= X0
                print(f"Advertencia: En modelo Baranyi, Xm ({popt[1]:.2f}) no es mayor que X0 ({popt[0]:.2f}). Ajuste puede no ser válido.")
            self.params['biomass'] = {'X0': popt[0], 'Xm': popt[1], 'um': popt[2], 'lag': popt[3]}
            y_pred = self.baranyi(time, *popt)
        else:
            print(f"Modelo {self.model_type} no implementado para ajuste de biomasa.")
            return None
        # A prediction with NaN/Inf counts as a failed fit. The parameters
        # stored above are left in place.
        if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)):
            print(f"Predicción de biomasa contiene NaN/Inf para {self.model_type}. Ajuste fallido.")
            self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan
            return None
        # Coefficient of determination: 1 - SS_res / SS_tot.
        ss_res = np.sum((biomass - y_pred) ** 2)
        ss_tot = np.sum((biomass - np.mean(biomass)) ** 2)
        if ss_tot == 0:
            self.r2['biomass'] = 1.0 if ss_res < 1e-9 else 0.0 # Perfect fit if residuals are also ~0
        else:
            self.r2['biomass'] = 1 - (ss_res / ss_tot)
        self.rmse['biomass'] = np.sqrt(mean_squared_error(biomass, y_pred))
        return y_pred
    except RuntimeError as e:
        # Handled separately only to print a more specific message.
        print(f"Error de Runtime en fit_biomass_{self.model_type} (probablemente no se pudo ajustar): {e}")
        self.params['biomass'] = {}
        self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan
        return None
    except Exception as e:
        # Best-effort: any other failure is reported and turned into None.
        print(f"Error general en fit_biomass_{self.model_type}: {e}")
        self.params['biomass'] = {}
        self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan
        return None
def fit_substrate(self, time, substrate, biomass_params_dict):
    """Fit ``so``, ``p`` and ``q`` of ``self.substrate`` to measured data.

    The biomass parameters are held fixed. On success the fitted values go
    to ``self.params['substrate']`` and the metrics to
    ``self.r2['substrate']`` / ``self.rmse['substrate']``.

    Args:
        time: Time points.
        substrate: Measured substrate values.
        biomass_params_dict: Fitted biomass parameters keyed by name.

    Returns:
        The fitted substrate curve at ``time``, or ``None`` when biomass
        parameters are missing, the model type is unknown, or the fit fails.
        Failures are printed, not raised.
    """
    if not biomass_params_dict:
        print(f"Error en fit_substrate_{self.model_type}: Parámetros de biomasa no disponibles.")
        return None

    # Dictionary keys in the positional order each biomass model expects.
    key_layout = (
        ('logistic', ('Xo', 'Xm', 'um')),
        ('gompertz', ('Xm', 'um', 'lag')),
        ('moser', ('Xm', 'um', 'Ks')),
        ('baranyi', ('X0', 'Xm', 'um', 'lag')),
    )
    try:
        wanted_keys = None
        for model_name, names in key_layout:
            if self.model_type == model_name:
                wanted_keys = names
                break
        if wanted_keys is None:
            return None
        biomass_params_values = [biomass_params_dict[name] for name in wanted_keys]

        initial_guess = [substrate[0] if len(substrate) > 0 else 1.0, 0.1, 0.01]
        limits = ([0, 0, 0], [np.inf, np.inf, np.inf])

        def model(t, so, p, q):
            return self.substrate(t, so, p, q, biomass_params_values)

        popt, _ = curve_fit(
            model, time, substrate,
            p0=initial_guess, maxfev=self.maxfev, bounds=limits, ftol=1e-9, xtol=1e-9,
        )
        self.params['substrate'] = {'so': popt[0], 'p': popt[1], 'q': popt[2]}
        y_pred = self.substrate(time, *popt, biomass_params_values)

        # A prediction with NaN/Inf counts as a failed fit.
        if not np.all(np.isfinite(y_pred)):
            print(f"Predicción de sustrato contiene NaN/Inf para {self.model_type}. Ajuste fallido.")
            self.r2['substrate'] = np.nan
            self.rmse['substrate'] = np.nan
            return None

        residual_ss = np.sum((substrate - y_pred) ** 2)
        total_ss = np.sum((substrate - np.mean(substrate)) ** 2)
        if total_ss == 0:
            self.r2['substrate'] = 1.0 if residual_ss < 1e-9 else 0.0
        else:
            self.r2['substrate'] = 1 - (residual_ss / total_ss)
        self.rmse['substrate'] = np.sqrt(mean_squared_error(substrate, y_pred))
        return y_pred
    except RuntimeError as e:
        print(f"Error de Runtime en fit_substrate_{self.model_type}: {e}")
        self.params['substrate'] = {}
        self.r2['substrate'] = np.nan
        self.rmse['substrate'] = np.nan
        return None
    except Exception as e:
        print(f"Error general en fit_substrate_{self.model_type}: {e}")
        self.params['substrate'] = {}
        self.r2['substrate'] = np.nan
        self.rmse['substrate'] = np.nan
        return None
def fit_product(self, time, product, biomass_params_dict):
    """Fit ``po``, ``alpha`` and ``beta`` of ``self.product`` to measured data.

    The biomass parameters are held fixed. On success the fitted values go
    to ``self.params['product']`` and the metrics to ``self.r2['product']``
    / ``self.rmse['product']``.

    Args:
        time: Time points.
        product: Measured product values.
        biomass_params_dict: Fitted biomass parameters keyed by name.

    Returns:
        The fitted product curve at ``time``, or ``None`` when biomass
        parameters are missing, the model type is unknown, or the fit fails.
        Failures are printed, not raised.
    """
    if not biomass_params_dict:
        print(f"Error en fit_product_{self.model_type}: Parámetros de biomasa no disponibles.")
        return None

    # Dictionary keys in the positional order each biomass model expects.
    key_layout = (
        ('logistic', ('Xo', 'Xm', 'um')),
        ('gompertz', ('Xm', 'um', 'lag')),
        ('moser', ('Xm', 'um', 'Ks')),
        ('baranyi', ('X0', 'Xm', 'um', 'lag')),
    )
    try:
        wanted_keys = None
        for model_name, names in key_layout:
            if self.model_type == model_name:
                wanted_keys = names
                break
        if wanted_keys is None:
            return None
        biomass_params_values = [biomass_params_dict[name] for name in wanted_keys]

        initial_guess = [product[0] if len(product) > 0 else 0.0, 0.1, 0.01]
        limits = ([0, 0, 0], [np.inf, np.inf, np.inf])

        def model(t, po, alpha, beta):
            return self.product(t, po, alpha, beta, biomass_params_values)

        popt, _ = curve_fit(
            model, time, product,
            p0=initial_guess, maxfev=self.maxfev, bounds=limits, ftol=1e-9, xtol=1e-9,
        )
        self.params['product'] = {'po': popt[0], 'alpha': popt[1], 'beta': popt[2]}
        y_pred = self.product(time, *popt, biomass_params_values)

        # A prediction with NaN/Inf counts as a failed fit.
        if not np.all(np.isfinite(y_pred)):
            print(f"Predicción de producto contiene NaN/Inf para {self.model_type}. Ajuste fallido.")
            self.r2['product'] = np.nan
            self.rmse['product'] = np.nan
            return None

        residual_ss = np.sum((product - y_pred) ** 2)
        total_ss = np.sum((product - np.mean(product)) ** 2)
        if total_ss == 0:
            self.r2['product'] = 1.0 if residual_ss < 1e-9 else 0.0
        else:
            self.r2['product'] = 1 - (residual_ss / total_ss)
        self.rmse['product'] = np.sqrt(mean_squared_error(product, y_pred))
        return y_pred
    except RuntimeError as e:
        print(f"Error de Runtime en fit_product_{self.model_type}: {e}")
        self.params['product'] = {}
        self.r2['product'] = np.nan
        self.rmse['product'] = np.nan
        return None
    except Exception as e:
        print(f"Error general en fit_product_{self.model_type}: {e}")
        self.params['product'] = {}
        self.r2['product'] = np.nan
        self.rmse['product'] = np.nan
        return None
def generate_fine_time_grid(self, time):
    """Return a dense, evenly spaced time grid spanning ``time``.

    Args:
        time: Sequence of time points, or ``None``.

    Returns:
        ``np.array([0])`` for ``None`` or empty input, a copy of the input
        for a single point, a one-element array when all points are equal,
        and otherwise 500 points from the minimum to the maximum.
    """
    if time is None or len(time) == 0:
        return np.array([0])
    if len(time) < 2:
        # A single point cannot define a range.
        return np.array(time)

    start, stop = np.min(time), np.max(time)
    if start == stop:
        return np.array([start])
    return np.linspace(start, stop, 500)
def system(self, y, t, biomass_params_list, substrate_params_list, product_params_list, model_type_for_ode):
    """Right-hand side of the coupled biomass/substrate/product ODE system.

    Args:
        y: State ``[X, S, P]``.
        t: Time.
        biomass_params_list: Parameters passed to the biomass ``*_diff`` method.
        substrate_params_list: ``[so, p, q]``; missing entries count as 0.
        product_params_list: ``[po, alpha, beta]``; missing entries count as 0.
        model_type_for_ode: 'logistic', 'gompertz' or 'moser'. Any other
            value prints a warning and uses ``dX/dt = 0``.

    Returns:
        ``[dX/dt, dS/dt, dP/dt]`` with ``dS/dt = -p*dX/dt - q*X`` and
        ``dP/dt = alpha*dX/dt + beta*X``.
    """
    def coefficient(values, index):
        return values[index] if len(values) > index else 0

    X, _S, _P = y

    if model_type_for_ode in ('logistic', 'gompertz', 'moser'):
        # Route to the differential form that matches the model name.
        biomass_rate = getattr(self, model_type_for_ode + '_diff')
        dXdt = biomass_rate(X, t, biomass_params_list)
    else:
        # No ODE for Baranyi (or unknown models) in this version.
        print(f"Advertencia: Ecuación diferencial no definida para el modelo {model_type_for_ode} en la función 'system'. dXdt=0.")
        dXdt = 0.0

    p_val = coefficient(substrate_params_list, 1)
    q_val = coefficient(substrate_params_list, 2)
    alpha_val = coefficient(product_params_list, 1)
    beta_val = coefficient(product_params_list, 2)

    dSdt = -p_val * dXdt - q_val * X
    dPdt = alpha_val * dXdt + beta_val * X
    return [dXdt, dSdt, dPdt]
def get_initial_conditions(self, time, biomass, substrate, product):
    """Build the ODE initial state ``[X0, S0, P0]``.

    Fitted values are preferred: ``Xo``/``X0`` for logistic/Baranyi, the
    model evaluated at ``t = 0`` for Gompertz/Moser, and ``so``/``po`` for
    substrate/product. The first experimental value (or 0 when there is no
    data) is the fallback, and any NaN result is replaced by 0.0.

    Args:
        time: Unused.
        biomass: Experimental biomass values, or ``None``.
        substrate: Experimental substrate values, or ``None``.
        product: Experimental product values, or ``None``.

    Returns:
        List ``[X0, S0, P0]``.
    """
    def first_or_zero(series):
        return series[0] if series is not None and len(series) > 0 else 0

    def nan_to_zero(value):
        return value if not np.isnan(value) else 0.0

    X0_exp = first_or_zero(biomass)
    S0_exp = first_or_zero(substrate)
    P0_exp = first_or_zero(product)

    X0 = X0_exp
    if 'biomass' in self.params and self.params['biomass']:
        fitted = self.params['biomass']
        kind = self.model_type
        if kind == 'logistic':
            X0 = fitted.get('Xo', X0_exp)
        elif kind == 'baranyi':
            # Baranyi also carries X0 as a direct parameter.
            X0 = fitted.get('X0', X0_exp)
        elif kind in ('gompertz', 'moser') and self.biomass_model:
            # X(t=0) has to be computed from (Xm, um, lag) or (Xm, um, Ks).
            third_key = 'lag' if kind == 'gompertz' else 'Ks'
            args = [fitted.get('Xm', 1), fitted.get('um', 0.1), fitted.get(third_key, 0)]
            X0_calc = self.biomass_model(0, *args)
            X0 = X0_calc if not np.isnan(X0_calc) else X0_exp

    S0 = self.params.get('substrate', {}).get('so', S0_exp)
    P0 = self.params.get('product', {}).get('po', P0_exp)
    return [nan_to_zero(X0), nan_to_zero(S0), nan_to_zero(P0)]
def solve_differential_equations(self, time, biomass, substrate, product):
    """Integrate the biomass/substrate/product ODE system on a fine time grid.

    Args:
        time: Experimental time points.
        biomass: Experimental biomass values (used for initial conditions).
        substrate: Experimental substrate values (used for initial conditions).
        product: Experimental product values (used for initial conditions).

    Returns:
        Tuple ``(X, S, P, time_fine)``. When the model has no differential
        form, no biomass parameters exist, the time vector is invalid or
        the integration fails, the first three entries are ``None`` and the
        fourth is the time vector that was in use at that point.
    """
    if self.biomass_diff is None:  # no differential equation for this model
        print(f"ODE solving no está soportado para el modelo {self.model_type}. Se usarán resultados de curve_fit.")
        return None, None, None, time
    if 'biomass' not in self.params or not self.params['biomass']:
        print("No hay parámetros de biomasa, no se pueden resolver las EDO.")
        return None, None, None, time
    if time is None or len(time) == 0:
        print("Tiempo no válido para resolver EDOs.")
        return None, None, None, np.array([])

    # Parameter list in the order the matching *_diff function expects.
    fitted = self.params['biomass']
    if self.model_type == 'logistic':
        biomass_params_list_ode = [fitted['Xo'], fitted['Xm'], fitted['um']]
    elif self.model_type == 'gompertz':
        biomass_params_list_ode = [fitted['Xm'], fitted['um'], fitted['lag']]
    elif self.model_type == 'moser':
        biomass_params_list_ode = [fitted['Xm'], fitted['um'], fitted['Ks']]
    else:
        # Baranyi has no biomass_diff and is caught by the first guard.
        print(f"Tipo de modelo de biomasa desconocido para EDO: {self.model_type}")
        return None, None, None, time

    substrate_fit = self.params.get('substrate', {})
    product_fit = self.params.get('product', {})
    substrate_params_list = [substrate_fit.get('so', 0), substrate_fit.get('p', 0), substrate_fit.get('q', 0)]
    product_params_list = [product_fit.get('po', 0), product_fit.get('alpha', 0), product_fit.get('beta', 0)]

    initial_conditions = self.get_initial_conditions(time, biomass, substrate, product)
    time_fine = self.generate_fine_time_grid(time)
    if len(time_fine) == 0:
        print("No se pudo generar la malla de tiempo fina.")
        return None, None, None, time

    # self.model_type is forwarded so self.system can route to the right equation.
    ode_args = (biomass_params_list_ode, substrate_params_list, product_params_list, self.model_type)
    try:
        sol = odeint(self.system, initial_conditions, time_fine,
                     args=ode_args, rtol=1e-6, atol=1e-6)
    except Exception as e:
        print(f"Error al resolver EDOs con odeint: {e}")
        # odeint has no ``method`` argument, so the original
        # ``method='lsoda'`` retry always raised TypeError. Retry with a
        # larger internal step budget instead.
        try:
            print("Reintentando con mxstep=5000...")
            sol = odeint(self.system, initial_conditions, time_fine,
                         args=ode_args, rtol=1e-6, atol=1e-6, mxstep=5000)
        except Exception as e_retry:
            print(f"Error al resolver EDOs con odeint (reintento): {e_retry}")
            return None, None, None, time_fine

    X = sol[:, 0]
    S = sol[:, 1]
    P = sol[:, 2]
    return X, S, P, time_fine
def plot_results(self, time, biomass, substrate, product,
                 y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit, # Renamed to avoid confusion
                 biomass_std=None, substrate_std=None, product_std=None,
                 experiment_name='', legend_position='best', params_position='upper right',
                 show_legend=True, show_params=True,
                 style='whitegrid',
                 line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o',
                 use_differential=False, axis_labels=None,
                 show_error_bars=True, error_cap_size=3, error_line_width=1):
    """Draw biomass, substrate and product in three stacked subplots.

    Each subplot shows the experimental points (with optional error bars),
    the model curve, and optionally a box listing the fitted parameters,
    R² and RMSE. The model curve comes from the ODE solution when
    ``use_differential`` is set and the model supports it; otherwise the
    curve_fit models are re-evaluated on a fine time grid.

    Args:
        time: Experimental time points.
        biomass, substrate, product: Experimental values (may be empty).
        y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit:
            curve_fit predictions, or ``None`` when the fit failed.
        biomass_std, substrate_std, product_std: Optional error-bar values.
        experiment_name: Used in the figure title and in printed messages.
        legend_position: Passed to ``ax.legend(loc=...)``.
        params_position: 'outside right', or a string containing
            'upper'/'lower' and 'left'/'right'.
        show_legend, show_params: Toggle the legend and the parameter box.
        style: Passed to ``sns.set_style``.
        line_color, point_color, line_style, marker_style: Plot styling.
        use_differential: Prefer the ODE solution when available.
        axis_labels: Dict with keys 'x_label', 'biomass_label',
            'substrate_label', 'product_label'; defaults are used when None.
        show_error_bars, error_cap_size, error_line_width: Error-bar options.

    Returns:
        The figure rendered as an RGB ``PIL.Image``, or ``None`` when there
        is no biomass fit and the ODE route is not available.
    """
    # Initialize predictions with curve_fit results
    y_pred_biomass, y_pred_substrate, y_pred_product = y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit
    if y_pred_biomass is None and not (use_differential and self.biomass_diff is not None):
        print(f"No se pudo ajustar biomasa para {experiment_name} con {self.model_type} y no se usan EDO. Omitiendo figura.")
        return None
    # Check if ODE should be used and is supported
    can_use_ode = use_differential and self.biomass_diff is not None and 'biomass' in self.params and self.params['biomass']
    if use_differential and self.biomass_diff is None:
        print(f"Modelo {self.model_type} no soporta EDOs. Usando ajuste directo.")
    if axis_labels is None: axis_labels = {'x_label': 'Tiempo', 'biomass_label': 'Biomasa', 'substrate_label': 'Sustrato', 'product_label': 'Producto'}
    sns.set_style(style)
    time_to_plot = time
    if can_use_ode:
        X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product)
        if X_ode is not None:
            y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode
            time_to_plot = time_fine_ode
        else:
            print(f"Fallo al resolver EDOs para {experiment_name}, usando resultados de curve_fit si existen.")
            time_to_plot = self.generate_fine_time_grid(time) # Use fine grid for curve_fit if ODE fails
            # Re-evaluate curve_fit results on fine grid if they exist
            # NOTE(review): the nesting of the substrate/product checks under
            # the biomass check is assumed from the parallel non-ODE branch
            # below (both need biomass_params_values) — confirm.
            if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']:
                # Relies on the params dict preserving the positional order
                # expected by the model function.
                biomass_params_values = list(self.params['biomass'].values())
                y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values)
                if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']:
                    substrate_params_values = list(self.params['substrate'].values())
                    y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values)
                if y_pred_product_fit is not None and 'product' in self.params and self.params['product']:
                    product_params_values = list(self.params['product'].values())
                    y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values)
    else: # Not using ODE or ODE not supported, use curve_fit results on a fine grid
        time_to_plot = self.generate_fine_time_grid(time)
        if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']:
            biomass_params_values = list(self.params['biomass'].values()) # Get latest params
            y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values)
            if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']:
                substrate_params_values = list(self.params['substrate'].values())
                y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values)
            else: # If substrate fit failed or no data, plot NaNs
                y_pred_substrate = np.full_like(time_to_plot, np.nan)
            if y_pred_product_fit is not None and 'product' in self.params and self.params['product']:
                product_params_values = list(self.params['product'].values())
                y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values)
            else: # If product fit failed or no data, plot NaNs
                y_pred_product = np.full_like(time_to_plot, np.nan)
        else: # Biomass fit failed
            y_pred_biomass = np.full_like(time_to_plot, np.nan)
            y_pred_substrate = np.full_like(time_to_plot, np.nan)
            y_pred_product = np.full_like(time_to_plot, np.nan)
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 15))
    fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()})', fontsize=16)
    # One tuple per subplot: axis, data, prediction, std, y-label, legend
    # label, fitted params, R², RMSE.
    plots_config = [
        (ax1, biomass, y_pred_biomass, biomass_std, axis_labels['biomass_label'], 'Modelo', self.params.get('biomass', {}),
         self.r2.get('biomass', np.nan), self.rmse.get('biomass', np.nan)),
        (ax2, substrate, y_pred_substrate, substrate_std, axis_labels['substrate_label'], 'Modelo', self.params.get('substrate', {}),
         self.r2.get('substrate', np.nan), self.rmse.get('substrate', np.nan)),
        (ax3, product, y_pred_product, product_std, axis_labels['product_label'], 'Modelo', self.params.get('product', {}),
         self.r2.get('product', np.nan), self.rmse.get('product', np.nan))
    ]
    for idx, (ax, data_exp, y_pred_model, data_std_exp, ylabel, model_name_legend, params_dict, r2_val, rmse_val) in enumerate(plots_config):
        # Experimental points, with error bars when usable std values exist.
        if data_exp is not None and len(data_exp) > 0 and not np.all(np.isnan(data_exp)):
            if show_error_bars and data_std_exp is not None and len(data_std_exp) == len(data_exp) and not np.all(np.isnan(data_std_exp)):
                ax.errorbar(
                    time, data_exp, yerr=data_std_exp,
                    fmt=marker_style, color=point_color,
                    label='Datos experimentales',
                    capsize=error_cap_size,
                    elinewidth=error_line_width,
                    markeredgewidth=1
                )
            else:
                ax.plot(time, data_exp, marker=marker_style, linestyle='', color=point_color,
                        label='Datos experimentales')
        else:
            ax.text(0.5, 0.5, 'No hay datos experimentales para mostrar.',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=10, color='gray')
        # Model curve, or a message explaining why there is none.
        if y_pred_model is not None and len(y_pred_model) > 0 and not np.all(np.isnan(y_pred_model)):
            ax.plot(time_to_plot, y_pred_model, linestyle=line_style, color=line_color, label=model_name_legend)
        elif idx == 0 and (y_pred_biomass_fit is None and not can_use_ode): # If biomass fit failed and ODE not possible
            ax.text(0.5, 0.6, 'Modelo de biomasa no ajustado.',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=10, color='red')
        elif (idx == 1 and y_pred_substrate_fit is None and not can_use_ode) or \
             (idx == 2 and y_pred_product_fit is None and not can_use_ode) :
            if not ('biomass' in self.params and self.params['biomass']): # If biomass params are missing
                ax.text(0.5, 0.4, 'Modelo no ajustado (depende de biomasa).',
                        horizontalalignment='center', verticalalignment='center',
                        transform=ax.transAxes, fontsize=10, color='orange')
            elif y_pred_model is None or np.all(np.isnan(y_pred_model)): # If this specific model (S or P) failed
                ax.text(0.5, 0.4, 'Modelo no ajustado.',
                        horizontalalignment='center', verticalalignment='center',
                        transform=ax.transAxes, fontsize=10, color='orange')
        ax.set_xlabel(axis_labels['x_label'])
        ax.set_ylabel(ylabel)
        if show_legend:
            ax.legend(loc=legend_position)
        ax.set_title(f'{ylabel}')
        if show_params and params_dict and any(np.isfinite(v) for v in params_dict.values()): # Show if any param is finite
            param_text_list = []
            for k, v_param in params_dict.items():
                param_text_list.append(f"{k} = {v_param:.3g}" if np.isfinite(v_param) else f"{k} = N/A")
            param_text = '\n'.join(param_text_list)
            r2_display = f"{r2_val:.3f}" if np.isfinite(r2_val) else "N/A"
            rmse_display = f"{rmse_val:.3f}" if np.isfinite(rmse_val) else "N/A"
            text = f"{param_text}\nR² = {r2_display}\nRMSE = {rmse_display}"
            if params_position == 'outside right':
                # Shrink the axes area to leave room for the box on the right.
                bbox_props = dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.5)
                fig.subplots_adjust(right=0.75)
                ax.annotate(text, xy=(1.05, 0.5), xycoords='axes fraction',
                            xytext=(10,0), textcoords='offset points',
                            verticalalignment='center', horizontalalignment='left',
                            bbox=bbox_props)
            else:
                # Derive the corner from the position string: 'right' vs
                # left, 'upper' vs bottom.
                text_x, ha = (0.95, 'right') if 'right' in params_position else (0.05, 'left')
                text_y, va = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom')
                ax.text(text_x, text_y, text, transform=ax.transAxes,
                        verticalalignment=va, horizontalalignment=ha,
                        bbox={'boxstyle': 'round,pad=0.3', 'facecolor':'wheat', 'alpha':0.5})
        elif show_params : # No params or all NaN
            ax.text(0.5, 0.3, 'Parámetros no disponibles.',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=9, color='grey')
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    # Render the figure into an in-memory PNG and hand it back as a PIL image.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)
    image = Image.open(buf).convert("RGB")
    plt.close(fig)  # release the figure
    return image
def plot_combined_results(self, time, biomass, substrate, product,
                          y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit,
                          biomass_std=None, substrate_std=None, product_std=None,
                          experiment_name='', legend_position='best', params_position='upper right',
                          show_legend=True, show_params=True,
                          style='whitegrid',
                          line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o',
                          use_differential=False, axis_labels=None,
                          show_error_bars=True, error_cap_size=3, error_line_width=1):
    """Draw biomass, substrate and product on a single figure with three Y axes.

    Args:
        time, biomass, substrate, product: experimental series; an empty or
            all-NaN series is simply not drawn.
        y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit: model
            predictions obtained from the direct fit, or None when that fit
            is unavailable.
        biomass_std, substrate_std, product_std: optional error-bar values;
            used only when their length matches the corresponding series.
        experiment_name: text used in the figure title and console messages.
        legend_position: matplotlib ``loc`` value for the merged legend.
        params_position: 'outside right' or a corner name containing
            'upper'/'lower' and 'left'/'right'.
        show_legend, show_params: toggles for the legend and parameter box.
        style: seaborn style name.
        line_color, point_color: accepted so the signature lines up with
            ``plot_results``; this method uses fixed per-series palettes and
            does not read them.
        line_style, marker_style: matplotlib line style / marker.
        use_differential: when True and the model exposes ``biomass_diff``,
            curves come from ``solve_differential_equations``.
        axis_labels: dict with 'x_label', 'biomass_label', 'substrate_label'
            and 'product_label'; Spanish defaults are used when None.
        show_error_bars, error_cap_size, error_line_width: error-bar options.

    Returns:
        A PIL RGB image of the figure, or None when there is neither a biomass
        fit nor an ODE model to draw.
    """
    y_pred_biomass = y_pred_biomass_fit
    y_pred_substrate = y_pred_substrate_fit
    y_pred_product = y_pred_product_fit

    ode_supported = use_differential and self.biomass_diff is not None
    if y_pred_biomass is None and not ode_supported:
        print(f"No se pudo ajustar biomasa para {experiment_name} con {self.model_type} (combinado). Omitiendo figura.")
        return None
    can_use_ode = bool(ode_supported and self.params.get('biomass'))
    if use_differential and self.biomass_diff is None:
        print(f"Modelo {self.model_type} no soporta EDOs (combinado). Usando ajuste directo.")
    if axis_labels is None:
        axis_labels = {'x_label': 'Tiempo', 'biomass_label': 'Biomasa',
                       'substrate_label': 'Sustrato', 'product_label': 'Producto'}
    sns.set_style(style)

    def curves_on_fine_grid():
        """Evaluate the fitted models on the fine time grid; NaN curves stand in for missing fits."""
        grid = self.generate_fine_time_grid(time)
        biomass_params = self.params.get('biomass')
        if y_pred_biomass_fit is None or not self.biomass_model or not biomass_params:
            return (grid, np.full_like(grid, np.nan),
                    np.full_like(grid, np.nan), np.full_like(grid, np.nan))
        biomass_values = list(biomass_params.values())
        x_curve = self.biomass_model(grid, *biomass_values)
        if y_pred_substrate_fit is not None and self.params.get('substrate'):
            s_curve = self.substrate(grid, *list(self.params['substrate'].values()), biomass_values)
        else:
            s_curve = np.full_like(grid, np.nan)
        if y_pred_product_fit is not None and self.params.get('product'):
            p_curve = self.product(grid, *list(self.params['product'].values()), biomass_values)
        else:
            p_curve = np.full_like(grid, np.nan)
        return grid, x_curve, s_curve, p_curve

    time_to_plot = time
    ode_solved = False
    if can_use_ode:
        X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product)
        if X_ode is not None:
            y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode
            time_to_plot = time_fine_ode
            ode_solved = True
        else:
            print(f"Fallo al resolver EDOs para {experiment_name} (combinado), usando resultados de curve_fit.")
    if not ode_solved:
        # One shared path for "ODE not requested/supported" and "ODE failed", so
        # every curve is guaranteed to have the same length as time_to_plot.
        time_to_plot, y_pred_biomass, y_pred_substrate, y_pred_product = curves_on_fine_grid()

    colors = {'Biomasa': 'blue', 'Sustrato': 'green', 'Producto': 'red'}
    data_colors = {'Biomasa': 'darkblue', 'Sustrato': 'darkgreen', 'Producto': 'darkred'}
    model_colors = {'Biomasa': 'cornflowerblue', 'Sustrato': 'limegreen', 'Producto': 'salmon'}

    def has_values(series):
        """True when the series exists, is non-empty and is not entirely NaN."""
        return series is not None and len(series) > 0 and not np.all(np.isnan(series))

    def draw_series(ax, key, label, data, data_std, model_curve):
        """Plot the measured points (with optional error bars) and the model curve on one axis."""
        ax.set_ylabel(label, color=colors[key])
        if has_values(data):
            use_error_bars = (show_error_bars and data_std is not None
                              and len(data_std) == len(data)
                              and not np.all(np.isnan(data_std)))
            if use_error_bars:
                ax.errorbar(
                    time, data, yerr=data_std,
                    fmt=marker_style, color=data_colors[key],
                    label=f'{label} (Datos)',
                    capsize=error_cap_size, elinewidth=error_line_width, markersize=5
                )
            else:
                ax.plot(time, data, marker=marker_style, linestyle='', color=data_colors[key],
                        label=f'{label} (Datos)', markersize=5)
        if has_values(model_curve):
            ax.plot(time_to_plot, model_curve, linestyle=line_style, color=model_colors[key],
                    label=f'{label} (Modelo)')
        ax.tick_params(axis='y', labelcolor=colors[key])

    fig, ax1 = plt.subplots(figsize=(12, 7))
    try:
        fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()})', fontsize=16)
        ax1.set_xlabel(axis_labels['x_label'])
        draw_series(ax1, 'Biomasa', axis_labels['biomass_label'], biomass, biomass_std, y_pred_biomass)

        ax2 = ax1.twinx()
        draw_series(ax2, 'Sustrato', axis_labels['substrate_label'], substrate, substrate_std, y_pred_substrate)

        # Third axis: shift its spine outwards so it does not sit on top of ax2's.
        ax3 = ax1.twinx()
        ax3.spines["right"].set_position(("axes", 1.15))
        ax3.set_frame_on(True)
        ax3.patch.set_visible(False)
        draw_series(ax3, 'Producto', axis_labels['product_label'], product, product_std, y_pred_product)

        # Merge the legend entries of the three axes into a single legend on ax1.
        handles, labels = [], []
        for ax_current in (ax1, ax2, ax3):
            h, l = ax_current.get_legend_handles_labels()
            handles.extend(h)
            labels.extend(l)
        if handles and show_legend:
            unique_labels_dict = dict(zip(labels, handles))
            ax1.legend(unique_labels_dict.values(), unique_labels_dict.keys(), loc=legend_position)

        if show_params:
            texts_to_display = []
            param_categories = [
                (axis_labels['biomass_label'], self.params.get('biomass', {}), self.r2.get('biomass', np.nan), self.rmse.get('biomass', np.nan)),
                (axis_labels['substrate_label'], self.params.get('substrate', {}), self.r2.get('substrate', np.nan), self.rmse.get('substrate', np.nan)),
                (axis_labels['product_label'], self.params.get('product', {}), self.r2.get('product', np.nan), self.rmse.get('product', np.nan))
            ]
            for label, params_dict, r2_val, rmse_val in param_categories:
                if params_dict and any(np.isfinite(v) for v in params_dict.values()):
                    param_text_list = [f" {k} = {v_par:.3g}" if np.isfinite(v_par) else f" {k} = N/A"
                                       for k, v_par in params_dict.items()]
                    param_text = '\n'.join(param_text_list)
                    r2_display = f"{r2_val:.3f}" if np.isfinite(r2_val) else "N/A"
                    rmse_display = f"{rmse_val:.3f}" if np.isfinite(rmse_val) else "N/A"
                    texts_to_display.append(f"{label}:\n{param_text}\n R² = {r2_display}\n RMSE = {rmse_display}")
                elif params_dict:
                    texts_to_display.append(f"{label}:\n Parámetros no válidos o N/A")
            total_text = "\n\n".join(texts_to_display)
            if total_text:
                box_style = dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.7)
                if params_position == 'outside right':
                    fig.subplots_adjust(right=0.70)
                    fig.text(0.72, 0.5, total_text, transform=fig.transFigure,
                             verticalalignment='center', horizontalalignment='left',
                             bbox=box_style, fontsize=8)
                else:
                    text_x, ha = (0.95, 'right') if 'right' in params_position else (0.05, 'left')
                    text_y, va = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom')
                    ax1.text(text_x, text_y, total_text, transform=ax1.transAxes,
                             verticalalignment=va, horizontalalignment=ha,
                             bbox=box_style, fontsize=8)

        # Lay out this figure explicitly instead of pyplot's global "current" figure.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        if params_position == 'outside right':
            # tight_layout resets the margins, so reserve the right-hand strip again.
            fig.subplots_adjust(right=0.70)

        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
        buf.seek(0)
        return Image.open(buf).convert("RGB")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
# Column layout of the comparison table; also used to fill in metric columns
# that placeholder rows do not carry.
_COMPARISON_COLUMNS = [
    'Experimento', 'Modelo', 'R² Biomasa', 'RMSE Biomasa',
    'R² Sustrato', 'RMSE Sustrato', 'R² Producto', 'RMSE Producto',
]


def _extract_aligned_series(exp_df):
    """Return (time, biomass, substrate, product) float arrays aligned row by row.

    Only rows where time and every available series are all present are kept,
    so value ``i`` of each returned array belongs to time ``i``. A series that
    is missing or entirely NaN comes back as an empty array and does not
    restrict the rows. Time is returned empty when it has no values; biomass
    (and the rest) are returned empty when biomass is unavailable or no row
    has all series present.

    Raises:
        KeyError: when the 'Tiempo' column is absent.
    """
    empty = np.array([])
    time_col = exp_df['Tiempo'].astype(float)
    if not time_col.notna().any():
        return empty, empty, empty, empty

    available = {}
    for name in ('Biomasa', 'Sustrato', 'Producto'):
        if name in exp_df:
            column = exp_df[name].astype(float)
            if column.notna().any():
                available[name] = column
    if 'Biomasa' not in available:
        return time_col.dropna().to_numpy(), empty, empty, empty

    mask = time_col.notna()
    for column in available.values():
        mask = mask & column.notna()
    if not mask.any():
        return time_col.dropna().to_numpy(), empty, empty, empty

    def pick(name):
        return available[name][mask].to_numpy() if name in available else empty

    return time_col[mask].to_numpy(), pick('Biomasa'), pick('Sustrato'), pick('Producto')


def process_all_data(file, legend_position, params_position, model_types_selected, experiment_names_str,
                     lower_bounds_str, upper_bounds_str,
                     mode, style, line_color, point_color, line_style, marker_style,
                     show_legend, show_params, use_differential, maxfev_val,
                     axis_labels_dict,
                     show_error_bars, error_cap_size, error_line_width):
    """Fit the selected biomass models to every sheet of an Excel file and plot them.

    Args:
        file: path to the workbook, or an object whose ``name`` attribute is the path.
        legend_position, params_position, style, line_color, point_color,
            line_style, marker_style, show_legend, show_params,
            use_differential, axis_labels_dict, show_error_bars,
            error_cap_size, error_line_width: forwarded unchanged to the
            plotting methods.
        model_types_selected: iterable of internal model names to fit.
        experiment_names_str: optional newline-separated display names, one per sheet.
        lower_bounds_str, upper_bounds_str: accepted but not used by this function.
        mode: 'independent' (one fit per level-0 column group) or
            'average' / 'combinado' (one fit per sheet on the averaged data).
        maxfev_val: passed to ``BioprocessModel`` as ``maxfev``.

    Returns:
        ``(figures, comparison_df, message)``: the list of generated figures,
        the sorted comparison table, and a status text that includes any
        per-sheet warnings.
    """
    if file is None:
        return [], pd.DataFrame(), "Por favor, sube un archivo Excel."
    try:
        xls = pd.ExcelFile(file.name if hasattr(file, 'name') else file)
    except Exception as e:
        return [], pd.DataFrame(), f"Error al leer el archivo Excel: {e}"

    figures = []
    comparison_data = []
    all_plot_messages = []
    # Tolerate None and stray whitespace / '\r' around the user-supplied names.
    experiment_names_list = [name.strip() for name in (experiment_names_str or '').strip().splitlines()]

    def add_placeholder_rows(experiment_name):
        """Record a NaN row per selected model when an experiment has no biomass data."""
        for mt in model_types_selected:
            comparison_data.append({'Experimento': experiment_name, 'Modelo': mt.capitalize(),
                                    'R² Biomasa': np.nan, 'RMSE Biomasa': np.nan})

    def fit_and_plot(experiment_name, time_data, biomass_data, substrate_data, product_data,
                     std_data, failure_label, combined):
        """Fit every selected model to one data set, record its metrics and collect its figure."""
        for model_type_iter in model_types_selected:
            model_instance = BioprocessModel(model_type=model_type_iter, maxfev=maxfev_val)
            model_instance.fit_model()  # Sets self.biomass_model, self.biomass_diff
            y_pred_biomass = model_instance.fit_biomass(time_data, biomass_data)
            y_pred_substrate, y_pred_product = None, None
            if y_pred_biomass is not None and model_instance.params.get('biomass'):
                if len(substrate_data) > 0:
                    y_pred_substrate = model_instance.fit_substrate(time_data, substrate_data, model_instance.params['biomass'])
                if len(product_data) > 0:
                    y_pred_product = model_instance.fit_product(time_data, product_data, model_instance.params['biomass'])
            else:
                all_plot_messages.append(f"{failure_label}: {experiment_name}, {model_type_iter}.")
            comparison_data.append({
                'Experimento': experiment_name, 'Modelo': model_type_iter.capitalize(),
                'R² Biomasa': model_instance.r2.get('biomass', np.nan), 'RMSE Biomasa': model_instance.rmse.get('biomass', np.nan),
                'R² Sustrato': model_instance.r2.get('substrate', np.nan), 'RMSE Sustrato': model_instance.rmse.get('substrate', np.nan),
                'R² Producto': model_instance.r2.get('product', np.nan), 'RMSE Producto': model_instance.rmse.get('product', np.nan)
            })
            plot_func = model_instance.plot_combined_results if combined else model_instance.plot_results
            biomass_std, substrate_std, product_std = std_data
            fig = plot_func(
                time_data, biomass_data, substrate_data, product_data,
                y_pred_biomass, y_pred_substrate, y_pred_product,  # curve_fit results
                biomass_std, substrate_std, product_std,
                experiment_name, legend_position, params_position,
                show_legend, show_params, style, line_color, point_color, line_style, marker_style,
                use_differential, axis_labels_dict,
                show_error_bars, error_cap_size, error_line_width
            )
            if fig:
                figures.append(fig)

    def last_or_empty(series_list):
        """Last stored series, or an empty array when nothing usable was stored."""
        if series_list and len(series_list[-1]) > 0:
            return series_list[-1]
        return np.array([])

    def last_std_or_none(std_list, reference):
        """Last stored std series when its length matches ``reference``, else None."""
        if std_list and len(std_list[-1]) == len(reference):
            return std_list[-1]
        return None

    # The context manager closes the workbook handle on every exit path.
    with xls:
        try:
            sheet_names = xls.sheet_names
        except Exception as e:
            return [], pd.DataFrame(), f"Error al leer el archivo Excel: {e}"
        if not sheet_names:
            return [], pd.DataFrame(), "El archivo Excel está vacío."

        for sheet_name_idx, sheet_name in enumerate(sheet_names):
            if sheet_name_idx < len(experiment_names_list) and experiment_names_list[sheet_name_idx]:
                current_experiment_name_base = experiment_names_list[sheet_name_idx]
            else:
                current_experiment_name_base = f"Hoja '{sheet_name}'"

            try:
                df = pd.read_excel(xls, sheet_name=sheet_name, header=[0, 1])
                if df.empty:
                    all_plot_messages.append(f"Hoja '{sheet_name}' vacía.")
                    continue
                if not any(col_level2 == 'Tiempo' for _, col_level2 in df.columns):
                    all_plot_messages.append(f"Hoja '{sheet_name}' sin 'Tiempo'.")
                    continue
            except Exception as e:
                all_plot_messages.append(f"Error leyendo hoja '{sheet_name}': {e}.")
                continue

            # Processed once per sheet; provides the averaged series for average/combinado.
            model_dummy_for_sheet = BioprocessModel()
            try:
                model_dummy_for_sheet.process_data(df)
            except ValueError as e:
                all_plot_messages.append(f"Error procesando datos de '{sheet_name}': {e}.")
                continue

            if mode == 'independent':
                grouped_cols = df.columns.get_level_values(0).unique()
                for exp_idx, exp_col_name in enumerate(grouped_cols):
                    current_experiment_name = f"{current_experiment_name_base} - Exp {exp_idx + 1} ({exp_col_name})"
                    try:
                        time_exp, biomass_exp, substrate_exp, product_exp = _extract_aligned_series(df[exp_col_name])
                    except KeyError as e:
                        all_plot_messages.append(f"Faltan columnas en '{current_experiment_name}': {e}.")
                        continue
                    except Exception as e_data:
                        all_plot_messages.append(f"Error extrayendo datos para '{current_experiment_name}': {e_data}.")
                        continue
                    if len(time_exp) == 0:
                        all_plot_messages.append(f"Sin datos de tiempo para {current_experiment_name}.")
                        continue
                    if len(biomass_exp) == 0:
                        all_plot_messages.append(f"Sin datos de biomasa para {current_experiment_name}.")
                        add_placeholder_rows(current_experiment_name)
                        continue
                    # No standard deviations are available in independent mode.
                    fit_and_plot(current_experiment_name, time_exp, biomass_exp, substrate_exp, product_exp,
                                 (None, None, None), "Ajuste biomasa falló", combined=False)

            elif mode in ['average', 'combinado']:
                current_experiment_name = f"{current_experiment_name_base} - Promedio"
                time_avg = model_dummy_for_sheet.time
                biomass_avg = last_or_empty(model_dummy_for_sheet.dataxp)
                substrate_avg = last_or_empty(model_dummy_for_sheet.datasp)
                product_avg = last_or_empty(model_dummy_for_sheet.datapp)
                std_avg = (
                    last_std_or_none(model_dummy_for_sheet.datax_std, biomass_avg),
                    last_std_or_none(model_dummy_for_sheet.datas_std, substrate_avg),
                    last_std_or_none(model_dummy_for_sheet.datap_std, product_avg),
                )
                if time_avg is None or len(time_avg) == 0:
                    all_plot_messages.append(f"Sin datos de tiempo promedio para '{sheet_name}'.")
                    continue
                if len(biomass_avg) == 0:
                    all_plot_messages.append(f"Sin datos de biomasa promedio para '{sheet_name}'.")
                    add_placeholder_rows(current_experiment_name)
                    continue
                fit_and_plot(current_experiment_name, time_avg, biomass_avg, substrate_avg, product_avg,
                             std_avg, "Ajuste biomasa promedio falló", combined=(mode == 'combinado'))

    comparison_df = pd.DataFrame(comparison_data)
    if not comparison_df.empty:
        # Placeholder rows carry only the biomass metrics; make sure every
        # column used as a sort key exists before sorting.
        comparison_df = comparison_df.reindex(columns=_COMPARISON_COLUMNS)
        for col in _COMPARISON_COLUMNS[2:]:
            comparison_df[col] = pd.to_numeric(comparison_df[col], errors='coerce')
        comparison_df_sorted = comparison_df.sort_values(
            by=['Experimento', 'Modelo', 'R² Biomasa', 'R² Sustrato', 'R² Producto', 'RMSE Biomasa', 'RMSE Sustrato', 'RMSE Producto'],
            ascending=[True, True, False, False, False, True, True, True]
        ).reset_index(drop=True)
    else:
        comparison_df_sorted = pd.DataFrame(columns=_COMPARISON_COLUMNS)

    final_message = "Procesamiento completado."
    if all_plot_messages:
        final_message += " Mensajes:\n" + "\n".join(all_plot_messages)
    if not figures and not comparison_df_sorted.empty:
        final_message += "\nNo se generaron gráficos, pero hay datos en la tabla."
    elif not figures and comparison_df_sorted.empty:
        final_message += "\nNo se generaron gráficos ni datos para la tabla."
    return figures, comparison_df_sorted, final_message
# (display label, internal model name) pairs used as the choices of the
# biomass-model checkbox group. To offer another model, append a pair of the
# form ("Display Name (X-param)", "internal_model_name").
MODEL_CHOICES = [
    ("Logistic (3-parám)", "logistic"),
    ("Gompertz (3-parám)", "gompertz"),
    ("Moser (3-parám)", "moser"),
    ("Baranyi (4-parám)", "baranyi"),
]
def create_interface():
    """Build the Gradio Blocks application and return it (not yet launched).

    The interface wires a "simulate" button to ``process_all_data`` and two
    export buttons that write the last comparison table to a temporary Excel
    or CSV file offered for download.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Modelos Cinéticos de Bioprocesos")
        gr.Markdown(r"""
Análisis y visualización de datos de bioprocesos utilizando modelos cinéticos como Logístico, Gompertz y Moser para el crecimiento de biomasa,
y el modelo de Luedeking-Piret para el consumo de sustrato y la formación de producto.
Nuevos modelos como Baranyi (4 parámetros) han sido añadidos.
**Instrucciones:**
1. Sube un archivo Excel. El archivo debe tener una estructura de MultiIndex en las columnas:
- Nivel 0: Nombre del experimento/tratamiento (ej: "Control", "Tratamiento A")
- Nivel 1: Tipo de dato ("Tiempo", "Biomasa", "Sustrato", "Producto")
- Si hay réplicas, deben estar como columnas separadas bajo el mismo nombre de experimento (Nivel 0) y tipo de dato (Nivel 1).
Ejemplo: (Control, Biomasa, Rep1), (Control, Biomasa, Rep2). El código promediará estas réplicas para los modos "average" y "combinado".
Para el modo "independent", se asume una sola serie de datos por (Experimento, TipoDato).
2. Selecciona el/los tipo(s) de modelo(s) de biomasa a ajustar. Los modelos están agrupados por el número de parámetros.
3. Elige el modo de análisis:
- `independent`: Analiza cada experimento (columna de Nivel 0) individualmente.
- `average`: Promedia los datos de todos los experimentos dentro de una hoja y ajusta los modelos a estos promedios. Se grafica en subplots separados.
- `combinado`: Similar a `average`, pero grafica Biomasa, Sustrato y Producto en un solo gráfico con múltiples ejes Y.
4. Configura las opciones de graficación (leyenda, parámetros, estilos, colores, etc.).
5. (Opcional) Personaliza los nombres de los experimentos y los títulos de los ejes.
6. Haz clic en "Simular" para generar los gráficos y la tabla comparativa.
7. Puedes exportar la tabla de resultados a Excel o CSV.
""")
        gr.Markdown(r"""
## Ecuaciones Diferenciales Utilizadas (Simplificado)
**Biomasa:**
- Logístico (3p: $X_0, X_m, \mu_m$):
$$ X(t) = \frac{X_0 X_m e^{\mu_m t}}{X_m - X_0 + X_0 e^{\mu_m t}} \quad \text{o} \quad \frac{dX}{dt} = \mu_m X\left(1 - \frac{X}{X_m}\right) $$
- Gompertz (3p: $X_m, \mu_m, \lambda$):
$$ X(t) = X_m \exp\left(-\exp\left(\frac{\mu_m e}{X_m}(\lambda-t)+1\right)\right) \quad \text{o} \quad \frac{dX}{dt} = \mu_m X \ln\left(\frac{X_m}{X}\right) \text{ (forma alternativa)} $$
- Moser (3p: $X_m, \mu_m, K_s$ - forma simplificada):
$$ X(t)=X_m(1-e^{-\mu_m(t-K_s)}) \quad \text{o} \quad \frac{dX}{dt}=\mu_m(X_m - X) $$
- Baranyi (4p: $X_0, X_m, \mu_m, \lambda$):
$$ \ln X(t) = \ln X_0 + \mu_m A(t) - \ln\left(1 + \frac{e^{\mu_m A(t)}-1}{X_m/X_0}\right) $$
$$ A(t) = t + \frac{1}{\mu_m} \ln(e^{-\mu_m t} + e^{-\mu_m \lambda} - e^{-\mu_m(t+\lambda)}) $$
(Ecuación diferencial compleja, no usada para ODE en esta versión)
**Sustrato y Producto (Luedeking-Piret):**
$$ \frac{dS}{dt} = -p \frac{dX}{dt} - q X \quad ; \quad \frac{dP}{dt} = \alpha \frac{dX}{dt} + \beta X $$
Parámetros: $X_m, \mu_m, X_0, \lambda (\text{lag}), K_s, p, q, \alpha, \beta$.
""")

        with gr.Row():
            file_input = gr.File(label="Subir archivo Excel (.xlsx)", file_types=['.xlsx'])
            mode = gr.Radio(["independent", "average", "combinado"], label="Modo de Análisis", value="independent",
                            info="Independent: cada experimento. Average/Combinado: promedio de la hoja.")

        with gr.Accordion("Configuración de Modelos y Simulación", open=True):
            model_types_selected_ui = gr.CheckboxGroup(
                choices=MODEL_CHOICES,  # (display name, internal value) pairs
                label="Tipo(s) de Modelo de Biomasa",
                value=["logistic"]  # default selection, given as the internal value
            )
            use_differential_ui = gr.Checkbox(label="Usar Ecuaciones Diferenciales para Graficar (experimental)", value=False,
                                              info="Si se marca, las curvas se generan resolviendo las EDOs (si el modelo lo soporta). Si no, por ajuste directo.")
            maxfev_input_ui = gr.Number(label="maxfev (Máx. evaluaciones para el ajuste)", value=50000, minimum=1000, step=1000)
            experiment_names_str_ui = gr.Textbox(
                label="Nombres de los experimentos/hojas (uno por línea, opcional)",
                placeholder="Nombre para Hoja 1\nNombre para Hoja 2\n...",
                lines=3,
                info="Si se deja vacío, se usarán los nombres de las hojas o 'Exp X'."
            )

        with gr.Accordion("Configuración de Gráficos", open=False):
            with gr.Row():
                with gr.Column(scale=1):
                    legend_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "best"], label="Posición de Leyenda", value="best")
                    show_legend_ui = gr.Checkbox(label="Mostrar Leyenda", value=True)
                with gr.Column(scale=1):
                    params_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "outside right"], label="Posición de Parámetros", value="upper right")
                    show_params_ui = gr.Checkbox(label="Mostrar Parámetros", value=True)
            with gr.Row():
                style_dropdown_ui = gr.Dropdown(choices=['white', 'dark', 'whitegrid', 'darkgrid', 'ticks'], label="Estilo de Gráfico (Seaborn)", value='whitegrid')
                line_color_picker_ui = gr.ColorPicker(label="Color de Línea (Modelo)", value='#0072B2')
                point_color_picker_ui = gr.ColorPicker(label="Color de Puntos (Datos)", value='#D55E00')
            with gr.Row():
                line_style_dropdown_ui = gr.Dropdown(choices=['-', '--', '-.', ':'], label="Estilo de Línea", value='-')
                marker_style_dropdown_ui = gr.Dropdown(choices=['o', 's', '^', 'v', 'D', 'x', '+', '*'], label="Estilo de Marcador (Puntos)", value='o')
            with gr.Row():
                x_axis_label_input_ui = gr.Textbox(label="Título Eje X", value="Tiempo (h)", placeholder="Tiempo (unidades)")
                biomass_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Biomasa)", value="Biomasa (g/L)", placeholder="Biomasa (unidades)")
            with gr.Row():
                substrate_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Sustrato)", value="Sustrato (g/L)", placeholder="Sustrato (unidades)")
                product_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Producto)", value="Producto (g/L)", placeholder="Producto (unidades)")
            with gr.Row():
                show_error_bars_ui = gr.Checkbox(label="Mostrar barras de error", value=True)
                error_cap_size_ui = gr.Slider(label="Tamaño de tapa de barras de error", minimum=1, maximum=10, step=1, value=3)
                error_line_width_ui = gr.Slider(label="Grosor de línea de error", minimum=0.5, maximum=5, step=0.5, value=1.0)

        with gr.Accordion("Configuración Avanzada de Ajuste (No implementado aún)", open=False):
            with gr.Row():
                lower_bounds_str_ui = gr.Textbox(label="Lower Bounds (no usado actualmente)", lines=3)
                upper_bounds_str_ui = gr.Textbox(label="Upper Bounds (no usado actualmente)", lines=3)

        simulate_btn = gr.Button("Simular y Graficar", variant="primary")
        status_message_ui = gr.Textbox(label="Estado del Procesamiento", interactive=False)
        output_gallery_ui = gr.Gallery(label="Resultados Gráficos", columns=[2, 1], height='auto', object_fit="contain")
        output_table_ui = gr.Dataframe(
            label="Tabla Comparativa de Modelos",
            headers=["Experimento", "Modelo", "R² Biomasa", "RMSE Biomasa",
                     "R² Sustrato", "RMSE Sustrato", "R² Producto", "RMSE Producto"],
            interactive=False, wrap=True
        )
        state_df_ui = gr.State(pd.DataFrame())  # keeps the last table for the export buttons

        def run_simulation_interface(file, legend_pos, params_pos, models_sel, analysis_mode, exp_names,
                                     low_bounds, up_bounds, plot_style,
                                     line_col, point_col, line_sty, marker_sty,
                                     show_leg, show_par, use_diff, maxfev,
                                     x_label, biomass_label, substrate_label, product_label,
                                     show_error_bars_arg, error_cap_size_arg, error_line_width_arg):
            """Validate the UI inputs, run ``process_all_data`` and fan its result out to the outputs."""
            if file is None:
                return [], pd.DataFrame(), "Error: Por favor, sube un archivo Excel.", pd.DataFrame()
            if not models_sel:
                return [], pd.DataFrame(), "Error: Por favor, selecciona al menos un modelo.", pd.DataFrame()
            axis_labels = {
                'x_label': x_label if x_label else 'Tiempo',
                'biomass_label': biomass_label if biomass_label else 'Biomasa',
                'substrate_label': substrate_label if substrate_label else 'Sustrato',
                'product_label': product_label if product_label else 'Producto'
            }
            # A cleared number field arrives as None; fall back to the field's default.
            maxfev_value = int(maxfev) if maxfev is not None else 50000
            figures, comparison_df, message = process_all_data(
                file, legend_pos, params_pos, models_sel, exp_names,
                low_bounds, up_bounds, analysis_mode, plot_style,
                line_col, point_col, line_sty, marker_sty,
                show_leg, show_par, use_diff, maxfev_value,
                axis_labels, show_error_bars_arg, error_cap_size_arg, error_line_width_arg
            )
            # The table is returned twice: once for display, once for the export state.
            return figures, comparison_df, message, comparison_df

        # Shared by the simulate button and the example row; order matches
        # run_simulation_interface's parameters.
        simulation_inputs = [
            file_input, legend_position_ui, params_position_ui, model_types_selected_ui, mode, experiment_names_str_ui,
            lower_bounds_str_ui, upper_bounds_str_ui, style_dropdown_ui,
            line_color_picker_ui, point_color_picker_ui, line_style_dropdown_ui, marker_style_dropdown_ui,
            show_legend_ui, show_params_ui, use_differential_ui, maxfev_input_ui,
            x_axis_label_input_ui, biomass_axis_label_input_ui, substrate_axis_label_input_ui, product_axis_label_input_ui,
            show_error_bars_ui, error_cap_size_ui, error_line_width_ui
        ]
        simulate_btn.click(
            fn=run_simulation_interface,
            inputs=simulation_inputs,
            outputs=[output_gallery_ui, output_table_ui, status_message_ui, state_df_ui]
        )

        with gr.Row():
            export_excel_btn = gr.Button("Exportar Tabla a Excel")
            export_csv_btn = gr.Button("Exportar Tabla a CSV")
        download_file_output_ui = gr.File(label="Descargar archivo", interactive=False)

        def _message_file(text):
            """Write ``text`` to a temporary .txt file and return its path."""
            with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp:
                tmp.write(text.encode())
            return tmp.name

        def _export_table(df_to_export, suffix, writer, format_name):
            """Write the table with ``writer`` to a new temp file; return that path or a .txt notice."""
            if df_to_export is None or df_to_export.empty:
                return _message_file("No hay datos para exportar.")
            try:
                # Reserve a path, then close the handle before pandas reopens it by
                # name; writing while it is still open fails on some platforms.
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                    target_path = tmp.name
                writer(df_to_export, target_path)
                return target_path
            except Exception as e:
                return _message_file(f"Error al exportar a {format_name}: {e}")

        def export_excel_interface(df_to_export):
            """Export the stored comparison table as an .xlsx download."""
            return _export_table(df_to_export, ".xlsx",
                                 lambda frame, path: frame.to_excel(path, index=False), "Excel")

        export_excel_btn.click(fn=export_excel_interface, inputs=state_df_ui, outputs=download_file_output_ui)

        def export_csv_interface(df_to_export):
            """Export the stored comparison table as a .csv download."""
            return _export_table(df_to_export, ".csv",
                                 lambda frame, path: frame.to_csv(path, index=False), "CSV")

        export_csv_btn.click(fn=export_csv_interface, inputs=state_df_ui, outputs=download_file_output_ui)

        gr.Examples(
            examples=[
                [None, "best", "upper right", ["logistic", "baranyi"], "independent", "Exp A\nExp B", "", "", "whitegrid", "#0072B2", "#D55E00", "-", "o", True, True, False, 50000, "Tiempo (días)", "Células (millones/mL)", "Glucosa (mM)", "Anticuerpo (mg/L)", True, 3, 1.0]
            ],
            inputs=simulation_inputs,
            label="Ejemplo de Configuración (subir archivo manualmente)"
        )
    return demo
if __name__ == '__main__':
    # Script entry point: build the interface and launch it, passing share=True to launch().
    create_interface().launch(share=True)