BiotechU / app.py
C2MV's picture
Update app.py
4d0ec3f verified
raw
history blame
51.3 kB
# import os # No parece usarse directamente, se puede quitar si no hay un uso oculto
# !pip install gradio seaborn scipy scikit-learn openpyxl pydantic==1.10.0 -q # Ejecutar en el entorno
from pydantic import BaseModel # ConfigDict ya no es necesario en Pydantic V2 si solo usas arbitrary_types_allowed
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.integrate import odeint
from scipy.optimize import curve_fit
from sklearn.metrics import mean_squared_error
import gradio as gr
import io
from PIL import Image
import tempfile
# --- Column names matched against the second level of each sheet's header ---
COL_TIME = 'Tiempo'
COL_BIOMASS = 'Biomasa'
COL_SUBSTRATE = 'Sustrato'
COL_PRODUCT = 'Producto'
# --- Axis / legend labels; they currently mirror the column names ---
LABEL_TIME = COL_TIME
LABEL_BIOMASS = COL_BIOMASS
LABEL_SUBSTRATE = COL_SUBSTRATE
LABEL_PRODUCT = COL_PRODUCT
# --- End of constants ---
class YourModel(BaseModel): # This looks like a leftover; nothing visible here uses it. It can be removed if that is the case.
    # Pydantic configuration: accept field types pydantic cannot validate itself.
    class Config:
        arbitrary_types_allowed = True
class BioprocessModel:
    """Fit and plot batch-bioprocess kinetics for biomass, substrate and product.

    Biomass follows one of three closed-form growth curves (``'logistic'``,
    ``'gompertz'`` or ``'moser'``). Substrate and product follow the integrated
    Luedeking-Piret relations driven by the fitted biomass curve::

        S(t) = so - p     * (X(t) - X(t0)) - q    * integral_{t0}^{t} X dt
        P(t) = po + alpha * (X(t) - X(t0)) + beta * integral_{t0}^{t} X dt

    where ``t0`` is the first time point supplied, so ``so``/``po`` are the
    concentrations at the start of the run and can be used directly as ODE
    initial conditions.

    Fitted parameters are stored in ``self.params`` and the goodness-of-fit
    figures in ``self.r2`` / ``self.rmse``; all three are dicts keyed by
    ``'biomass'``, ``'substrate'`` and ``'product'``.
    """

    # Parameter names of each biomass model, in the positional order the
    # closed-form and differential functions expect them.
    _BIOMASS_PARAM_NAMES = {
        'logistic': ('xo', 'xm', 'um'),
        'gompertz': ('xm', 'um', 'lag'),
        'moser': ('Xm', 'um', 'Ks'),
    }

    def __init__(self, model_type='logistic', maxfev=50000):
        """Create an empty model.

        Args:
            model_type: 'logistic', 'gompertz' or 'moser'.
            maxfev: maximum number of function evaluations handed to curve_fit.
        """
        self.params = {}
        self.r2 = {}
        self.rmse = {}
        # Raw replicate matrices, one entry appended per process_data() call.
        self.datax = []
        self.datas = []
        self.datap = []
        # Per-time-point means.
        self.dataxp = []
        self.datasp = []
        self.datapp = []
        # Per-time-point sample standard deviations.
        self.datax_std = []
        self.datas_std = []
        self.datap_std = []
        # Per-time-point standard errors of the mean.
        self.datax_sem = []
        self.datas_sem = []
        self.datap_sem = []
        # Number of valid (non-NaN) replicates per time point.
        self.n_reps_x = []
        self.n_reps_s = []
        self.n_reps_p = []
        self.biomass_model = None
        self.biomass_diff = None
        self.model_type = model_type
        self.maxfev = maxfev
        self.time = np.array([])

    # ------------------------------------------------------------------
    # Biomass models (closed form and differential form)
    # ------------------------------------------------------------------
    @staticmethod
    def logistic(time, xo, xm, um):
        """Logistic growth curve X(t) with initial biomass xo, maximum xm and rate um."""
        growth = np.exp(um * time)
        denominator = 1 - (xo / xm) * (1 - growth)
        # Keep the denominator away from zero. np.sign() is 0 for an exact zero,
        # so the replacement is chosen explicitly: -1e-9 for negative values and
        # +1e-9 otherwise (including exact zero).
        replacement = np.where(denominator < 0, -1e-9, 1e-9)
        denominator = np.where(np.abs(denominator) < 1e-9, replacement, denominator)
        return (xo * growth) / denominator

    @staticmethod
    def gompertz(time, xm, um, lag):
        """Modified Gompertz growth curve X(t) with maximum xm, rate um and lag time."""
        return xm * np.exp(-np.exp((um * np.e / xm) * (lag - time) + 1))

    @staticmethod
    def moser(time, Xm, um, Ks):
        """Simplified Moser curve X(t) = Xm * (1 - exp(-um * (t - Ks)))."""
        return Xm * (1 - np.exp(-um * (time - Ks)))

    @staticmethod
    def logistic_diff(X, t, params):
        """dX/dt of the logistic model; params is (xo, xm, um)."""
        xo, xm, um = params
        return um * X * (1 - X / xm)

    @staticmethod
    def gompertz_diff(X, t, params):
        """dX/dt of the Gompertz model; params is (xm, um, lag)."""
        xm, um, lag = params
        return X * (um * np.e / xm) * np.exp((um * np.e / xm) * (lag - t) + 1)

    @staticmethod
    def moser_diff(X, t, params):
        """dX/dt of the simplified Moser model; params is (Xm, um, Ks)."""
        Xm, um, Ks = params
        return um * (Xm - X)

    def _lookup_biomass_functions(self):
        """Return (closed_form, derivative) for self.model_type, or None if unknown."""
        table = {
            'logistic': (self.logistic, self.logistic_diff),
            'gompertz': (self.gompertz, self.gompertz_diff),
            'moser': (self.moser, self.moser_diff),
        }
        return table.get(self.model_type)

    def _get_biomass_model_params_as_list(self):
        """Return the fitted biomass parameters in positional order, or None if unavailable."""
        fitted = self.params.get('biomass')
        if not fitted:
            return None
        names = self._BIOMASS_PARAM_NAMES.get(self.model_type)
        if names is None:
            return None
        return [fitted[name] for name in names]

    # ------------------------------------------------------------------
    # Luedeking-Piret substrate / product curves
    # ------------------------------------------------------------------
    def _biomass_change_and_integral(self, time, biomass_params_list):
        """Return (X(t) - X(t0), cumulative trapezoidal integral of X) on ``time``.

        The integral is zero at the first point and respects non-uniform
        spacing; a single time point yields a zero integral.
        """
        X_t = np.asarray(self.biomass_model(time, *biomass_params_list), dtype=float)
        if X_t.size == 0:
            return X_t, X_t.copy()
        integral_X = np.zeros_like(X_t)
        if X_t.size > 1:
            integral_X[1:] = np.cumsum(0.5 * (X_t[1:] + X_t[:-1]) * np.diff(time))
        return X_t - X_t.flat[0], integral_X

    def substrate(self, time, so, p, q, biomass_params_list):
        """Substrate concentration predicted by the integrated Luedeking-Piret model.

        Args:
            time: time points (first element is the reference t0).
            so: substrate concentration at t0.
            p: growth-associated consumption coefficient.
            q: maintenance (biomass-associated) consumption coefficient.
            biomass_params_list: positional biomass parameters, or None.

        Returns:
            Array shaped like ``time``; constant ``so`` when no biomass parameters exist.
        """
        time = np.asarray(time, dtype=float)
        if biomass_params_list is None:
            return np.full_like(time, so)
        delta_X, integral_X = self._biomass_change_and_integral(time, biomass_params_list)
        return so - p * delta_X - q * integral_X

    def product(self, time, po, alpha, beta, biomass_params_list):
        """Product concentration predicted by the integrated Luedeking-Piret model.

        Args:
            time: time points (first element is the reference t0).
            po: product concentration at t0.
            alpha: growth-associated formation coefficient.
            beta: non-growth-associated formation coefficient.
            biomass_params_list: positional biomass parameters, or None.

        Returns:
            Array shaped like ``time``; constant ``po`` when no biomass parameters exist.
        """
        time = np.asarray(time, dtype=float)
        if biomass_params_list is None:
            return np.full_like(time, po)
        delta_X, integral_X = self._biomass_change_and_integral(time, biomass_params_list)
        return po + alpha * delta_X + beta * integral_X

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    @staticmethod
    def _summarise_replicates(df, data_cols, n_points):
        """Return (raw, mean, std, sem, n_valid) for the replicate columns ``data_cols``.

        With no usable column the statistics are float NaN arrays of length
        ``n_points`` (independent of the time column's dtype), the counts are
        zeros and ``raw`` is an empty array.
        """
        replicate_arrays = []
        for col in data_cols:
            try:
                replicate_arrays.append(pd.to_numeric(df[col], errors='coerce').values)
            except KeyError:
                print(f"Advertencia: Columna {col} no encontrada, se omitirá para el promedio/std/sem.")
        if not replicate_arrays:
            return (np.array([]),
                    np.full(n_points, np.nan),
                    np.full(n_points, np.nan),
                    np.full(n_points, np.nan),
                    np.zeros(n_points, dtype=int))
        data_reps = np.array(replicate_arrays)
        n_valid = np.sum(~np.isnan(data_reps), axis=0)
        mean = np.nanmean(data_reps, axis=0)
        std = np.nanstd(data_reps, axis=0, ddof=1)
        # The SEM is only defined where at least two replicates are available.
        sem = np.full(std.shape, np.nan)
        enough = n_valid > 1
        sem[enough] = std[enough] / np.sqrt(n_valid[enough])
        return data_reps, mean, std, sem, n_valid

    def process_data(self, df):
        """Load one sheet and append replicate statistics for each variable.

        ``df`` must have two-level columns whose second level holds the
        variable name (COL_TIME, COL_BIOMASS, COL_SUBSTRATE, COL_PRODUCT).
        The first time column found becomes ``self.time``.

        Raises:
            ValueError: if the sheet has no time column.
        """
        def columns_named(label):
            return [col for col in df.columns if col[1] == label]

        time_cols = columns_named(COL_TIME)
        if not time_cols:
            raise ValueError(f"No se encontró la columna de '{COL_TIME}' en los datos.")
        time = df[time_cols[0]].values
        self.time = time

        targets = [
            (COL_BIOMASS, self.datax, self.dataxp, self.datax_std, self.datax_sem, self.n_reps_x),
            (COL_SUBSTRATE, self.datas, self.datasp, self.datas_std, self.datas_sem, self.n_reps_s),
            (COL_PRODUCT, self.datap, self.datapp, self.datap_std, self.datap_sem, self.n_reps_p),
        ]
        for label, raw_list, avg_list, std_list, sem_list, n_reps_list in targets:
            raw, mean, std, sem, n_valid = self._summarise_replicates(
                df, columns_named(label), len(time))
            raw_list.append(raw)
            avg_list.append(mean)
            std_list.append(std)
            sem_list.append(sem)
            n_reps_list.append(n_valid)

    # ------------------------------------------------------------------
    # Fitting
    # ------------------------------------------------------------------
    def fit_model(self):
        """Bind biomass_model / biomass_diff to the functions for self.model_type.

        Raises:
            ValueError: if self.model_type is not a known model.
        """
        functions = self._lookup_biomass_functions()
        if functions is None:
            raise ValueError(f"Tipo de modelo desconocido: {self.model_type}")
        self.biomass_model, self.biomass_diff = functions

    @staticmethod
    def _goodness_of_fit(observed, predicted):
        """Return (R², RMSE); R² falls back to 1.0/0.0 when the data have no variance."""
        squared_residuals = (observed - predicted) ** 2
        residual_ss = np.sum(squared_residuals)
        total_ss = np.sum((observed - np.mean(observed)) ** 2)
        if total_ss < 1e-9:
            r2 = 1.0 if residual_ss < 1e-9 else 0.0
        else:
            r2 = 1 - residual_ss / total_ss
        return r2, np.sqrt(np.mean(squared_residuals))

    def fit_biomass(self, time, biomass, bounds=None):
        """Fit the selected biomass model and store parameters, R² and RMSE.

        Args:
            time: time points.
            biomass: biomass observations (NaN entries are ignored).
            bounds: optional ``(lower, upper)`` sequences passed to curve_fit;
                the initial guess is clipped into them.

        Returns:
            Model prediction on the full ``time`` array, or None if the fit
            could not be performed (``self.params['biomass']`` is then ``{}``).
        """
        time = np.asarray(time, dtype=float)
        biomass = np.asarray(biomass, dtype=float)
        if not np.any(np.isfinite(biomass)):
            print(f"Error en fit_biomass_{self.model_type}: Todos los datos de biomasa son no finitos.")
            self.params['biomass'] = {}
            return None

        # Only rows where both the observation and its time stamp are finite can be fitted.
        finite_mask = np.isfinite(biomass) & np.isfinite(time)
        time_fit = time[finite_mask]
        biomass_fit = biomass[finite_mask]
        if len(time_fit) < 2:
            print(f"Error en fit_biomass_{self.model_type}: No hay suficientes puntos de datos finitos para el ajuste ({len(time_fit)}).")
            self.params['biomass'] = {}
            return None

        peak = np.max(biomass_fit)
        plateau_guess = peak if peak > 0 else 1.0
        if self.model_type == 'logistic':
            p0 = [max(1e-6, np.min(biomass_fit)), peak * 1.5 if peak > 0 else 1.0, 0.1]
            fit_func = self.logistic
        elif self.model_type == 'gompertz':
            # Guess the lag as the time of steepest increase, when there is one.
            grad_b = np.gradient(biomass_fit)
            lag_guess = time_fit[np.argmax(grad_b)] if np.any(grad_b > 1e-3) else time_fit[0]
            p0 = [plateau_guess, 0.1, lag_guess]
            fit_func = self.gompertz
        elif self.model_type == 'moser':
            p0 = [plateau_guess, 0.1, time_fit[0]]
            fit_func = self.moser
        else:
            print(f"Modelo de biomasa no configurado para {self.model_type}")
            return None
        param_names = self._BIOMASS_PARAM_NAMES[self.model_type]

        try:
            if bounds:
                # curve_fit rejects an initial guess outside the bounds.
                clipped = []
                for i, val in enumerate(p0):
                    low = bounds[0][i] if bounds[0] and i < len(bounds[0]) else -np.inf
                    high = bounds[1][i] if bounds[1] and i < len(bounds[1]) else np.inf
                    clipped.append(np.clip(val, low, high))
                p0 = clipped
            popt, _ = curve_fit(fit_func, time_fit, biomass_fit, p0=p0, maxfev=self.maxfev,
                                bounds=bounds or (-np.inf, np.inf))
            self.params['biomass'] = dict(zip(param_names, popt))
            # Metrics use only the rows that took part in the fit.
            self.r2['biomass'], self.rmse['biomass'] = self._goodness_of_fit(
                biomass_fit, fit_func(time_fit, *popt))
            return fit_func(time, *popt)
        except Exception as e:
            print(f"Error en fit_biomass_{self.model_type}: {e}")
            self.params['biomass'] = {}
            return None

    def _fit_consumption_production(self, time, data, fit_type, p0_values, param_names):
        """Fit the substrate or product curve against the already-fitted biomass model.

        The model is always evaluated on the full (finite) time grid and then
        restricted to the rows with observations, so the integration reference
        is the same during fitting and prediction even when ``data`` has gaps.

        Args:
            time: time points.
            data: observations (NaN entries are ignored).
            fit_type: 'substrate' or 'product'; also the key used in params/r2/rmse.
            p0_values: initial guess for the three parameters.
            param_names: names under which the fitted parameters are stored.

        Returns:
            Prediction on the full ``time`` array, or None on failure.
        """
        biomass_params_list = self._get_biomass_model_params_as_list()
        if biomass_params_list is None:
            print(f"Parámetros de biomasa no disponibles para ajustar {fit_type}.")
            return None
        time = np.asarray(time, dtype=float)
        data = np.asarray(data, dtype=float)
        if not np.any(np.isfinite(data)):
            print(f"Error en fit_{fit_type}_{self.model_type}: Todos los datos de {fit_type} son no finitos.")
            self.params[fit_type] = {}
            return None

        time_ok = np.isfinite(time)
        finite_mask = np.isfinite(data) & time_ok
        time_fit = time[finite_mask]
        data_fit = data[finite_mask]
        if len(time_fit) < 2:
            print(f"Error en fit_{fit_type}_{self.model_type}: No hay suficientes puntos de datos finitos para el ajuste ({len(time_fit)}).")
            self.params[fit_type] = {}
            return None

        model_func = self.substrate if fit_type == 'substrate' else self.product
        grid = time[time_ok]
        rows_with_data = finite_mask[time_ok]

        def model_on_observed_rows(_t, *params_fit):
            return model_func(grid, *params_fit, biomass_params_list)[rows_with_data]

        try:
            popt, _ = curve_fit(model_on_observed_rows, time_fit, data_fit,
                                p0=p0_values, maxfev=self.maxfev)
            self.params[fit_type] = dict(zip(param_names, popt))
            y_pred_full = np.full(time.shape, np.nan)
            y_pred_full[time_ok] = model_func(grid, *popt, biomass_params_list)
            self.r2[fit_type], self.rmse[fit_type] = self._goodness_of_fit(
                data_fit, y_pred_full[finite_mask])
            return y_pred_full
        except Exception as e:
            print(f"Error en fit_{fit_type}_{self.model_type}: {e}")
            self.params[fit_type] = {}
            return None

    def fit_substrate(self, time, substrate):
        """Fit (so, p, q) of the substrate curve; returns the prediction or None."""
        p0_s = [max(1e-6, np.nanmin(substrate)) if np.any(np.isfinite(substrate)) else 1.0, 0.01, 0.01]
        param_names_s = ['so', 'p', 'q']
        return self._fit_consumption_production(time, substrate, 'substrate', p0_s, param_names_s)

    def fit_product(self, time, product):
        """Fit (po, alpha, beta) of the product curve; returns the prediction or None."""
        p0_p = [max(1e-6, np.nanmin(product)) if np.any(np.isfinite(product)) else 0.0, 0.01, 0.01]
        param_names_p = ['po', 'alpha', 'beta']
        return self._fit_consumption_production(time, product, 'product', p0_p, param_names_p)

    # ------------------------------------------------------------------
    # ODE solution
    # ------------------------------------------------------------------
    def generate_fine_time_grid(self, time):
        """Return 500 evenly spaced points spanning ``time``, or ``time`` itself if degenerate."""
        if len(time) < 2:
            return time
        time_min, time_max = np.nanmin(time), np.nanmax(time)
        if np.isnan(time_min) or np.isnan(time_max) or time_min == time_max:
            return time
        return np.linspace(time_min, time_max, 500)

    def system(self, y, t, biomass_params_list, substrate_params_dict, product_params_dict):
        """Right-hand side [dX/dt, dS/dt, dP/dt] of the coupled ODE system for odeint."""
        X, S, P = y
        dXdt = 0.0
        if self.model_type == 'logistic':
            dXdt = self.logistic_diff(X, t, biomass_params_list)
        elif self.model_type == 'gompertz':
            dXdt = self.gompertz_diff(X, t, biomass_params_list)
        elif self.model_type == 'moser':
            dXdt = self.moser_diff(X, t, biomass_params_list)
        p = substrate_params_dict.get('p', 0)
        q = substrate_params_dict.get('q', 0)
        alpha = product_params_dict.get('alpha', 0)
        beta = product_params_dict.get('beta', 0)
        dSdt = -p * dXdt - q * X
        dPdt = alpha * dXdt + beta * X
        return [dXdt, dSdt, dPdt]

    def get_initial_conditions(self, time, biomass, substrate, product):
        """Return [X0, S0, P0] for the ODE solver.

        Fitted values take precedence: X0 is the fitted biomass curve evaluated
        at the earliest time point, S0/P0 are the fitted ``so``/``po``. Without
        a fit, the first finite observation is used (default 0.1 for biomass,
        0.0 for substrate and product).
        """
        def first_finite(values, default_val):
            if values is None:
                return default_val
            values = np.asarray(values, dtype=float)
            finite = values[np.isfinite(values)]
            return finite[0] if finite.size else default_val

        X0 = first_finite(biomass, 0.1)
        S0 = first_finite(substrate, 0.0)
        P0 = first_finite(product, 0.0)
        time_min_val = np.nanmin(time) if len(time) > 0 and np.any(np.isfinite(time)) else 0

        biomass_params_list = self._get_biomass_model_params_as_list()
        functions = self._lookup_biomass_functions()
        if biomass_params_list is not None and functions is not None:
            # Evaluate the closed form at the start of the run so the ODE
            # starts on the fitted curve even when the time axis does not begin at 0.
            X0 = float(functions[0](time_min_val, *biomass_params_list))
        if self.params.get('substrate'):
            S0 = self.params['substrate']['so']
        if self.params.get('product'):
            P0 = self.params['product']['po']
        return [X0, S0, P0]

    def solve_differential_equations(self, time, biomass, substrate, product):
        """Integrate the ODE system on a fine grid.

        Returns:
            ``(X, S, P, time_grid)``; X, S and P are None when the biomass
            parameters are missing, the time range is degenerate or odeint fails.
        """
        biomass_params_list = self._get_biomass_model_params_as_list()
        if biomass_params_list is None:
            print("No hay parámetros de biomasa, no se pueden resolver las EDO.")
            return None, None, None, time
        substrate_params_dict = self.params.get('substrate', {})
        product_params_dict = self.params.get('product', {})
        initial_conditions = self.get_initial_conditions(time, biomass, substrate, product)
        time_fine = self.generate_fine_time_grid(time)
        if len(time_fine) < 2:
            print("No hay suficiente rango de tiempo para resolver EDOs.")
            return None, None, None, time
        try:
            sol = odeint(self.system, initial_conditions, time_fine,
                         args=(biomass_params_list, substrate_params_dict, product_params_dict))
            return sol[:, 0], sol[:, 1], sol[:, 2], time_fine
        except Exception as e:
            print(f"Error al resolver EDOs: {e}")
            return None, None, None, time_fine

    # ------------------------------------------------------------------
    # Plotting helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _axis_label(base, unit):
        """Return 'base (unit)' when a unit is given, else just 'base'."""
        return f'{base} ({unit})' if unit else base

    @staticmethod
    def _plot_observations(ax, t, data, error_vals, color, label, marker, cap_size):
        """Scatter the finite observations, with error bars when usable ones are supplied."""
        if data is None or not np.any(np.isfinite(data)):
            return
        keep = np.isfinite(data)
        t_kept, data_kept = t[keep], data[keep]
        has_errors = (error_vals is not None
                      and np.any(np.isfinite(error_vals))
                      and len(error_vals) == len(t))
        if has_errors:
            yerr = np.copy(error_vals[keep])
            yerr[~np.isfinite(yerr)] = 0  # an undefined error is drawn as no bar
            ax.errorbar(t_kept, data_kept, yerr=yerr, fmt=marker, color=color,
                        label=label, capsize=cap_size, elinewidth=1, markeredgewidth=1)
        else:
            ax.plot(t_kept, data_kept, marker=marker, linestyle='', color=color, label=label)

    @staticmethod
    def _plot_model_curve(ax, t, y_pred, **line_kwargs):
        """Draw the model curve if it exists, matches ``t`` in length and has finite values."""
        if y_pred is not None and len(y_pred) == len(t) and np.any(np.isfinite(y_pred)):
            ax.plot(t, y_pred, **line_kwargs)

    def _format_fit_summary(self, param_key):
        """Return the parameter / R² / RMSE text for ``param_key``, or '' if nothing was fitted."""
        current_params = self.params.get(param_key, {})
        if not current_params:
            return ''
        param_text = '\n'.join(
            f"{k} = {v:.3g}" for k, v in current_params.items() if np.isfinite(v))
        r2 = self.r2.get(param_key, np.nan)
        rmse = self.rmse.get(param_key, np.nan)
        return f"{param_text}\nR² = {r2:.3f}\nRMSE = {rmse:.3g}"

    @staticmethod
    def _figure_to_image(fig):
        """Render ``fig`` to an in-memory PNG and return it as an RGB PIL image."""
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        return Image.open(buf).convert("RGB")

    # ------------------------------------------------------------------
    # Public plotting API
    # ------------------------------------------------------------------
    def plot_results(self, time, biomass, substrate, product,
                     y_pred_biomass, y_pred_substrate, y_pred_product,
                     biomass_error_values, substrate_error_values, product_error_values,
                     experiment_name='', legend_position='best', params_position='upper right',
                     show_legend=True, show_params=True, style='whitegrid',
                     line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o',
                     use_differential=False,
                     time_unit='', biomass_unit='', substrate_unit='', product_unit='',
                     error_bar_capsize=5):
        """Plot biomass, substrate and product in three stacked panels.

        With ``use_differential`` the model curves come from the ODE solution
        (falling back to the supplied predictions if solving fails).

        Returns:
            An RGB PIL image, or None when there is neither biomass data nor a
            biomass prediction to draw.
        """
        sns.set_style(style)
        time_to_plot = time
        if use_differential:
            X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product)
            if X_ode is not None:
                y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode
                time_to_plot = time_fine_ode
            else:
                print(f"Fallo al resolver EDOs para {experiment_name}, usando ajustes si existen.")
                if y_pred_biomass is None and not np.any(np.isfinite(biomass)):
                    return None
        elif y_pred_biomass is None and not np.any(np.isfinite(biomass)):
            print(f"No hay datos de biomasa ni ajuste para {experiment_name}. Omitiendo figura.")
            return None

        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 15))
        try:  # guarantee the figure is released even if drawing fails
            fig.suptitle(f'{experiment_name} (Modelo: {self.model_type.capitalize()})', fontsize=16)
            xlabel_full = self._axis_label(LABEL_TIME, time_unit)
            plots_config = [
                (ax1, biomass, y_pred_biomass, biomass_error_values,
                 self._axis_label(LABEL_BIOMASS, biomass_unit), 'biomass'),
                (ax2, substrate, y_pred_substrate, substrate_error_values,
                 self._axis_label(LABEL_SUBSTRATE, substrate_unit), 'substrate'),
                (ax3, product, y_pred_product, product_error_values,
                 self._axis_label(LABEL_PRODUCT, product_unit), 'product'),
            ]
            text_x, ha = (0.95, 'right') if 'right' in params_position else (0.05, 'left')
            text_y, va = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom')
            for ax, data, y_pred, data_error_vals, ylabel, param_key in plots_config:
                self._plot_observations(ax, time, data, data_error_vals, point_color,
                                        'Datos experimentales', marker_style, error_bar_capsize)
                self._plot_model_curve(ax, time_to_plot, y_pred,
                                       linestyle=line_style, color=line_color, label='Modelo')
                ax.set_xlabel(xlabel_full)
                ax.set_ylabel(ylabel)
                # An empty panel has nothing to put in a legend.
                if show_legend and ax.get_legend_handles_labels()[0]:
                    ax.legend(loc=legend_position)
                ax.set_title(ylabel.split(" (")[0])
                text = self._format_fit_summary(param_key)
                if show_params and text:
                    if params_position == 'outside right':
                        fig.subplots_adjust(right=0.75)
                        ax.annotate(text, xy=(1.05, 0.5), xycoords='axes fraction',
                                    verticalalignment='center',
                                    bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))
                    else:
                        ax.text(text_x, text_y, text, transform=ax.transAxes,
                                verticalalignment=va, horizontalalignment=ha,
                                bbox={'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.7})
            fig.tight_layout(rect=[0, 0.03, 1, 0.95])
            return self._figure_to_image(fig)
        finally:
            plt.close(fig)

    def plot_combined_results(self, time, biomass, substrate, product,
                              y_pred_biomass, y_pred_substrate, y_pred_product,
                              biomass_error_values, substrate_error_values, product_error_values,
                              experiment_name='', legend_position='best', params_position='upper right',
                              show_legend=True, show_params=True, style='whitegrid',
                              line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o',
                              use_differential=False,
                              time_unit='', biomass_unit='', substrate_unit='', product_unit='',
                              error_bar_capsize=5):
        """Plot biomass, substrate and product on one panel with three y-axes.

        Colours are fixed per variable (blue / green / red); ``line_color`` and
        ``point_color`` are accepted for signature parity with plot_results
        but are not used here.

        Returns:
            An RGB PIL image, or None when there is neither biomass data nor a
            biomass prediction to draw.
        """
        sns.set_style(style)
        time_to_plot = time
        if use_differential:
            X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product)
            if X_ode is not None:
                y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode
                time_to_plot = time_fine_ode
            else:
                print(f"Fallo al resolver EDOs para {experiment_name} (combinado), usando ajustes si existen.")
                if y_pred_biomass is None and not np.any(np.isfinite(biomass)):
                    return None
        elif y_pred_biomass is None and not np.any(np.isfinite(biomass)):
            print(f"No hay datos de biomasa ni ajuste para {experiment_name}. Omitiendo figura combinada.")
            return None

        fig, ax1 = plt.subplots(figsize=(12, 7))
        try:  # guarantee the figure is released even if drawing fails
            fig.suptitle(f'{experiment_name} (Modelo: {self.model_type.capitalize()})', fontsize=16)
            colors = {'Biomasa': 'blue', 'Sustrato': 'green', 'Producto': 'red'}

            ax1.set_xlabel(self._axis_label(LABEL_TIME, time_unit))
            ax2 = ax1.twinx()
            ax3 = ax1.twinx()
            # Push the third axis outwards so its ticks do not overlap the second one.
            ax3.spines["right"].set_position(("axes", 1.15))
            ax3.set_frame_on(True)
            ax3.patch.set_visible(False)

            series = [
                (ax1, biomass, y_pred_biomass, biomass_error_values,
                 LABEL_BIOMASS, biomass_unit, colors['Biomasa']),
                (ax2, substrate, y_pred_substrate, substrate_error_values,
                 LABEL_SUBSTRATE, substrate_unit, colors['Sustrato']),
                (ax3, product, y_pred_product, product_error_values,
                 LABEL_PRODUCT, product_unit, colors['Producto']),
            ]
            for ax, data, y_pred, error_vals, label, unit, color in series:
                ax.set_ylabel(self._axis_label(label, unit), color=color)
                self._plot_observations(ax, time, data, error_vals, color,
                                        f'{label} (Datos)', marker_style, error_bar_capsize)
                self._plot_model_curve(ax, time_to_plot, y_pred,
                                       linestyle=line_style, color=color, label=f'{label} (Modelo)')
                ax.tick_params(axis='y', labelcolor=color)

            if show_legend:
                # Merge the three axes' entries into one legend, first occurrence wins.
                unique_labels_dict = {}
                for ax_leg in (ax1, ax2, ax3):
                    for handle, label in zip(*ax_leg.get_legend_handles_labels()):
                        unique_labels_dict.setdefault(label, handle)
                if unique_labels_dict:
                    ax1.legend(list(unique_labels_dict.values()), list(unique_labels_dict.keys()),
                               loc=legend_position)

            if show_params:
                texts = []
                for param_key, param_label_text in [('biomass', LABEL_BIOMASS),
                                                    ('substrate', LABEL_SUBSTRATE),
                                                    ('product', LABEL_PRODUCT)]:
                    summary = self._format_fit_summary(param_key)
                    if summary:
                        texts.append(f"{param_label_text}:\n{summary}")
                total_text = "\n\n".join(texts)
                if total_text:
                    text_x, ha_align = (0.95, 'right') if 'right' in params_position else (0.05, 'left')
                    text_y, va_align = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom')
                    if params_position == 'outside right':
                        fig.subplots_adjust(right=0.70)
                        ax3.annotate(total_text, xy=(1.25, 0.5), xycoords='axes fraction',
                                     fontsize=8, verticalalignment='center',
                                     bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))
                    else:
                        ax1.text(text_x, text_y, total_text, transform=ax1.transAxes,
                                 fontsize=8, verticalalignment=va_align, horizontalalignment=ha_align,
                                 bbox={'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.7})
            fig.tight_layout(rect=[0, 0.03, 1, 0.95])
            return self._figure_to_image(fig)
        finally:
            plt.close(fig)
def _process_and_plot_single_experiment(
        time_exp, biomass, substrate, product,
        biomass_sd, substrate_sd, product_sd,
        biomass_sem, substrate_sem, product_sem,
        experiment_name, model_type_str, maxfev_val,
        legend_position, params_position, show_legend, show_params,
        style, line_color, point_color, line_style, marker_style,
        use_differential, plot_mode, bounds_biomass,
        time_unit, biomass_unit, substrate_unit, product_unit,
        error_bar_type, error_bar_capsize):
    """Fit one experiment with one biomass model and render its figure.

    Biomass is fitted first; substrate and product are only fitted when the
    biomass fit succeeded and the respective series has finite values.

    Returns:
        ``(figure, comparison_row)`` where ``figure`` is whatever the model's
        plotting method returned (possibly None) and ``comparison_row`` is a
        dict with the experiment name, model name and R²/RMSE per variable
        (NaN where no fit is available).
    """
    def has_finite_values(series):
        return series is not None and len(series) > 0 and np.any(np.isfinite(series))

    model = BioprocessModel(model_type=model_type_str, maxfev=maxfev_val)
    model.fit_model()
    y_pred_biomass = model.fit_biomass(time_exp, biomass, bounds=bounds_biomass)

    current_comparison_data = {
        'Experimento': experiment_name,
        'Modelo': model_type_str.capitalize(),
        'R² Biomasa': np.nan, 'RMSE Biomasa': np.nan,
        'R² Sustrato': np.nan, 'RMSE Sustrato': np.nan,
        'R² Producto': np.nan, 'RMSE Producto': np.nan
    }

    y_pred_substrate = None
    y_pred_product = None
    biomass_fitted = y_pred_biomass is not None and bool(model.params.get('biomass'))
    if not biomass_fitted:
        print(f"No se pudo ajustar biomasa para {experiment_name} con {model_type_str}.")
    else:
        current_comparison_data['R² Biomasa'] = model.r2.get('biomass', np.nan)
        current_comparison_data['RMSE Biomasa'] = model.rmse.get('biomass', np.nan)

        if has_finite_values(substrate):
            y_pred_substrate = model.fit_substrate(time_exp, substrate)
            if y_pred_substrate is not None:
                current_comparison_data['R² Sustrato'] = model.r2.get('substrate', np.nan)
                current_comparison_data['RMSE Sustrato'] = model.rmse.get('substrate', np.nan)

        if has_finite_values(product):
            y_pred_product = model.fit_product(time_exp, product)
            if y_pred_product is not None:
                current_comparison_data['R² Producto'] = model.r2.get('product', np.nan)
                current_comparison_data['RMSE Producto'] = model.rmse.get('product', np.nan)

    # Independent runs have no replicates, hence no error bars at all.
    if plot_mode == 'independent':
        biomass_err, substrate_err, product_err = None, None, None
    elif error_bar_type == 'sd':
        biomass_err, substrate_err, product_err = biomass_sd, substrate_sd, product_sd
    else:
        biomass_err, substrate_err, product_err = biomass_sem, substrate_sem, product_sem

    draw = model.plot_combined_results if plot_mode == 'combinado' else model.plot_results
    fig = draw(
        time_exp, biomass, substrate, product,
        y_pred_biomass, y_pred_substrate, y_pred_product,
        biomass_err, substrate_err, product_err,
        experiment_name, legend_position, params_position,
        show_legend, show_params, style,
        line_color, point_color, line_style, marker_style,
        use_differential,
        time_unit, biomass_unit, substrate_unit, product_unit,
        error_bar_capsize,
    )
    return fig, current_comparison_data
def process_all_data(file, legend_position, params_position, model_types_selected, analysis_mode, experiment_names,
                     lower_bounds_biomass_str, upper_bounds_biomass_str,
                     style_plot, line_color_plot, point_color_plot, line_style_plot, marker_style_plot,
                     show_legend_plot, show_params_plot, use_differential_eqs, maxfev_val,
                     time_unit_str, biomass_unit_str, substrate_unit_str, product_unit_str,
                     error_bar_type_selected, error_bar_capsize_selected):
    """Fit every experiment of an Excel workbook and collect figures and metrics.

    Each sheet is read with a two-row header (experiment name, variable name).
    ``analysis_mode`` selects the processing:

    * ``'independent'``: every first-level column group is fitted on its own.
    * ``'average'`` / ``'combinado'``: replicates of a sheet are averaged and
      fitted once (separate panels or a single combined panel).

    Args:
        file: uploaded file object exposing ``.name``, or a plain path string.
        experiment_names: optional display names, either a list or a single
            string with one name per line; missing names fall back to the
            sheet/column name.
        lower_bounds_biomass_str / upper_bounds_biomass_str: optional
            comma-separated triples; each is parsed independently and ignored
            (with a warning) when it is not three valid numbers.
        (remaining arguments are forwarded unchanged to the per-experiment helper)

    Returns:
        ``(figures, comparison_dataframe, status_message)``.
    """
    if file is None:
        return [], pd.DataFrame(), "Por favor, sube un archivo Excel."

    # Accept both an upload object with a ``name`` attribute and a bare path.
    excel_path = getattr(file, "name", file)
    try:
        xls = pd.ExcelFile(excel_path)
    except Exception as e:
        return [], pd.DataFrame(), f"Error al leer el archivo Excel: {e}"

    # A raw textbox value is one string; indexing it would yield single characters.
    if experiment_names is None:
        names = []
    elif isinstance(experiment_names, str):
        names = [line.strip() for line in experiment_names.splitlines() if line.strip()]
    else:
        names = list(experiment_names)

    model_types = model_types_selected or []

    def parse_bounds(text, fallback):
        """Return three floats parsed from ``text``, or ``fallback`` if empty/invalid."""
        if not text or not text.strip():
            return fallback
        try:
            values = [float(x.strip()) for x in text.split(',')]
        except ValueError:
            print("Advertencia: Bounds para biomasa no son válidos.")
            return fallback
        return values if len(values) == 3 else fallback

    parsed_bounds_biomass = (
        parse_bounds(lower_bounds_biomass_str, [-np.inf] * 3),
        parse_bounds(upper_bounds_biomass_str, [np.inf] * 3),
    )

    figures_list = []
    comparison_data_list = []
    experiment_counter = 0

    def run_models(label, time_values, series, sds, sems):
        """Fit and plot one experiment with every selected biomass model."""
        for model_t in model_types:
            fig, comp_data = _process_and_plot_single_experiment(
                time_values, series[0], series[1], series[2],
                sds[0], sds[1], sds[2],
                sems[0], sems[1], sems[2],
                label, model_t, int(maxfev_val),
                legend_position, params_position, show_legend_plot, show_params_plot,
                style_plot, line_color_plot, point_color_plot, line_style_plot, marker_style_plot,
                use_differential_eqs, analysis_mode, parsed_bounds_biomass,
                time_unit_str, biomass_unit_str, substrate_unit_str, product_unit_str,
                error_bar_type_selected, int(error_bar_capsize_selected)
            )
            if fig:
                figures_list.append(fig)
            comparison_data_list.append(comp_data)

    def last_matching(arrays, expected_len):
        """Return the newest array if it has ``expected_len`` entries, else None."""
        if arrays and len(arrays[-1]) == expected_len:
            return arrays[-1]
        return None

    known_variables = [COL_TIME, COL_BIOMASS, COL_SUBSTRATE, COL_PRODUCT]

    with xls:  # make sure the workbook handle is released
        for sheet_name in xls.sheet_names:
            try:
                df = pd.read_excel(xls, sheet_name=sheet_name, header=[0, 1])
                # Force the known variable columns to numeric; unparsable cells become NaN.
                for col_level0 in df.columns.levels[0]:
                    for col_level1 in known_variables:
                        if (col_level0, col_level1) in df.columns:
                            df[(col_level0, col_level1)] = pd.to_numeric(df[(col_level0, col_level1)], errors='coerce')
                # Drop rows that are entirely NaN in the time and biomass columns.
                df = df.dropna(how='all', subset=[(c[0], c[1]) for c in df.columns if c[1] in [COL_TIME, COL_BIOMASS]])
            except Exception as e:
                print(f"Error al leer la hoja '{sheet_name}': {e}")
                continue

            if analysis_mode == 'independent':
                for exp_col_name in df.columns.levels[0]:
                    try:
                        time_series = df[(exp_col_name, COL_TIME)]
                        # Select rows by mask so every series stays aligned with
                        # its time stamp even when a gap is not at the tail.
                        has_time = time_series.notna().values
                        time_exp = time_series.values[has_time]
                        if len(time_exp) == 0:
                            continue
                        series = []
                        for variable in (COL_BIOMASS, COL_SUBSTRATE, COL_PRODUCT):
                            key = (exp_col_name, variable)
                            if key in df:
                                series.append(df[key].values[has_time])
                            else:
                                series.append(np.full(len(time_exp), np.nan))
                        label = (names[experiment_counter] if experiment_counter < len(names)
                                 else f"{sheet_name} - {exp_col_name}")
                        run_models(label, time_exp, series, (None, None, None), (None, None, None))
                        experiment_counter += 1
                    except KeyError as e:
                        print(f"Advertencia: Falta columna {e} para '{exp_col_name}' en '{sheet_name}'.")
                    except Exception as e_exp:
                        print(f"Error procesando '{exp_col_name}' en '{sheet_name}': {e_exp}")

            elif analysis_mode in ['average', 'combinado']:
                model_data_loader = BioprocessModel()
                try:
                    model_data_loader.process_data(df)
                except ValueError as ve:
                    print(f"Error en hoja '{sheet_name}': {ve}. Saltando.")
                    continue
                if len(model_data_loader.time) == 0:
                    print(f"No hay datos de tiempo válidos en '{sheet_name}'. Saltando.")
                    continue
                time_avg = model_data_loader.time
                n_points = len(time_avg)
                series = [
                    model_data_loader.dataxp[-1] if model_data_loader.dataxp else np.array([]),
                    model_data_loader.datasp[-1] if model_data_loader.datasp else np.array([]),
                    model_data_loader.datapp[-1] if model_data_loader.datapp else np.array([]),
                ]
                sds = [last_matching(model_data_loader.datax_std, n_points),
                       last_matching(model_data_loader.datas_std, n_points),
                       last_matching(model_data_loader.datap_std, n_points)]
                sems = [last_matching(model_data_loader.datax_sem, n_points),
                        last_matching(model_data_loader.datas_sem, n_points),
                        last_matching(model_data_loader.datap_sem, n_points)]
                label = (names[experiment_counter] if experiment_counter < len(names)
                         else f"{sheet_name} (Promedio)")
                run_models(label, time_avg, series, sds, sems)
                experiment_counter += 1

    comparison_df = pd.DataFrame(comparison_data_list)
    if not comparison_df.empty:
        # Best fits first: high R² before low, then low RMSE before high.
        comparison_df_sorted = comparison_df.sort_values(
            by=['R² Biomasa', 'R² Sustrato', 'R² Producto', 'RMSE Biomasa', 'RMSE Sustrato', 'RMSE Producto'],
            ascending=[False, False, False, True, True, True]
        ).reset_index(drop=True)
    else:
        comparison_df_sorted = pd.DataFrame(columns=[
            'Experimento', 'Modelo', 'R² Biomasa', 'RMSE Biomasa',
            'R² Sustrato', 'RMSE Sustrato', 'R² Producto', 'RMSE Producto'
        ])
    return figures_list, comparison_df_sorted, "Proceso completado."
def create_interface():
    """Build the Gradio Blocks UI for the bioprocess-model application.

    The interface has two tabs (main configuration and plot customization),
    a button that runs the fitting pipeline through ``process_all_data``,
    a gallery and a table for the results, and a button that exports the
    comparison table to an Excel file.

    Returns:
        The ``gr.Blocks`` instance, ready for ``.launch()``.
    """
    # Single source of truth for the maxfev default: used as the initial
    # widget value and as the fallback when the field is left empty.
    default_maxfev = 50000

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Modelos de Bioproceso: Logístico, Gompertz, Moser y Luedeking-Piret")
        # Raw string so the LaTeX backslashes are not interpreted by Python.
        # The text is kept flush-left, exactly as it appears in the source.
        gr.Markdown(r"""
## Ecuaciones Diferenciales Utilizadas
**Biomasa:**
- Logístico:
$$
\frac{dX}{dt} = \mu_m X\left(1 - \frac{X}{X_m}\right)
$$
- Gompertz:
$$
X(t) = X_m \exp\left(-\exp\left(\left(\frac{\mu_m e}{X_m}\right)(\text{lag}-t)+1\right)\right)
$$
Ecuación diferencial:
$$
\frac{dX}{dt} = X(t)\left(\frac{\mu_m e}{X_m}\right)\exp\left(\left(\frac{\mu_m e}{X_m}\right)(\text{lag}-t)+1\right)
$$
- Moser (simplificado):
$$
X(t)=X_m(1-e^{-\mu_m(t-K_s)})
$$
$$
\frac{dX}{dt}=\mu_m(X_m - X)
$$
**Sustrato y Producto (Luedeking-Piret):**
$$
\frac{dS}{dt} = -p \frac{dX}{dt} - q X
$$
$$
\frac{dP}{dt} = \alpha \frac{dX}{dt} + \beta X
$$
""")

        with gr.Tabs():
            # --- Tab 1: data source, model selection and fitting options ---
            with gr.TabItem("Configuración Principal"):
                file_input = gr.File(label="Subir archivo Excel (.xlsx)")
                experiment_names = gr.Textbox(
                    label="Nombres de los experimentos (uno por línea, opcional)",
                    placeholder="Tratamiento A\nTratamiento B\n...\nSi se deja vacío, se usarán nombres de hoja/columna.",
                    lines=3
                )
                model_types = gr.CheckboxGroup(
                    choices=["logistic", "gompertz", "moser"],
                    label="Tipo(s) de Modelo de Biomasa",
                    value=["logistic"]
                )
                analysis_mode = gr.Radio(
                    choices=[
                        ("Procesar cada réplica/columna independientemente", "independent"),
                        ("Promediar réplicas por hoja (gráficos separados)", "average"),
                        ("Promediar réplicas por hoja (gráfico combinado)", "combinado")
                    ],
                    label="Modo de Análisis de Datos del Excel", value="independent"
                )
                use_differential = gr.Checkbox(label="Usar EDOs para predecir y graficar curvas", value=False)
                maxfev_input = gr.Number(
                    label="maxfev (Máx. iteraciones para ajuste)",
                    value=default_maxfev,
                    precision=0
                )
                with gr.Accordion("Bounds para Parámetros de Biomasa (opcional)", open=False):
                    gr.Markdown("Especificar bounds como `valor1,valor2,valor3`. Parámetros: (X0, Xm, um) Logístico, (Xm, um, lag) Gompertz, (Xm, um, Ks) Moser.")
                    lower_bounds_biomass = gr.Textbox(label="Lower Bounds Biomasa (ej: 0.01,1,0.01)")
                    upper_bounds_biomass = gr.Textbox(label="Upper Bounds Biomasa (ej: 1,10,1)")

            # --- Tab 2: purely cosmetic plot options ---
            with gr.TabItem("Personalización de Gráficos"):
                with gr.Row():
                    show_legend = gr.Checkbox(label="Mostrar Leyenda", value=True)
                    legend_position = gr.Dropdown(
                        choices=["best", "upper left", "upper right", "lower left", "lower right", "center left", "center right", "lower center", "upper center", "center"],
                        label="Posición Leyenda", value="best"
                    )
                with gr.Row():
                    show_params = gr.Checkbox(label="Mostrar Parámetros/Estadísticas", value=True)
                    params_position = gr.Dropdown(
                        choices=["upper left", "upper right", "lower left", "lower right", "outside right"],
                        label="Posición Parámetros", value="upper right"
                    )
                with gr.Row():
                    error_bar_type_radio = gr.Radio(
                        choices=[("Desviación Estándar (SD)", "sd"), ("Error Estándar de la Media (SEM)", "sem")],
                        label="Tipo de Barra de Error (para modos Promedio/Combinado)", value="sd"
                    )
                    error_bar_capsize_slider = gr.Slider(
                        minimum=0, maximum=10, value=3, step=1,
                        label="Tamaño 'Cap' Barras de Error (0 para quitar)"
                    )
                with gr.Row():
                    style_dropdown = gr.Dropdown(choices=['whitegrid', 'darkgrid', 'white', 'dark', 'ticks'], label="Estilo Seaborn", value='whitegrid')
                    line_style_dropdown = gr.Dropdown(choices=['-', '--', '-.', ':'], label="Estilo Línea Modelo", value='-')
                    marker_style_dropdown = gr.Dropdown(choices=['o', 's', '^', 'v', 'D', 'x', '+', '*'], label="Estilo Punto Datos", value='o')
                with gr.Row():
                    line_color_picker = gr.ColorPicker(label="Color Línea Modelo", value='#0000FF')
                    point_color_picker = gr.ColorPicker(label="Color Puntos Datos", value='#000000')
                gr.Markdown("### Unidades para los Ejes (opcional)")
                with gr.Row():
                    time_unit_input = gr.Textbox(label="Unidad de Tiempo", placeholder="ej: h")
                    biomass_unit_input = gr.Textbox(label="Unidad de Biomasa", placeholder="ej: g/L")
                with gr.Row():
                    substrate_unit_input = gr.Textbox(label="Unidad de Sustrato", placeholder="ej: g/L")
                    product_unit_input = gr.Textbox(label="Unidad de Producto", placeholder="ej: g/L")

        # --- Run button and result widgets ---
        simulate_btn = gr.Button("Generar Modelos y Gráficos", variant="primary")
        status_message = gr.Textbox(label="Estado", interactive=False)
        # Earlier correction kept from the original: columns is passed as the
        # tuple (1, 2) rather than the list [1, 2].
        output_gallery = gr.Gallery(label="Resultados Gráficos", columns=(1,2), height='auto', object_fit="contain")
        output_table = gr.Dataframe(
            label="Tabla Comparativa de Modelos",
            headers=["Experimento", "Modelo", "R² Biomasa", "RMSE Biomasa",
                     "R² Sustrato", "RMSE Sustrato", "R² Producto", "RMSE Producto"],
            interactive=False, wrap=True
        )
        # Holds the latest comparison DataFrame so the export button can reuse it.
        state_df_for_export = gr.State()

        def run_simulation_wrapper(
                file, exp_names_str, models_sel, mode_sel, use_diff_eq, maxfev,
                lb_biomass_str, ub_biomass_str,
                show_leg, leg_pos, show_par, par_pos,
                err_bar_type, err_bar_caps,
                style_sel, lstyle_sel, mstyle_sel, lcolor, pcolor,
                t_unit, b_unit, s_unit, p_unit):
            """Adapt the widget values to ``process_all_data``'s argument order.

            Returns a 4-tuple ``(figures, comparison_df, comparison_df, message)``;
            the DataFrame is returned twice because it feeds both the visible
            table and the hidden export state.
            """
            # Tolerate a missing textbox value instead of failing on None.strip().
            exp_names_list = [
                name.strip()
                for name in (exp_names_str or "").split('\n')
                if name.strip()
            ]
            # A cleared Number field may arrive as None; the downstream code
            # applies int() to this value, so fall back to the UI default.
            maxfev_value = default_maxfev if maxfev is None else maxfev

            figures, comparison_df, message = process_all_data(
                file, leg_pos, par_pos, models_sel, mode_sel, exp_names_list,
                lb_biomass_str, ub_biomass_str,
                style_sel, lcolor, pcolor, lstyle_sel, mstyle_sel,
                show_leg, show_par, use_diff_eq, maxfev_value,
                t_unit, b_unit, s_unit, p_unit,
                err_bar_type, err_bar_caps
            )
            return figures, comparison_df, comparison_df, message

        simulate_btn.click(
            fn=run_simulation_wrapper,
            inputs=[
                file_input, experiment_names, model_types, analysis_mode, use_differential, maxfev_input,
                lower_bounds_biomass, upper_bounds_biomass,
                show_legend, legend_position, show_params, params_position,
                error_bar_type_radio, error_bar_capsize_slider,
                style_dropdown, line_style_dropdown, marker_style_dropdown, line_color_picker, point_color_picker,
                time_unit_input, biomass_unit_input, substrate_unit_input, product_unit_input
            ],
            outputs=[output_gallery, output_table, state_df_for_export, status_message]
        )

        def export_excel(df_to_export):
            """Write the comparison table to a temporary .xlsx and return its path.

            When there is nothing to export (``None`` or an empty DataFrame), a
            one-cell placeholder workbook is written instead so the download
            widget always receives a valid file.
            """
            if df_to_export is None or df_to_export.empty:
                name_prefix = "no_data_"
                frame = pd.DataFrame({"Mensaje": ["No hay datos para exportar."]})
            else:
                name_prefix = None  # let tempfile use its default prefix
                frame = df_to_export

            # Only reserve a unique path here and close the handle right away:
            # pandas reopens the file by name, which fails on Windows while the
            # NamedTemporaryFile handle is still open.
            with tempfile.NamedTemporaryFile(prefix=name_prefix, suffix=".xlsx", delete=False) as tmp:
                excel_path = tmp.name
            frame.to_excel(excel_path, index=False)
            return excel_path

        export_btn = gr.Button("Exportar Tabla a Excel")
        file_output_excel = gr.File(label="Descargar Tabla Excel")
        export_btn.click(fn=export_excel, inputs=state_df_for_export, outputs=file_output_excel)

    return demo
if __name__ == '__main__':
    # Script entry point: build the Blocks UI, then start the server with
    # share=True and debug=True.
    app_interface = create_interface()
    app_interface.launch(debug=True, share=True)