# Project-HF-2025 / app.py
# Hugging Face file-viewer header captured with the source: uploader C2MV,
# commit 2437f5f ("Update app.py", verified), file size 24.5 kB.
import gradio as gr
from openai import OpenAI # Cambiado de anthropic a openai
import PyPDF2
import pandas as pd
import numpy as np
import io
import os
import json
import zipfile
import tempfile
from typing import Dict, List, Tuple, Union, Optional
import re
from pathlib import Path
import openpyxl
from dataclasses import dataclass
from enum import Enum
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import matplotlib.pyplot as plt
from datetime import datetime
# Configuration for Hugging Face: switch off Gradio analytics.
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# --- Client and model configuration ---
# OpenAI-compatible client for the Qwen API. It stays None when
# NEBIUS_API_KEY is unset or empty, which main() treats as "not configured".
_nebius_api_key = os.getenv("NEBIUS_API_KEY")
client = (
    OpenAI(
        base_url="https://api.studio.nebius.com/v1/",
        api_key=_nebius_api_key,
    )
    if _nebius_api_key
    else None
)

# Fixed AI model used for every request.
QWEN_MODEL = "Qwen/Qwen3-14B"
# --- End of client and model configuration ---
# Translation system (unchanged).
# Maps a language code to a table of UI strings keyed by message id.
# Only 'en' and 'es' are defined here.
TRANSLATIONS = {
    'en': {
        'title': '🧬 Comparative Analyzer of Biotechnological Models (Qwen Edition)',
        'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
        'upload_files': '📁 Upload fitting results (CSV/Excel)',
        'select_model': '🤖 AI Model',  # Label updated
        'select_language': '🌐 Language',
        'select_theme': '🎨 Theme',
        'detail_level': '📋 Analysis detail level',
        'detailed': 'Detailed',
        'summarized': 'Summarized',
        'analyze_button': '🚀 Analyze and Compare Models',
        'export_format': '📄 Export format',
        'export_button': '💾 Export Report',
        'comparative_analysis': '📊 Comparative Analysis',
        'implementation_code': '💻 Implementation Code',
        'data_format': '📋 Expected data format',
        'examples': '📚 Analysis examples',
        'light': 'Light',
        'dark': 'Dark',
        'loading': 'Loading...',
        'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets',  # Error message updated
        'error_no_files': 'Please upload fitting result files to analyze',
        'report_exported': 'Report exported successfully as',
        'specialized_in': '🎯 Specialized in:',
        'metrics_analyzed': '📊 Analyzed metrics:',
        'what_analyzes': '🔍 What it specifically analyzes:',
        'tips': '💡 Tips for better results:',
        'additional_specs': '📝 Additional specifications for analysis',
        'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
    },
    'es': {
        'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos (Edición Qwen)',
        'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
        'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
        'select_model': '🤖 Modelo de IA',  # Label updated
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nivel de detalle del análisis',
        'detailed': 'Detallado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analizar y Comparar Modelos',
        'export_format': '📄 Formato de exportación',
        'export_button': '💾 Exportar Reporte',
        'comparative_analysis': '📊 Análisis Comparativo',
        'implementation_code': '💻 Código de Implementación',
        'data_format': '📋 Formato de datos esperado',
        'examples': '📚 Ejemplos de análisis',
        'light': 'Claro',
        'dark': 'Oscuro',
        'loading': 'Cargando...',
        'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space',  # Error message updated
        'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
        'report_exported': 'Reporte exportado exitosamente como',
        'specialized_in': '🎯 Especializado en:',
        'metrics_analyzed': '📊 Métricas analizadas:',
        'what_analyzes': '🔍 Qué analiza específicamente:',
        'tips': '💡 Tips para mejores resultados:',
        'additional_specs': '📝 Especificaciones adicionales para el análisis',
        'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
    },
    # ... other translations unchanged ...
}
# Themes (unchanged).
# Maps a theme key to a Gradio theme object: the stock Soft theme for 'light'
# and a customised Base theme for 'dark'.
THEMES = {
    'light': gr.themes.Soft(),
    'dark': gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Arial", "sans-serif"]
    ).set(
        # NOTE(review): "dark" is neither a CSS color nor a "*token" reference
        # like the other values here -- confirm the intended fill.
        body_background_fill="dark",
        body_background_fill_dark="*neutral_950",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_500",
        button_primary_text_color="white",
        block_background_fill="*neutral_800",
        block_border_color="*neutral_700",
        block_label_text_color="*neutral_200",
        block_title_text_color="*neutral_100",
        checkbox_background_color="*neutral_700",
        checkbox_background_color_selected="*primary_600",
        input_background_fill="*neutral_700",
        input_border_color="*neutral_600",
        input_placeholder_color="*neutral_400"
    )
}
# Data classes and structures (unchanged)
class AnalysisType(Enum):
    """Kinds of content the analyzer distinguishes between.

    UNKNOWN is the fallback used when a classification cannot be made.
    """

    MATHEMATICAL_MODEL = "mathematical_model"
    DATA_FITTING = "data_fitting"
    FITTING_RESULTS = "fitting_results"
    UNKNOWN = "unknown"
@dataclass
class MathematicalModel:
    """Descriptive record for a single mathematical model."""

    # Model name; ModelRegistry uses it as the key inside a category.
    name: str
    # Equation written out as text.
    equation: str
    # Parameter labels.
    parameters: List[str]
    # Short description of where the model applies.
    application: str
    # Names of the sources associated with the model.
    sources: List[str]
    # Grouping key; ModelRegistry files the model under this category.
    category: str
    # Plain-language interpretation of the model.
    biological_meaning: str
class ModelRegistry:
    """In-memory catalogue of mathematical models grouped by category."""

    def __init__(self):
        # category -> {model name -> MathematicalModel}
        self.models = {}
        self._initialize_default_models()

    def register_model(self, model: MathematicalModel):
        """Store ``model`` under its category, replacing any entry with the same name."""
        self.models.setdefault(model.category, {})[model.name] = model

    def get_model(self, category: str, name: str) -> MathematicalModel:
        """Return the model called ``name`` in ``category``.

        Yields ``None`` when either the category or the name is not registered.
        """
        models_in_category = self.models.get(category, {})
        return models_in_category.get(name)

    def get_all_models(self) -> Dict:
        """Return the underlying category -> name -> model mapping (not a copy)."""
        return self.models

    def _initialize_default_models(self):
        """Register the three built-in biomass growth models."""
        default_models = [
            MathematicalModel(
                name="Monod",
                equation="μ = μmax × (S / (Ks + S))",
                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
                application="Crecimiento limitado por sustrato único",
                sources=["Cambridge", "MIT", "DTU"],
                category="crecimiento_biomasa",
                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
            ),
            MathematicalModel(
                name="Logístico",
                equation="dX/dt = μmax × X × (1 - X/Xmax)",
                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
                application="Sistemas cerrados batch",
                sources=["Cranfield", "Swansea", "HAL Theses"],
                category="crecimiento_biomasa",
                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
            ),
            MathematicalModel(
                name="Gompertz",
                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
                application="Crecimiento con fase lag pronunciada",
                sources=["Lund University", "NC State"],
                category="crecimiento_biomasa",
                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
            ),
        ]
        for default_model in default_models:
            self.register_model(default_model)
# Shared registry instance; constructing it registers the default models.
model_registry = ModelRegistry()
# The CLAUDE_MODELS dictionary was removed
# Processing and export classes (unchanged)
class FileProcessor:
    """Stateless helpers that decode uploaded file contents passed in as bytes."""

    @staticmethod
    def extract_text_from_pdf(pdf_file) -> str:
        """Return the text of every page of a PDF, one page per line group.

        Args:
            pdf_file: Raw bytes of the PDF document.

        Returns:
            The concatenated page texts, each followed by a newline, or a
            string starting with ``"Error reading PDF: "`` if parsing fails.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            # Defensive: treat a page that yields no text (None) as empty so a
            # single such page does not discard the whole document.
            return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    @staticmethod
    def read_csv(csv_file) -> Optional[pd.DataFrame]:
        """Parse CSV bytes into a DataFrame; return None if parsing fails."""
        try:
            return pd.read_csv(io.BytesIO(csv_file))
        except Exception as e:
            # Callers rely on the None sentinel; log instead of failing silently.
            print(f"Error reading CSV: {e}")
            return None

    @staticmethod
    def read_excel(excel_file) -> Optional[pd.DataFrame]:
        """Parse Excel bytes into a DataFrame; return None if parsing fails."""
        try:
            return pd.read_excel(io.BytesIO(excel_file))
        except Exception as e:
            # Callers rely on the None sentinel; log instead of failing silently.
            print(f"Error reading Excel: {e}")
            return None

    @staticmethod
    def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
        """Return ``(member name, content)`` pairs for the files in a ZIP archive.

        Members under ``__MACOSX`` and directory entries are skipped. If the
        archive cannot be processed, the error is printed and whatever was
        extracted so far (possibly nothing) is returned.

        Args:
            zip_file: Raw bytes of the ZIP archive.
        """
        files = []
        try:
            with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
                for file_name in zip_ref.namelist():
                    if file_name.startswith('__MACOSX') or file_name.endswith('/'):
                        continue
                    # Keep the member name with its bytes, as the annotated
                    # return type promises.
                    files.append((file_name, zip_ref.read(file_name)))
        except Exception as e:
            print(f"Error processing ZIP: {e}")
        return files
class ReportExporter:
    """Render the Markdown-like analysis text to a DOCX or PDF report file."""

    @staticmethod
    def _classify_line(raw_line: str) -> Tuple[str, str]:
        """Classify one line of the analysis text.

        Returns:
            A ``(kind, text)`` pair where ``kind`` is one of ``'blank'``,
            ``'h3'``, ``'h2'``, ``'bold'``, ``'bullet'`` or ``'text'`` and
            ``text`` is the line with its Markdown marker removed.
        """
        line = raw_line.strip()
        if not line:
            return 'blank', ''
        if line.startswith('###'):
            # lstrip removes only the leading marker, so "####" headings lose
            # all their hashes and a '#' inside the title is preserved.
            return 'h3', line.lstrip('#').strip()
        if line.startswith('##'):
            return 'h2', line.lstrip('#').strip()
        if line.startswith('**') and line.endswith('**'):
            return 'bold', line.replace('**', '')
        if line.startswith('- '):
            return 'bullet', line[2:]
        return 'text', line

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
        """Write ``content`` to a Word document and return ``filename``.

        Args:
            content: Analysis text; ``##``/``###`` headings, fully bold lines
                and ``- `` bullets are mapped to the matching Word elements.
            filename: Path of the .docx file to create.
            language: Language code for the title and date label; unknown
                codes fall back to English.
        """
        doc = Document()
        title_text = {'en': 'Comparative Analysis Report', 'es': 'Informe de Análisis Comparativo'}
        doc.add_heading(title_text.get(language, title_text['en']), 0)
        date_text = {'en': 'Generated on', 'es': 'Generado el'}
        doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        doc.add_paragraph()
        for raw_line in content.split('\n'):
            kind, text = ReportExporter._classify_line(raw_line)
            if kind == 'h3':
                doc.add_heading(text, level=2)
            elif kind == 'h2':
                doc.add_heading(text, level=1)
            elif kind == 'bold':
                doc.add_paragraph().add_run(text).bold = True
            elif kind == 'bullet':
                doc.add_paragraph(text, style='List Bullet')
            elif kind == 'text':
                doc.add_paragraph(text)
        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
        """Write ``content`` to a PDF document and return ``filename``.

        Args:
            content: Analysis text, interpreted as in ``export_to_docx``.
            filename: Path of the .pdf file to create.
            language: Language code for the title and date label; unknown
                codes fall back to English.
        """
        # Local import keeps this block self-contained.
        from xml.sax.saxutils import escape

        def to_markup(text: str) -> str:
            # Paragraph text is parsed as XML-style markup, so '&', '<' and '>'
            # coming from the analysis must be escaped. The two emoji are
            # swapped for ASCII tags, as the original did for plain lines.
            return escape(text.replace('📊', '[G]').replace('🎯', '[T]'))

        doc = SimpleDocTemplate(filename, pagesize=letter)
        story, styles = [], getSampleStyleSheet()
        title_style = ParagraphStyle('CustomTitle', parent=styles['Title'], fontSize=24, spaceAfter=30)
        title_text = {'en': 'Comparative Analysis Report', 'es': 'Informe de Análisis Comparativo'}
        story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
        date_text = {'en': 'Generated on', 'es': 'Generado el'}
        story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        story.append(Spacer(1, 0.5*inch))
        for raw_line in content.split('\n'):
            kind, text = ReportExporter._classify_line(raw_line)
            if kind == 'h3':
                story.append(Paragraph(to_markup(text), styles['Heading3']))
            elif kind == 'h2':
                story.append(Paragraph(to_markup(text), styles['Heading2']))
            elif kind == 'bold':
                story.append(Paragraph(f"<b>{to_markup(text)}</b>", styles['Normal']))
            elif kind == 'bullet':
                story.append(Paragraph(f"• {to_markup(text)}", styles['Normal']))
            elif kind == 'text':
                story.append(Paragraph(to_markup(text), styles['Normal']))
        doc.build(story)
        return filename
# --- AIAnalyzer class (modified) ---
class AIAnalyzer:
    """AI-assisted analysis that talks to the Qwen API through ``client``."""

    def __init__(self, client, model_registry):
        self.client = client
        self.model_registry = model_registry

    def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
        """Classify ``content`` as a model description, raw data or fitting results.

        DataFrames are classified locally from their column names. Any other
        content is sliced to its first 1000 characters and sent to the model,
        whose reply is scanned for a keyword. API errors are printed and
        reported as ``AnalysisType.UNKNOWN``.
        """
        if isinstance(content, pd.DataFrame):
            # A column name containing any of these substrings marks the
            # table as fitting results.
            joined_columns = ' '.join([col.lower() for col in content.columns])
            fitting_indicators = ['r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic', 'parameter', 'model', 'equation']
            for indicator in fitting_indicators:
                if indicator in joined_columns:
                    return AnalysisType.FITTING_RESULTS
            return AnalysisType.DATA_FITTING

        prompt = "Analyze this content and determine if it is: 1. A scientific article, 2. Experimental data, 3. Model fitting results. Reply only with: 'MODEL', 'DATA' or 'RESULTS'"
        try:
            # NOTE(review): max_tokens=10 leaves room for little beyond the
            # keyword itself -- confirm the model replies that tersely.
            response = self.client.chat.completions.create(
                model=QWEN_MODEL,
                messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}],
                max_tokens=10,
                temperature=0.2  # Low temperature for a precise classification
            )
            verdict = response.choices[0].message.content.strip().upper()
            # Checked in this order; the first keyword found decides.
            keyword_to_type = (
                ("MODEL", AnalysisType.MATHEMATICAL_MODEL),
                ("RESULTS", AnalysisType.FITTING_RESULTS),
                ("DATA", AnalysisType.DATA_FITTING),
            )
            for keyword, analysis_type in keyword_to_type:
                if keyword in verdict:
                    return analysis_type
            return AnalysisType.UNKNOWN
        except Exception as e:
            print(f"Error en detección de tipo: {e}")
            return AnalysisType.UNKNOWN

    def get_language_prompt_prefix(self, language: str) -> str:
        """Return the "respond in <language>" instruction; English if unknown."""
        prefixes = {
            'en': "Please respond in English.",
            'es': "Por favor responde en español.",
            'fr': "Veuillez répondre en français.",
            'de': "Bitte antworten Sie auf Deutsch.",
            'pt': "Por favor responda em português.",
        }
        try:
            return prefixes[language]
        except KeyError:
            return prefixes['en']

    def analyze_fitting_results(self, data: pd.DataFrame, detail_level: str = "detailed",
                                language: str = "en", additional_specs: str = "") -> Dict:
        """Ask the model for a comparative analysis of ``data`` plus matching code.

        Two requests are made: one for the written analysis, one for the
        implementation code.

        Args:
            data: Table of fitting results.
            detail_level: ``"detailed"`` selects the detailed prompt; any
                other value selects the summarized one.
            language: Language code used to pick the response-language prefix.
            additional_specs: Optional user requirements appended to the
                analysis prompt.

        Returns:
            On success, a dict with keys ``tipo``, ``analisis_completo``,
            ``codigo_implementacion`` and ``resumen_datos``. If either API
            call raises, ``{"error": <message>}``.
        """
        # The prompts stay the same; only the API call changed.
        table_text = data.to_string()
        stats_text = data.describe().to_string()
        data_summary = f"FITTING RESULTS DATA:\n\n{table_text}\n\nDescriptive statistics:\n{stats_text}"
        lang_prefix = self.get_language_prompt_prefix(language)
        if additional_specs:
            user_specs_section = f"USER ADDITIONAL SPECIFICATIONS:\n{additional_specs}\nPlease ensure to address these specific requirements."
        else:
            user_specs_section = ""
        prompt = (
            f"{lang_prefix}\nYou are an expert in biotechnology... [PROMPT DETALLADO IGUAL QUE EL ORIGINAL] ...\n{user_specs_section}"
            if detail_level == "detailed"
            else f"{lang_prefix}\nYou are an expert in biotechnology... [PROMPT RESUMIDO IGUAL QUE EL ORIGINAL] ...\n{user_specs_section}"
        )
        try:
            # First request: the written analysis.
            analysis_response = self.client.chat.completions.create(
                model=QWEN_MODEL,
                messages=[{"role": "user", "content": f"{prompt}\n\n{data_summary}"}],
                max_tokens=4000,
                temperature=0.6,
                top_p=0.95
            )
            analysis_text = analysis_response.choices[0].message.content

            # Second request: the implementation code.
            code_prompt = f"{lang_prefix}\nBased on the analysis and this data:\n{data.to_string()}\nGenerate Python code that... [PROMPT DE CÓDIGO IGUAL QUE EL ORIGINAL]"
            code_response = self.client.chat.completions.create(
                model=QWEN_MODEL,
                messages=[{"role": "user", "content": code_prompt}],
                max_tokens=3000,
                temperature=0.6,
                top_p=0.95
            )
            code_text = code_response.choices[0].message.content

            data_overview = {
                "n_modelos": len(data),
                "columnas": list(data.columns),
            }
            return {
                "tipo": "Comparative Analysis of Mathematical Models",
                "analisis_completo": analysis_text,
                "codigo_implementacion": code_text,
                "resumen_datos": data_overview,
            }
        except Exception as e:
            return {"error": str(e)}
# --- Processing functions (modified) ---
def _join_code_sections(code_sections: List[str]) -> str:
    """Join generated code snippets with a commented rule line between them."""
    if not code_sections:
        return "No implementation code generated."
    # Build the separator first: in the original expression `.join` bound only
    # to the trailing "\n\n", so the rule was glued onto the first snippet.
    separator = "\n\n# " + "=" * 50 + "\n\n"
    return separator.join(code_sections)


def process_files(files, detail_level: str = "detailed", language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
    """Analyze every uploaded CSV/Excel file and collect the results.

    Args:
        files: Uploaded files, either path strings or objects exposing the
            path as ``.name``. ``None`` entries are skipped.
        detail_level: Passed through to ``AIAnalyzer.analyze_fitting_results``.
        language: Language code passed through to the analyzer.
        additional_specs: Extra user requirements passed through to the analyzer.

    Returns:
        ``(analysis_text, code_text)``. Per-file analyses are separated by a
        ``---`` rule. Files that cannot be read, and analyzer failures, show
        up in the analysis as ``Error: ...`` entries rather than being
        dropped. ``code_text`` is a fixed message when no code was produced.
    """
    if not files:
        return "", _join_code_sections([])

    processor = FileProcessor()
    analyzer = AIAnalyzer(client, model_registry)
    results, all_code = [], []
    for file in files:
        if file is None:
            continue
        # Accept both plain path strings and objects carrying `.name`.
        file_path = getattr(file, "name", file)
        file_ext = Path(file_path).suffix.lower()
        if file_ext not in ('.csv', '.xlsx', '.xls'):
            continue
        with open(file_path, 'rb') as f:
            file_content = f.read()
        df = processor.read_csv(file_content) if file_ext == '.csv' else processor.read_excel(file_content)
        if df is None:
            results.append(f"Error: could not read {Path(file_path).name}")
            continue
        result = analyzer.analyze_fitting_results(df, detail_level, language, additional_specs)
        if "error" in result:
            # The analyzer reports failures as {"error": ...}; surface them.
            results.append(f"Error: {result['error']}")
            continue
        results.append(result.get("analisis_completo", ""))
        if "codigo_implementacion" in result:
            all_code.append(result["codigo_implementacion"])

    analysis_text = "\n\n---\n\n".join(results)
    # generate_implementation_code could act as a fallback, but the AI already
    # produces the code.
    return analysis_text, _join_code_sections(all_code)
# ... The remaining functions (generate_implementation_code, AppState,
# export_report) need no changes ...
# (Identical code omitted for brevity)
def generate_implementation_code(analysis_results: str) -> str:
    """Return a placeholder code snippet.

    This can serve as a fallback if the API fails. ``analysis_results`` is
    accepted but not used.
    """
    fallback_snippet = "pass # Fallback code generation"
    return fallback_snippet
class AppState:
    """Mutable holder for the latest analysis text, generated code and UI language."""

    def __init__(self):
        self.current_analysis, self.current_code = "", ""
        self.current_language = "en"


# Module-level instance read and written by the UI callbacks and export_report.
app_state = AppState()
def export_report(export_format: str, language: str) -> Tuple[str, str]:
    """Export the stored analysis to a timestamped DOCX or PDF file.

    Args:
        export_format: ``"DOCX"`` for a Word document; any other value
            produces a PDF.
        language: Language code for the status message and report headings.
            Unknown codes fall back to English.

    Returns:
        ``(status_message, filename)``. ``filename`` is ``""`` when there is
        no analysis to export or when the export fails.
    """
    # Fall back to English instead of raising KeyError, matching how
    # ReportExporter resolves unknown language codes.
    messages = TRANSLATIONS.get(language, TRANSLATIONS['en'])
    if not app_state.current_analysis:
        return messages.get('error_no_files', 'No analysis to export'), ""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    try:
        filename = f"biotech_report_{timestamp}.{export_format.lower()}"
        if export_format == "DOCX":
            ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
        else:
            ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
        return f"{messages['report_exported']} {filename}", filename
    except Exception as e:
        # UI boundary: report the failure as a status message.
        return f"Error: {e}", ""
# --- Gradio interface (modified) ---
def create_interface():
    """Build the Gradio Blocks UI, wire its event handlers and return it.

    The caller is responsible for launching the returned object.

    NOTE(review): the source had lost its indentation, so the nesting of the
    layout containers below is a best-effort reconstruction -- confirm it
    against the deployed app.
    """
    current_language = "en"
    def update_interface_language(language):
        """Return one gr.update per output component, in the order wired below."""
        app_state.current_language = language
        t = TRANSLATIONS[language]
        # `model_selector` is no longer part of the update
        return [
            gr.update(value=f"# {t['title']}"),
            gr.update(value=t['subtitle']),
            gr.update(label=t['upload_files']),
            gr.update(label=t['select_language']),
            gr.update(label=t['select_theme']),
            # Only the label is updated; the radio's choice labels are not.
            gr.update(label=t['detail_level']),
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
            gr.update(value=t['analyze_button']),
            gr.update(label=t['export_format']),
            gr.update(value=t['export_button']),
            gr.update(label=t['comparative_analysis']),
            gr.update(label=t['implementation_code']),
            gr.update(label=t['data_format'])
        ]
    def process_and_store(files, detail, language, additional_specs):
        """Run the analysis and keep the result in app_state for later export."""
        # `model` was removed from the arguments
        if not files: return TRANSLATIONS[language]['error_no_files'], ""
        analysis, code = process_files(files, detail, language, additional_specs)
        app_state.current_analysis, app_state.current_code = analysis, code
        return analysis, code
    with gr.Blocks(theme=THEMES["light"]) as demo:
        # Header row: title and subtitle next to the language and theme selectors.
        with gr.Row():
            with gr.Column(scale=3):
                title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
                subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
            with gr.Column(scale=1):
                language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es")], value="en", label="Language")
                # No event handler in this function is attached to theme_selector.
                theme_selector = gr.Dropdown(choices=["Light", "Dark"], value="Light", label="Theme")
        # Main row: inputs and export controls next to the outputs.
        with gr.Row():
            with gr.Column(scale=1):
                files_input = gr.File(label=TRANSLATIONS[current_language]['upload_files'], file_count="multiple", type="filepath")
                # The Claude model selector was removed
                gr.Markdown(f"**🤖 AI Model:** `{QWEN_MODEL}`")
                detail_level = gr.Radio(choices=[(TRANSLATIONS[current_language]['detailed'], "detailed"), (TRANSLATIONS[current_language]['summarized'], "summarized")], value="detailed", label=TRANSLATIONS[current_language]['detail_level'])
                additional_specs = gr.Textbox(label=TRANSLATIONS[current_language]['additional_specs'], placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'], lines=3)
                analyze_btn = gr.Button(TRANSLATIONS[current_language]['analyze_button'], variant="primary")
                gr.Markdown("---")
                export_format = gr.Radio(choices=["DOCX", "PDF"], value="PDF", label=TRANSLATIONS[current_language]['export_format'])
                export_btn = gr.Button(TRANSLATIONS[current_language]['export_button'])
                # Both start hidden; handle_export reveals them.
                export_status = gr.Textbox(label="Export Status", interactive=False, visible=False)
                export_file = gr.File(label="Download Report", visible=False)
            with gr.Column(scale=2):
                analysis_output = gr.Markdown(label=TRANSLATIONS[current_language]['comparative_analysis'])
                code_output = gr.Code(label=TRANSLATIONS[current_language]['implementation_code'], language="python")
        data_format_accordion = gr.Accordion(label=TRANSLATIONS[current_language]['data_format'], open=False)
        with data_format_accordion: gr.Markdown("...") # Content unchanged
        # NOTE(review): the example path is relative -- confirm the file ships with the app.
        examples = gr.Examples(examples=[[["examples/biomass_models_comparison.csv"], "detailed", ""]], inputs=[files_input, detail_level, additional_specs], label=TRANSLATIONS[current_language]['examples'])
        # Updated events
        language_selector.change(
            update_interface_language,
            inputs=[language_selector],
            outputs=[title_text, subtitle_text, files_input, language_selector, theme_selector, detail_level, additional_specs, analyze_btn, export_format, export_btn, analysis_output, code_output, data_format_accordion]
        )
        analyze_btn.click(
            fn=process_and_store,
            inputs=[files_input, detail_level, language_selector, additional_specs], # The model selector was removed
            outputs=[analysis_output, code_output]
        )
        def handle_export(format, language):
            """Export the report; show the status, and the file only if one was produced."""
            status, file = export_report(format, language)
            return gr.update(value=status, visible=True), gr.update(value=file, visible=bool(file))
        export_btn.click(fn=handle_export, inputs=[export_format, language_selector], outputs=[export_status, export_file])
    return demo
def main():
    """Return the UI to launch.

    This is the full application when an API client was created, otherwise a
    minimal interface that only reports the missing-key configuration error.
    """
    if client:
        return create_interface()

    # Check for the new API key: no client means NEBIUS_API_KEY was not set.
    print("⚠️ Configure NEBIUS_API_KEY in HuggingFace Space secrets")
    return gr.Interface(
        fn=lambda x: TRANSLATIONS['en']['error_no_api'],
        inputs=gr.Textbox(),
        outputs=gr.Textbox(),
        title="Configuration Error",
    )
# Script entry point: build the UI and serve it on 0.0.0.0:7860 without a share link.
if __name__ == "__main__":
    demo = main()
    if demo:
        demo.launch(server_name="0.0.0.0", server_port=7860, share=False)