Spaces:

C2MV
/

Project-HF-2025

Sleeping

App Files Community

C2MV committed on Jun 8

Commit

f57d15a

verified ·

1 Parent(s): c27baae

Update app.py

Browse files

Files changed (1) hide show

app.py +270 -120

app.py CHANGED Viewed

@@ -586,92 +586,136 @@ class AIAnalyzer:
         {data.describe().to_string()}
         """
         # Obtener prefijo de idioma
         lang_prefix = self.get_language_prompt_prefix(language)
-        # Prompt mejorado con soporte de idioma
-        prompt = f"""
-        {lang_prefix}
-        You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
-        REQUESTED DETAIL LEVEL: {detail_level}
-        PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS:
-        1. **MODEL IDENTIFICATION AND CLASSIFICATION**
-           - Identify ALL fitted mathematical models
-           - Classify them by type: biomass, substrate, product
-           - Indicate the mathematical equation of each model if possible
-        2. **COMPARATIVE ANALYSIS OF FIT QUALITY**
-           - Compare ALL available indicators: R², RMSE, AIC, BIC, etc.
-           - Create a ranking from best to worst model
-           - Identify significant differences between models
-           - Detect possible overfitting or underfitting
-        3. **DETERMINATION OF THE BEST MODEL**
-           - Select the BEST model based on MULTIPLE criteria:
-             * Highest R² (closest to 1)
-             * Lowest RMSE/MSE
-             * Lowest AIC/BIC (if available)
-             * Parsimony (fewer parameters if fit is similar)
-           - Justify NUMERICALLY why it is the best
-           - If there's a technical tie, explain the advantages of each
-        4. **SPECIFIC ANALYSIS BY VARIABLE TYPE**
-           a) **BIOMASS (if applicable)**:
-              - Growth parameters (μmax, Xmax, etc.)
-              - Doubling time
-              - Biomass productivity
-              - Numerical comparison between models
-           b) **SUBSTRATE (if applicable)**:
-              - Affinity constants (Ks, Km)
-              - Consumption rates
-              - Yield Yx/s
-              - Utilization efficiency
-           c) **PRODUCT (if applicable)**:
-              - Production parameters (α, β)
-              - Specific productivity
-              - Yield Yp/x
-              - Production type (associated/non-associated)
-        5. **BIOLOGICAL INTERPRETATION OF PARAMETERS**
-           - Explain what EACH parameter means biologically
-           - Compare values between models
-           - Evaluate if they are realistic for the system
-           - Identify critical process parameters
-        6. **CONCLUSIONS WITH NUMERICAL CONTENT**
-           - Summarize key findings with SPECIFIC NUMBERS
-           - Provide confidence intervals if available
-           - Indicate optimal operating conditions
-           - Suggest design values for scale-up
-        7. **PRACTICAL RECOMMENDATIONS**
-           - Which model(s) to use for prediction
-           - Limitations of the selected model
-           - Recommended additional experiments
-           - Considerations for industrial implementation
-        8. **FINAL COMPARATIVE TABLE**
-           Create a summary table with:
-           - Model | R² | RMSE | AIC/BIC | Key Parameters | Ranking
-        RESPONSE FORMAT:
-        - If level is "detailed": include ALL points with complete explanations
-        - If level is "summarized": focus on points 3, 6 and 8 with key numerical values
-        Use Markdown format with:
-        - Clear titles and subtitles
-        - **Bold** for important values
-        - Tables when appropriate
-        - Numbered and bulleted lists
-        IMPORTANT: Base ALL conclusions on the SPECIFIC NUMBERS from the provided data.
-        """
         try:
             response = self.client.messages.create(
@@ -683,26 +727,30 @@ class AIAnalyzer:
                 }]
             )
-            # Análisis adicional para generar código
             code_prompt = f"""
             {lang_prefix}
-            Based on the previous analysis, generate Python code for:
-            1. Load and visualize these fitting results
-            2. Create comparative model graphs (bars for R², RMSE)
-            3. Implement the best identified model
-            4. Generate predictions with the selected model
-            5. Parameter sensitivity analysis
-            Include:
-            - Necessary imports
-            - Well-documented functions
-            - Professional visualizations
-            - Error handling
-            - Usage example
-            The code should be executable and modular.
             """
             code_response = self.client.messages.create(
@@ -710,7 +758,7 @@ class AIAnalyzer:
                 max_tokens=3000,
                 messages=[{
                     "role": "user",
-                    "content": f"{code_prompt}\n\nBased on these models:\n{response.content[0].text[:1000]}"
                 }]
             )
@@ -724,7 +772,8 @@ class AIAnalyzer:
                     "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
                                            for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
                     "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
-                    "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None
                 }
             }
@@ -782,7 +831,7 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
     return analysis_text, code_text
 def generate_implementation_code(analysis_results: str) -> str:
-    """Genera código de implementación"""
     code = """
 import numpy as np
 import pandas as pd
@@ -808,12 +857,15 @@ class ComparativeModelAnalyzer:
         self.best_models = {}
         self.model_rankings = {}
-    def load_results(self, file_path: str) -> pd.DataFrame:
-        \"\"\"Load fitting results from CSV or Excel file\"\"\"
-        if file_path.endswith('.csv'):
-            self.results_df = pd.read_csv(file_path)
-        else:
-            self.results_df = pd.read_excel(file_path)
         print(f"✅ Data loaded: {len(self.results_df)} models")
         print(f"📊 Available columns: {list(self.results_df.columns)}")
@@ -880,13 +932,21 @@ class ComparativeModelAnalyzer:
         # Sort by ranking
         comparison = comparison.sort_values('Ranking')
-        # Identify best model
         best_idx = comparison['Score'].idxmax()
         self.best_models['overall'] = comparison.loc[best_idx]
-        # Print comparison table
         print("\\n" + "="*80)
-        print("📊 MODEL COMPARISON TABLE")
         print("="*80)
         print(f"\\n{'Rank':<6} {'Model':<20} {'R²':<8} {'RMSE':<10} {'AIC':<10} {'BIC':<10} {'Score':<8}")
@@ -912,29 +972,119 @@ class ComparativeModelAnalyzer:
                 print(f"{'N/A':<10} ", end="")
             print(f"{score:<8.4f}")
-        print("\\n🏆 BEST MODEL: " + comparison.iloc[0].get(model_col, 'Not specified'))
-        print(f"   - R² = {comparison.iloc[0].get(r2_col, 0):.4f}")
-        print(f"   - RMSE = {comparison.iloc[0].get(rmse_col, 0):.4f}")
         self.model_rankings = comparison
         return comparison
-# Example usage
 if __name__ == "__main__":
     print("🧬 Biotechnological Model Comparative Analysis System")
     print("="*60)
     # Create analyzer
     analyzer = ComparativeModelAnalyzer()
-    # Instructions
-    print("\\n📋 USAGE INSTRUCTIONS:")
-    print("1. analyzer.load_results('your_file.csv')")
-    print("2. analyzer.analyze_model_quality()")
-    print("3. analyzer.plot_comparison()")
-    print("4. analyzer.generate_report()")
-    print("\\n✨ System ready for analysis!")
 """
     return code

         {data.describe().to_string()}
         """
+        # Extraer valores para usar en el código
+        data_dict = data.to_dict('records')
         # Obtener prefijo de idioma
         lang_prefix = self.get_language_prompt_prefix(language)
+        # Prompt mejorado con instrucciones específicas para cada nivel
+        if detail_level == "detailed":
+            prompt = f"""
+            {lang_prefix}
+            You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
+            DETAIL LEVEL: DETAILED - Provide comprehensive analysis
+            PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS:
+            1. **MODEL IDENTIFICATION AND CLASSIFICATION**
+               - Identify ALL fitted mathematical models BY NAME (e.g., "Monod", "Logistic", "Gompertz", etc.)
+               - Classify them by type: biomass growth, substrate consumption, product formation
+               - Indicate the mathematical equation of each model
+               - Mention which experiments/conditions were tested
+            2. **COMPARATIVE ANALYSIS OF FIT QUALITY**
+               - Compare ALL available indicators: R², RMSE, AIC, BIC, etc.
+               - Create a detailed ranking from best to worst model with exact values
+               - For the TOP 3 models, specify:
+                 * Model name: [exact name from data]
+                 * R² value: [exact value]
+                 * RMSE value: [exact value]
+                 * Key parameters and their values
+               - Identify significant differences between models
+               - Detect possible overfitting or underfitting
+            3. **DETERMINATION OF THE BEST MODEL PER CATEGORY**
+               - **BEST OVERALL MODEL**: [Name] with R²=[value], RMSE=[value]
+               - **BEST BIOMASS MODEL** (if applicable): [Name] with parameters
+               - **BEST SUBSTRATE MODEL** (if applicable): [Name] with parameters
+               - **BEST PRODUCT MODEL** (if applicable): [Name] with parameters
+               - Justify NUMERICALLY why each is the best
+            4. **DETAILED ANALYSIS BY VARIABLE TYPE**
+               a) **BIOMASS (if applicable)**:
+                  - Growth parameters (μmax, Xmax, etc.) with exact values
+                  - Doubling time calculations
+                  - Biomass productivity
+                  - Compare parameters between models numerically
+               b) **SUBSTRATE (if applicable)**:
+                  - Affinity constants (Ks, Km) with exact values
+                  - Consumption rates
+                  - Yield Yx/s calculations
+                  - Utilization efficiency percentages
+               c) **PRODUCT (if applicable)**:
+                  - Production parameters (α, β) with exact values
+                  - Specific productivity calculations
+                  - Yield Yp/x values
+                  - Production type classification
+            5. **BIOLOGICAL INTERPRETATION OF PARAMETERS**
+               - Explain what EACH parameter means biologically
+               - Compare parameter values between models
+               - Evaluate if values are realistic for the biological system
+               - Identify critical process control parameters
+            6. **DETAILED CONCLUSIONS WITH NUMERICAL CONTENT**
+               - List the winning model for each category with full statistics
+               - Provide confidence intervals if available
+               - Indicate optimal operating conditions based on parameters
+               - Suggest specific design values for scale-up
+            7. **PRACTICAL RECOMMENDATIONS**
+               - Which specific models to use for different predictions
+               - Limitations of each selected model
+               - Recommended validation experiments
+               - Industrial implementation considerations
+            8. **COMPREHENSIVE COMPARATIVE TABLE**
+               Create a detailed table with ALL models showing:
+               | Model Name | Type | R² | RMSE | AIC | BIC | Key Parameters | Best For | Ranking |
+            Use Markdown format with clear structure and include ALL numerical values from the data.
+            """
+        else:  # summarized
+            prompt = f"""
+            {lang_prefix}
+            You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis of these fitting results.
+            DETAIL LEVEL: SUMMARIZED - Be concise but include all essential information
+            PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
+            1. **QUICK MODEL OVERVIEW**
+               - List ALL models tested: [names]
+               - Categories covered: biomass/substrate/product
+            2. **BEST MODELS - TOP PERFORMERS**
+               🏆 **OVERALL WINNER**: [Model Name]
+                  - R² = [exact value]
+                  - RMSE = [exact value]
+                  - Key parameters: [list with values]
+               📊 **BY CATEGORY**:
+                  - **Biomass**: [Model] (R²=[value], μmax=[value])
+                  - **Substrate**: [Model] (R²=[value], Ks=[value])
+                  - **Product**: [Model] (R²=[value], key param=[value])
+            3. **KEY NUMERICAL FINDINGS**
+               - Best fit achieved: R² = [value] with [model]
+               - Parameter ranges: μmax=[min-max], Ks=[min-max]
+               - Productivity values: [specific numbers]
+               - Yields: Yx/s=[value], Yp/x=[value]
+            4. **QUICK COMPARISON TABLE**
+               | Rank | Model | R² | RMSE | Best Application |
+               |------|-------|-----|------|------------------|
+               | 1    | [Name]| [#] | [#]  | [Use case]      |
+               | 2    | [Name]| [#] | [#]  | [Use case]      |
+               | 3    | [Name]| [#] | [#]  | [Use case]      |
+            5. **PRACTICAL CONCLUSIONS**
+               - Use [Model X] for biomass prediction (R²=[value])
+               - Use [Model Y] for substrate monitoring (R²=[value])
+               - Critical parameters for control: [list with values]
+               - Scale-up recommendation: [specific values]
+            Keep it concise but include ALL model names and key numerical values.
+            """
         try:
             response = self.client.messages.create(
                 }]
             )
+            # Análisis adicional para generar código con valores numéricos reales
             code_prompt = f"""
             {lang_prefix}
+            Based on the analysis and this actual data:
+            {data.to_string()}
+            Generate Python code that:
+            1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
+            2. Implements the best models identified with their EXACT parameters
+            3. Includes visualization functions that use the REAL data values
+            4. Shows comparative analysis with the SPECIFIC numbers from the results
+            The code must include:
+            - Data loading section with the actual values hardcoded as example
+            - Model implementation with the exact parameter values found
+            - Visualization showing the actual R², RMSE values in graphs
+            - Comparison functions using the real numerical data
+            - Predictions using the best model's actual parameters
+            Make sure to include comments indicating which model won and why, with its exact statistics.
+            Format: Complete, executable Python code with actual data values embedded.
             """
             code_response = self.client.messages.create(
                 max_tokens=3000,
                 messages=[{
                     "role": "user",
+                    "content": code_prompt
                 }]
             )
                     "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
                                            for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
                     "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
+                    "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
+                    "datos_completos": data_dict  # Incluir todos los datos para el código
                 }
             }
     return analysis_text, code_text
 def generate_implementation_code(analysis_results: str) -> str:
+    """Genera código de implementación con valores numéricos del análisis"""
     code = """
 import numpy as np
 import pandas as pd
         self.best_models = {}
         self.model_rankings = {}
+    def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
+        \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
+        if data_dict:
+            self.results_df = pd.DataFrame(data_dict)
+        elif file_path:
+            if file_path.endswith('.csv'):
+                self.results_df = pd.read_csv(file_path)
+            else:
+                self.results_df = pd.read_excel(file_path)
         print(f"✅ Data loaded: {len(self.results_df)} models")
         print(f"📊 Available columns: {list(self.results_df.columns)}")
         # Sort by ranking
         comparison = comparison.sort_values('Ranking')
+        # Identify best models by category
+        if 'Type' in comparison.columns:
+            for model_type in comparison['Type'].unique():
+                type_models = comparison[comparison['Type'] == model_type]
+                if not type_models.empty:
+                    best_idx = type_models['Score'].idxmax()
+                    self.best_models[model_type] = type_models.loc[best_idx]
+        # Best overall model
         best_idx = comparison['Score'].idxmax()
         self.best_models['overall'] = comparison.loc[best_idx]
+        # Print comparison table with actual values
         print("\\n" + "="*80)
+        print("📊 MODEL COMPARISON TABLE - ACTUAL RESULTS")
         print("="*80)
         print(f"\\n{'Rank':<6} {'Model':<20} {'R²':<8} {'RMSE':<10} {'AIC':<10} {'BIC':<10} {'Score':<8}")
                 print(f"{'N/A':<10} ", end="")
             print(f"{score:<8.4f}")
+        print("\\n🏆 BEST MODELS BY CATEGORY:")
+        for category, model_data in self.best_models.items():
+            if isinstance(model_data, pd.Series):
+                print(f"\\n{category.upper()}:")
+                print(f"  Model: {model_data.get(model_col, 'Unknown')}")
+                print(f"  R² = {model_data.get(r2_col, 0):.4f}")
+                print(f"  RMSE = {model_data.get(rmse_col, 0):.4f}")
         self.model_rankings = comparison
         return comparison
+    def visualize_comparison(self):
+        \"\"\"Create visualization of model comparison with actual data\"\"\"
+        if self.model_rankings is None:
+            raise ValueError("First run analyze_model_quality()")
+        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+        fig.suptitle('Model Comparison - Actual Fitting Results', fontsize=16)
+        # 1. R² comparison
+        ax1 = axes[0, 0]
+        models = self.model_rankings.get('Model', self.model_rankings.index)
+        r2_values = self.model_rankings.get('R2', [])
+        ax1.bar(range(len(models)), r2_values, color='skyblue')
+        ax1.set_xlabel('Models')
+        ax1.set_ylabel('R²')
+        ax1.set_title('R² Comparison')
+        ax1.set_xticks(range(len(models)))
+        ax1.set_xticklabels(models, rotation=45, ha='right')
+        ax1.axhline(y=0.95, color='r', linestyle='--', label='Excellent fit (0.95)')
+        ax1.legend()
+        # Add actual values on bars
+        for i, v in enumerate(r2_values):
+            ax1.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
+        # 2. RMSE comparison
+        ax2 = axes[0, 1]
+        rmse_values = self.model_rankings.get('RMSE', [])
+        ax2.bar(range(len(models)), rmse_values, color='salmon')
+        ax2.set_xlabel('Models')
+        ax2.set_ylabel('RMSE')
+        ax2.set_title('RMSE Comparison (Lower is Better)')
+        ax2.set_xticks(range(len(models)))
+        ax2.set_xticklabels(models, rotation=45, ha='right')
+        # Add actual values on bars
+        for i, v in enumerate(rmse_values):
+            ax2.text(i, v + 0.001, f'{v:.3f}', ha='center', va='bottom')
+        # 3. Combined score
+        ax3 = axes[1, 0]
+        scores = self.model_rankings.get('Score', [])
+        ax3.bar(range(len(models)), scores, color='lightgreen')
+        ax3.set_xlabel('Models')
+        ax3.set_ylabel('Combined Score')
+        ax3.set_title('Overall Model Score')
+        ax3.set_xticks(range(len(models)))
+        ax3.set_xticklabels(models, rotation=45, ha='right')
+        # 4. Ranking visualization
+        ax4 = axes[1, 1]
+        rankings = self.model_rankings.get('Ranking', [])
+        ax4.scatter(r2_values, rmse_values, s=100, c=rankings, cmap='viridis')
+        ax4.set_xlabel('R²')
+        ax4.set_ylabel('RMSE')
+        ax4.set_title('R² vs RMSE (color = ranking)')
+        # Annotate best model
+        best_model = self.best_models.get('overall')
+        if isinstance(best_model, pd.Series):
+            best_r2 = best_model.get('R2', 0)
+            best_rmse = best_model.get('RMSE', 0)
+            best_name = best_model.get('Model', 'Best')
+            ax4.annotate(f'Best: {best_name}',
+                        xy=(best_r2, best_rmse),
+                        xytext=(best_r2-0.05, best_rmse+0.01),
+                        arrowprops=dict(arrowstyle='->', color='red'))
+        plt.tight_layout()
+        plt.show()
+# Example usage with actual data
 if __name__ == "__main__":
     print("🧬 Biotechnological Model Comparative Analysis System")
     print("="*60)
+    # Example data structure (replace with your actual data)
+    example_data = {
+        'Model': ['Monod', 'Logistic', 'Gompertz', 'Modified_Gompertz'],
+        'Type': ['Substrate', 'Biomass', 'Biomass', 'Biomass'],
+        'R2': [0.9845, 0.9912, 0.9956, 0.9889],
+        'RMSE': [0.0234, 0.0189, 0.0145, 0.0201],
+        'AIC': [-45.23, -48.91, -52.34, -47.56],
+        'BIC': [-42.11, -45.79, -49.22, -44.44],
+        'mu_max': [0.45, 0.48, 0.52, 0.49],
+        'Ks': [2.1, None, None, None],
+        'Xmax': [None, 12.5, 13.1, 12.8]
+    }
     # Create analyzer
     analyzer = ComparativeModelAnalyzer()
+    # Load data
+    analyzer.load_results(data_dict=example_data)
+    # Analyze
+    results = analyzer.analyze_model_quality()
+    # Visualize
+    analyzer.visualize_comparison()
+    print("\\n✨ Analysis complete! Best models identified with actual parameters.")
 """
     return code