Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -586,92 +586,136 @@ class AIAnalyzer:
|
|
| 586 |
{data.describe().to_string()}
|
| 587 |
"""
|
| 588 |
|
|
|
|
|
|
|
|
|
|
| 589 |
# Obtener prefijo de idioma
|
| 590 |
lang_prefix = self.get_language_prompt_prefix(language)
|
| 591 |
|
| 592 |
-
# Prompt mejorado con
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
|
| 676 |
try:
|
| 677 |
response = self.client.messages.create(
|
|
@@ -683,26 +727,30 @@ class AIAnalyzer:
|
|
| 683 |
}]
|
| 684 |
)
|
| 685 |
|
| 686 |
-
# Análisis adicional para generar código
|
| 687 |
code_prompt = f"""
|
| 688 |
{lang_prefix}
|
| 689 |
|
| 690 |
-
Based on the
|
|
|
|
| 691 |
|
| 692 |
-
|
| 693 |
-
2. Create comparative model graphs (bars for R², RMSE)
|
| 694 |
-
3. Implement the best identified model
|
| 695 |
-
4. Generate predictions with the selected model
|
| 696 |
-
5. Parameter sensitivity analysis
|
| 697 |
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
- Error handling
|
| 703 |
-
- Usage example
|
| 704 |
|
| 705 |
-
The code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
"""
|
| 707 |
|
| 708 |
code_response = self.client.messages.create(
|
|
@@ -710,7 +758,7 @@ class AIAnalyzer:
|
|
| 710 |
max_tokens=3000,
|
| 711 |
messages=[{
|
| 712 |
"role": "user",
|
| 713 |
-
"content":
|
| 714 |
}]
|
| 715 |
)
|
| 716 |
|
|
@@ -724,7 +772,8 @@ class AIAnalyzer:
|
|
| 724 |
"metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
|
| 725 |
for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
|
| 726 |
"mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
|
| 727 |
-
"mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None
|
|
|
|
| 728 |
}
|
| 729 |
}
|
| 730 |
|
|
@@ -782,7 +831,7 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
|
|
| 782 |
return analysis_text, code_text
|
| 783 |
|
| 784 |
def generate_implementation_code(analysis_results: str) -> str:
|
| 785 |
-
"""Genera código de implementación"""
|
| 786 |
code = """
|
| 787 |
import numpy as np
|
| 788 |
import pandas as pd
|
|
@@ -808,12 +857,15 @@ class ComparativeModelAnalyzer:
|
|
| 808 |
self.best_models = {}
|
| 809 |
self.model_rankings = {}
|
| 810 |
|
| 811 |
-
def load_results(self, file_path: str) -> pd.DataFrame:
|
| 812 |
-
\"\"\"Load fitting results from CSV or
|
| 813 |
-
if
|
| 814 |
-
self.results_df = pd.
|
| 815 |
-
|
| 816 |
-
|
|
|
|
|
|
|
|
|
|
| 817 |
|
| 818 |
print(f"✅ Data loaded: {len(self.results_df)} models")
|
| 819 |
print(f"📊 Available columns: {list(self.results_df.columns)}")
|
|
@@ -880,13 +932,21 @@ class ComparativeModelAnalyzer:
|
|
| 880 |
# Sort by ranking
|
| 881 |
comparison = comparison.sort_values('Ranking')
|
| 882 |
|
| 883 |
-
# Identify best
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 884 |
best_idx = comparison['Score'].idxmax()
|
| 885 |
self.best_models['overall'] = comparison.loc[best_idx]
|
| 886 |
|
| 887 |
-
# Print comparison table
|
| 888 |
print("\\n" + "="*80)
|
| 889 |
-
print("📊 MODEL COMPARISON TABLE")
|
| 890 |
print("="*80)
|
| 891 |
|
| 892 |
print(f"\\n{'Rank':<6} {'Model':<20} {'R²':<8} {'RMSE':<10} {'AIC':<10} {'BIC':<10} {'Score':<8}")
|
|
@@ -912,29 +972,119 @@ class ComparativeModelAnalyzer:
|
|
| 912 |
print(f"{'N/A':<10} ", end="")
|
| 913 |
print(f"{score:<8.4f}")
|
| 914 |
|
| 915 |
-
print("\\n🏆 BEST
|
| 916 |
-
|
| 917 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 918 |
|
| 919 |
self.model_rankings = comparison
|
| 920 |
return comparison
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
|
| 922 |
-
# Example usage
|
| 923 |
if __name__ == "__main__":
|
| 924 |
print("🧬 Biotechnological Model Comparative Analysis System")
|
| 925 |
print("="*60)
|
| 926 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 927 |
# Create analyzer
|
| 928 |
analyzer = ComparativeModelAnalyzer()
|
| 929 |
|
| 930 |
-
#
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
|
|
|
|
|
|
| 936 |
|
| 937 |
-
print("\\n✨
|
| 938 |
"""
|
| 939 |
|
| 940 |
return code
|
|
|
|
| 586 |
{data.describe().to_string()}
|
| 587 |
"""
|
| 588 |
|
| 589 |
+
# Extraer valores para usar en el código
|
| 590 |
+
data_dict = data.to_dict('records')
|
| 591 |
+
|
| 592 |
# Obtener prefijo de idioma
|
| 593 |
lang_prefix = self.get_language_prompt_prefix(language)
|
| 594 |
|
| 595 |
+
# Prompt mejorado con instrucciones específicas para cada nivel
|
| 596 |
+
if detail_level == "detailed":
|
| 597 |
+
prompt = f"""
|
| 598 |
+
{lang_prefix}
|
| 599 |
+
|
| 600 |
+
You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
|
| 601 |
+
|
| 602 |
+
DETAIL LEVEL: DETAILED - Provide comprehensive analysis
|
| 603 |
+
|
| 604 |
+
PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS:
|
| 605 |
+
|
| 606 |
+
1. **MODEL IDENTIFICATION AND CLASSIFICATION**
|
| 607 |
+
- Identify ALL fitted mathematical models BY NAME (e.g., "Monod", "Logistic", "Gompertz", etc.)
|
| 608 |
+
- Classify them by type: biomass growth, substrate consumption, product formation
|
| 609 |
+
- Indicate the mathematical equation of each model
|
| 610 |
+
- Mention which experiments/conditions were tested
|
| 611 |
+
|
| 612 |
+
2. **COMPARATIVE ANALYSIS OF FIT QUALITY**
|
| 613 |
+
- Compare ALL available indicators: R², RMSE, AIC, BIC, etc.
|
| 614 |
+
- Create a detailed ranking from best to worst model with exact values
|
| 615 |
+
- For the TOP 3 models, specify:
|
| 616 |
+
* Model name: [exact name from data]
|
| 617 |
+
* R² value: [exact value]
|
| 618 |
+
* RMSE value: [exact value]
|
| 619 |
+
* Key parameters and their values
|
| 620 |
+
- Identify significant differences between models
|
| 621 |
+
- Detect possible overfitting or underfitting
|
| 622 |
+
|
| 623 |
+
3. **DETERMINATION OF THE BEST MODEL PER CATEGORY**
|
| 624 |
+
- **BEST OVERALL MODEL**: [Name] with R²=[value], RMSE=[value]
|
| 625 |
+
- **BEST BIOMASS MODEL** (if applicable): [Name] with parameters
|
| 626 |
+
- **BEST SUBSTRATE MODEL** (if applicable): [Name] with parameters
|
| 627 |
+
- **BEST PRODUCT MODEL** (if applicable): [Name] with parameters
|
| 628 |
+
- Justify NUMERICALLY why each is the best
|
| 629 |
+
|
| 630 |
+
4. **DETAILED ANALYSIS BY VARIABLE TYPE**
|
| 631 |
+
a) **BIOMASS (if applicable)**:
|
| 632 |
+
- Growth parameters (μmax, Xmax, etc.) with exact values
|
| 633 |
+
- Doubling time calculations
|
| 634 |
+
- Biomass productivity
|
| 635 |
+
- Compare parameters between models numerically
|
| 636 |
+
|
| 637 |
+
b) **SUBSTRATE (if applicable)**:
|
| 638 |
+
- Affinity constants (Ks, Km) with exact values
|
| 639 |
+
- Consumption rates
|
| 640 |
+
- Yield Yx/s calculations
|
| 641 |
+
- Utilization efficiency percentages
|
| 642 |
+
|
| 643 |
+
c) **PRODUCT (if applicable)**:
|
| 644 |
+
- Production parameters (α, β) with exact values
|
| 645 |
+
- Specific productivity calculations
|
| 646 |
+
- Yield Yp/x values
|
| 647 |
+
- Production type classification
|
| 648 |
+
|
| 649 |
+
5. **BIOLOGICAL INTERPRETATION OF PARAMETERS**
|
| 650 |
+
- Explain what EACH parameter means biologically
|
| 651 |
+
- Compare parameter values between models
|
| 652 |
+
- Evaluate if values are realistic for the biological system
|
| 653 |
+
- Identify critical process control parameters
|
| 654 |
+
|
| 655 |
+
6. **DETAILED CONCLUSIONS WITH NUMERICAL CONTENT**
|
| 656 |
+
- List the winning model for each category with full statistics
|
| 657 |
+
- Provide confidence intervals if available
|
| 658 |
+
- Indicate optimal operating conditions based on parameters
|
| 659 |
+
- Suggest specific design values for scale-up
|
| 660 |
+
|
| 661 |
+
7. **PRACTICAL RECOMMENDATIONS**
|
| 662 |
+
- Which specific models to use for different predictions
|
| 663 |
+
- Limitations of each selected model
|
| 664 |
+
- Recommended validation experiments
|
| 665 |
+
- Industrial implementation considerations
|
| 666 |
+
|
| 667 |
+
8. **COMPREHENSIVE COMPARATIVE TABLE**
|
| 668 |
+
Create a detailed table with ALL models showing:
|
| 669 |
+
| Model Name | Type | R² | RMSE | AIC | BIC | Key Parameters | Best For | Ranking |
|
| 670 |
+
|
| 671 |
+
Use Markdown format with clear structure and include ALL numerical values from the data.
|
| 672 |
+
"""
|
| 673 |
+
else: # summarized
|
| 674 |
+
prompt = f"""
|
| 675 |
+
{lang_prefix}
|
| 676 |
+
|
| 677 |
+
You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis of these fitting results.
|
| 678 |
+
|
| 679 |
+
DETAIL LEVEL: SUMMARIZED - Be concise but include all essential information
|
| 680 |
+
|
| 681 |
+
PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
|
| 682 |
+
|
| 683 |
+
1. **QUICK MODEL OVERVIEW**
|
| 684 |
+
- List ALL models tested: [names]
|
| 685 |
+
- Categories covered: biomass/substrate/product
|
| 686 |
+
|
| 687 |
+
2. **BEST MODELS - TOP PERFORMERS**
|
| 688 |
+
🏆 **OVERALL WINNER**: [Model Name]
|
| 689 |
+
- R² = [exact value]
|
| 690 |
+
- RMSE = [exact value]
|
| 691 |
+
- Key parameters: [list with values]
|
| 692 |
+
|
| 693 |
+
📊 **BY CATEGORY**:
|
| 694 |
+
- **Biomass**: [Model] (R²=[value], μmax=[value])
|
| 695 |
+
- **Substrate**: [Model] (R²=[value], Ks=[value])
|
| 696 |
+
- **Product**: [Model] (R²=[value], key param=[value])
|
| 697 |
+
|
| 698 |
+
3. **KEY NUMERICAL FINDINGS**
|
| 699 |
+
- Best fit achieved: R² = [value] with [model]
|
| 700 |
+
- Parameter ranges: μmax=[min-max], Ks=[min-max]
|
| 701 |
+
- Productivity values: [specific numbers]
|
| 702 |
+
- Yields: Yx/s=[value], Yp/x=[value]
|
| 703 |
+
|
| 704 |
+
4. **QUICK COMPARISON TABLE**
|
| 705 |
+
| Rank | Model | R² | RMSE | Best Application |
|
| 706 |
+
|------|-------|-----|------|------------------|
|
| 707 |
+
| 1 | [Name]| [#] | [#] | [Use case] |
|
| 708 |
+
| 2 | [Name]| [#] | [#] | [Use case] |
|
| 709 |
+
| 3 | [Name]| [#] | [#] | [Use case] |
|
| 710 |
+
|
| 711 |
+
5. **PRACTICAL CONCLUSIONS**
|
| 712 |
+
- Use [Model X] for biomass prediction (R²=[value])
|
| 713 |
+
- Use [Model Y] for substrate monitoring (R²=[value])
|
| 714 |
+
- Critical parameters for control: [list with values]
|
| 715 |
+
- Scale-up recommendation: [specific values]
|
| 716 |
+
|
| 717 |
+
Keep it concise but include ALL model names and key numerical values.
|
| 718 |
+
"""
|
| 719 |
|
| 720 |
try:
|
| 721 |
response = self.client.messages.create(
|
|
|
|
| 727 |
}]
|
| 728 |
)
|
| 729 |
|
| 730 |
+
# Análisis adicional para generar código con valores numéricos reales
|
| 731 |
code_prompt = f"""
|
| 732 |
{lang_prefix}
|
| 733 |
|
| 734 |
+
Based on the analysis and this actual data:
|
| 735 |
+
{data.to_string()}
|
| 736 |
|
| 737 |
+
Generate Python code that:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
+
1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
|
| 740 |
+
2. Implements the best models identified with their EXACT parameters
|
| 741 |
+
3. Includes visualization functions that use the REAL data values
|
| 742 |
+
4. Shows comparative analysis with the SPECIFIC numbers from the results
|
|
|
|
|
|
|
| 743 |
|
| 744 |
+
The code must include:
|
| 745 |
+
- Data loading section with the actual values hardcoded as example
|
| 746 |
+
- Model implementation with the exact parameter values found
|
| 747 |
+
- Visualization showing the actual R², RMSE values in graphs
|
| 748 |
+
- Comparison functions using the real numerical data
|
| 749 |
+
- Predictions using the best model's actual parameters
|
| 750 |
+
|
| 751 |
+
Make sure to include comments indicating which model won and why, with its exact statistics.
|
| 752 |
+
|
| 753 |
+
Format: Complete, executable Python code with actual data values embedded.
|
| 754 |
"""
|
| 755 |
|
| 756 |
code_response = self.client.messages.create(
|
|
|
|
| 758 |
max_tokens=3000,
|
| 759 |
messages=[{
|
| 760 |
"role": "user",
|
| 761 |
+
"content": code_prompt
|
| 762 |
}]
|
| 763 |
)
|
| 764 |
|
|
|
|
| 772 |
"metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
|
| 773 |
for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
|
| 774 |
"mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
|
| 775 |
+
"mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
|
| 776 |
+
"datos_completos": data_dict # Incluir todos los datos para el código
|
| 777 |
}
|
| 778 |
}
|
| 779 |
|
|
|
|
| 831 |
return analysis_text, code_text
|
| 832 |
|
| 833 |
def generate_implementation_code(analysis_results: str) -> str:
|
| 834 |
+
"""Genera código de implementación con valores numéricos del análisis"""
|
| 835 |
code = """
|
| 836 |
import numpy as np
|
| 837 |
import pandas as pd
|
|
|
|
| 857 |
self.best_models = {}
|
| 858 |
self.model_rankings = {}
|
| 859 |
|
| 860 |
+
def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
|
| 861 |
+
\"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
|
| 862 |
+
if data_dict:
|
| 863 |
+
self.results_df = pd.DataFrame(data_dict)
|
| 864 |
+
elif file_path:
|
| 865 |
+
if file_path.endswith('.csv'):
|
| 866 |
+
self.results_df = pd.read_csv(file_path)
|
| 867 |
+
else:
|
| 868 |
+
self.results_df = pd.read_excel(file_path)
|
| 869 |
|
| 870 |
print(f"✅ Data loaded: {len(self.results_df)} models")
|
| 871 |
print(f"📊 Available columns: {list(self.results_df.columns)}")
|
|
|
|
| 932 |
# Sort by ranking
|
| 933 |
comparison = comparison.sort_values('Ranking')
|
| 934 |
|
| 935 |
+
# Identify best models by category
|
| 936 |
+
if 'Type' in comparison.columns:
|
| 937 |
+
for model_type in comparison['Type'].unique():
|
| 938 |
+
type_models = comparison[comparison['Type'] == model_type]
|
| 939 |
+
if not type_models.empty:
|
| 940 |
+
best_idx = type_models['Score'].idxmax()
|
| 941 |
+
self.best_models[model_type] = type_models.loc[best_idx]
|
| 942 |
+
|
| 943 |
+
# Best overall model
|
| 944 |
best_idx = comparison['Score'].idxmax()
|
| 945 |
self.best_models['overall'] = comparison.loc[best_idx]
|
| 946 |
|
| 947 |
+
# Print comparison table with actual values
|
| 948 |
print("\\n" + "="*80)
|
| 949 |
+
print("📊 MODEL COMPARISON TABLE - ACTUAL RESULTS")
|
| 950 |
print("="*80)
|
| 951 |
|
| 952 |
print(f"\\n{'Rank':<6} {'Model':<20} {'R²':<8} {'RMSE':<10} {'AIC':<10} {'BIC':<10} {'Score':<8}")
|
|
|
|
| 972 |
print(f"{'N/A':<10} ", end="")
|
| 973 |
print(f"{score:<8.4f}")
|
| 974 |
|
| 975 |
+
print("\\n🏆 BEST MODELS BY CATEGORY:")
|
| 976 |
+
for category, model_data in self.best_models.items():
|
| 977 |
+
if isinstance(model_data, pd.Series):
|
| 978 |
+
print(f"\\n{category.upper()}:")
|
| 979 |
+
print(f" Model: {model_data.get(model_col, 'Unknown')}")
|
| 980 |
+
print(f" R² = {model_data.get(r2_col, 0):.4f}")
|
| 981 |
+
print(f" RMSE = {model_data.get(rmse_col, 0):.4f}")
|
| 982 |
|
| 983 |
self.model_rankings = comparison
|
| 984 |
return comparison
|
| 985 |
+
|
| 986 |
+
def visualize_comparison(self):
|
| 987 |
+
\"\"\"Create visualization of model comparison with actual data\"\"\"
|
| 988 |
+
if self.model_rankings is None:
|
| 989 |
+
raise ValueError("First run analyze_model_quality()")
|
| 990 |
+
|
| 991 |
+
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
|
| 992 |
+
fig.suptitle('Model Comparison - Actual Fitting Results', fontsize=16)
|
| 993 |
+
|
| 994 |
+
# 1. R² comparison
|
| 995 |
+
ax1 = axes[0, 0]
|
| 996 |
+
models = self.model_rankings.get('Model', self.model_rankings.index)
|
| 997 |
+
r2_values = self.model_rankings.get('R2', [])
|
| 998 |
+
ax1.bar(range(len(models)), r2_values, color='skyblue')
|
| 999 |
+
ax1.set_xlabel('Models')
|
| 1000 |
+
ax1.set_ylabel('R²')
|
| 1001 |
+
ax1.set_title('R² Comparison')
|
| 1002 |
+
ax1.set_xticks(range(len(models)))
|
| 1003 |
+
ax1.set_xticklabels(models, rotation=45, ha='right')
|
| 1004 |
+
ax1.axhline(y=0.95, color='r', linestyle='--', label='Excellent fit (0.95)')
|
| 1005 |
+
ax1.legend()
|
| 1006 |
+
|
| 1007 |
+
# Add actual values on bars
|
| 1008 |
+
for i, v in enumerate(r2_values):
|
| 1009 |
+
ax1.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
|
| 1010 |
+
|
| 1011 |
+
# 2. RMSE comparison
|
| 1012 |
+
ax2 = axes[0, 1]
|
| 1013 |
+
rmse_values = self.model_rankings.get('RMSE', [])
|
| 1014 |
+
ax2.bar(range(len(models)), rmse_values, color='salmon')
|
| 1015 |
+
ax2.set_xlabel('Models')
|
| 1016 |
+
ax2.set_ylabel('RMSE')
|
| 1017 |
+
ax2.set_title('RMSE Comparison (Lower is Better)')
|
| 1018 |
+
ax2.set_xticks(range(len(models)))
|
| 1019 |
+
ax2.set_xticklabels(models, rotation=45, ha='right')
|
| 1020 |
+
|
| 1021 |
+
# Add actual values on bars
|
| 1022 |
+
for i, v in enumerate(rmse_values):
|
| 1023 |
+
ax2.text(i, v + 0.001, f'{v:.3f}', ha='center', va='bottom')
|
| 1024 |
+
|
| 1025 |
+
# 3. Combined score
|
| 1026 |
+
ax3 = axes[1, 0]
|
| 1027 |
+
scores = self.model_rankings.get('Score', [])
|
| 1028 |
+
ax3.bar(range(len(models)), scores, color='lightgreen')
|
| 1029 |
+
ax3.set_xlabel('Models')
|
| 1030 |
+
ax3.set_ylabel('Combined Score')
|
| 1031 |
+
ax3.set_title('Overall Model Score')
|
| 1032 |
+
ax3.set_xticks(range(len(models)))
|
| 1033 |
+
ax3.set_xticklabels(models, rotation=45, ha='right')
|
| 1034 |
+
|
| 1035 |
+
# 4. Ranking visualization
|
| 1036 |
+
ax4 = axes[1, 1]
|
| 1037 |
+
rankings = self.model_rankings.get('Ranking', [])
|
| 1038 |
+
ax4.scatter(r2_values, rmse_values, s=100, c=rankings, cmap='viridis')
|
| 1039 |
+
ax4.set_xlabel('R²')
|
| 1040 |
+
ax4.set_ylabel('RMSE')
|
| 1041 |
+
ax4.set_title('R² vs RMSE (color = ranking)')
|
| 1042 |
+
|
| 1043 |
+
# Annotate best model
|
| 1044 |
+
best_model = self.best_models.get('overall')
|
| 1045 |
+
if isinstance(best_model, pd.Series):
|
| 1046 |
+
best_r2 = best_model.get('R2', 0)
|
| 1047 |
+
best_rmse = best_model.get('RMSE', 0)
|
| 1048 |
+
best_name = best_model.get('Model', 'Best')
|
| 1049 |
+
ax4.annotate(f'Best: {best_name}',
|
| 1050 |
+
xy=(best_r2, best_rmse),
|
| 1051 |
+
xytext=(best_r2-0.05, best_rmse+0.01),
|
| 1052 |
+
arrowprops=dict(arrowstyle='->', color='red'))
|
| 1053 |
+
|
| 1054 |
+
plt.tight_layout()
|
| 1055 |
+
plt.show()
|
| 1056 |
|
| 1057 |
+
# Example usage with actual data
|
| 1058 |
if __name__ == "__main__":
|
| 1059 |
print("🧬 Biotechnological Model Comparative Analysis System")
|
| 1060 |
print("="*60)
|
| 1061 |
|
| 1062 |
+
# Example data structure (replace with your actual data)
|
| 1063 |
+
example_data = {
|
| 1064 |
+
'Model': ['Monod', 'Logistic', 'Gompertz', 'Modified_Gompertz'],
|
| 1065 |
+
'Type': ['Substrate', 'Biomass', 'Biomass', 'Biomass'],
|
| 1066 |
+
'R2': [0.9845, 0.9912, 0.9956, 0.9889],
|
| 1067 |
+
'RMSE': [0.0234, 0.0189, 0.0145, 0.0201],
|
| 1068 |
+
'AIC': [-45.23, -48.91, -52.34, -47.56],
|
| 1069 |
+
'BIC': [-42.11, -45.79, -49.22, -44.44],
|
| 1070 |
+
'mu_max': [0.45, 0.48, 0.52, 0.49],
|
| 1071 |
+
'Ks': [2.1, None, None, None],
|
| 1072 |
+
'Xmax': [None, 12.5, 13.1, 12.8]
|
| 1073 |
+
}
|
| 1074 |
+
|
| 1075 |
# Create analyzer
|
| 1076 |
analyzer = ComparativeModelAnalyzer()
|
| 1077 |
|
| 1078 |
+
# Load data
|
| 1079 |
+
analyzer.load_results(data_dict=example_data)
|
| 1080 |
+
|
| 1081 |
+
# Analyze
|
| 1082 |
+
results = analyzer.analyze_model_quality()
|
| 1083 |
+
|
| 1084 |
+
# Visualize
|
| 1085 |
+
analyzer.visualize_comparison()
|
| 1086 |
|
| 1087 |
+
print("\\n✨ Analysis complete! Best models identified with actual parameters.")
|
| 1088 |
"""
|
| 1089 |
|
| 1090 |
return code
|