Spaces:

nonzeroexit
/

AMP-Classifier

Running

App Files Files Community

nonzeroexit committed on May 28

Commit

77584b9

verified ·

1 Parent(s): febb4a6

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -43

app.py CHANGED Viewed

@@ -65,31 +65,20 @@ selected_features = ["_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondarySt
 # LIME Explainer Setup
 try:
-    # Attempt to load a real sample data for LIME background if available
-    # e.g., sample_data = np.load(os.path.join(MODEL_DIR, 'sample_training_features_scaled.npy'))
     sample_data = np.random.rand(500, len(selected_features)) # Fallback: Generate random sample data
 except Exception:
     print("Warning: Could not load pre-saved sample data for LIME. Generating random sample data.")
-    sample_data = np.random.rand(500, len(selected_features)) # Generate enough samples
 explainer = LimeTabularExplainer(
     training_data=sample_data,
     feature_names=selected_features,
-    class_names=["AMP", "Non-AMP"], # Assuming 0 is AMP, 1 is Non-AMP as per model prediction
     mode="classification"
 )
 # --- Feature Extraction Function ---
 def extract_features(sequence: str) -> np.ndarray:
-    """
-    Extracts biochemical and compositional features from an amino acid sequence.
-    Args:
-        sequence (str): The amino acid sequence.
-    Returns:
-        np.ndarray: A scaled 2D numpy array of selected features (1, num_features).
-    Raises:
-        gr.Error: If the sequence is invalid or feature extraction fails.
-    """
     cleaned_sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if not (10 <= len(cleaned_sequence) <= 100):
         raise gr.Error(f"Invalid sequence length ({len(cleaned_sequence)}). Must be between 10 and 100 characters and contain only standard amino acids.")
@@ -119,17 +108,6 @@ def extract_features(sequence: str) -> np.ndarray:
 # --- MIC Prediction Function ---
 def predictmic(sequence: str, selected_bacteria_keys: list) -> dict:
-    """
-    Predicts Minimum Inhibitory Concentration (MIC) for selected bacteria using ProtBert embeddings.
-    Args:
-        sequence (str): The amino acid sequence.
-        selected_bacteria_keys (list): List of keys for bacteria to predict MIC for (e.g., ['e_coli', 'p_aeruginosa']).
-    Returns:
-        dict: A dictionary where keys are bacterium keys and values are predicted MICs in µM.
-              Returns error messages for individual bacteria if prediction fails.
-    Raises:
-        gr.Error: If ProtBert embedding fails or sequence is invalid.
-    """
     cleaned_sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if not (10 <= len(cleaned_sequence) <= 100):
         raise gr.Error(f"Invalid sequence length for MIC prediction ({len(cleaned_sequence)}). Must be between 10 and 100 characters.")
@@ -179,13 +157,6 @@ def predictmic(sequence: str, selected_bacteria_keys: list) -> dict:
 # --- LIME Plot Generation Helper ---
 def generate_lime_plot_base64(explanation_list: list) -> str:
-    """
-    Generates a LIME explanation plot and returns it as a base64 encoded PNG string.
-    Args:
-        explanation_list (list): The output from LimeExplanation.as_list().
-    Returns:
-        str: Base64 encoded PNG image string.
-    """
     if not explanation_list:
         return ""
@@ -218,11 +189,6 @@ def generate_lime_plot_base64(explanation_list: list) -> str:
 # --- Gradio API Endpoints ---
 def classify_and_interpret_amp(sequence: str) -> dict:
-    """
-    Gradio API endpoint for AMP classification and interpretability (LIME).
-    This function processes the sequence, performs classification, generates LIME explanation,
-    and formats the output as a structured dictionary for the frontend.
-    """
     try:
         features = extract_features(sequence)
@@ -240,8 +206,6 @@ def classify_and_interpret_amp(sequence: str) -> dict:
         top_features = []
         for feat_str, weight in explanation.as_list():
-            # Parse the feature string from LIME (e.g., "APAAC4 <= 0.23")
-            # This parsing is a heuristic based on LIME's default output format.
             parts = feat_str.split(" ", 1)
             feature_name = parts[0]
             condition = parts[1] if len(parts) > 1 else ""
@@ -267,10 +231,6 @@ def classify_and_interpret_amp(sequence: str) -> dict:
         raise gr.Error(f"An unexpected error occurred during AMP classification: {e}")
 def get_mic_predictions_api(sequence: str, selected_bacteria_keys: list) -> dict:
-    """
-    Gradio API endpoint for MIC prediction.
-    This function wraps the `predictmic` function to serve as a separate API endpoint.
-    """
     try:
         mic_results = predictmic(sequence, selected_bacteria_keys)
         return mic_results
@@ -312,4 +272,5 @@ with gr.Blocks() as demo:
             api_name="predict_mic"
         )
-demo.launch(share=True, enable_queue=True, show_api=True)

 # LIME Explainer Setup
 try:
     sample_data = np.random.rand(500, len(selected_features)) # Fallback: Generate random sample data
 except Exception:
     print("Warning: Could not load pre-saved sample data for LIME. Generating random sample data.")
+    sample_data = np.random.rand(500, len(selected_features))
 explainer = LimeTabularExplainer(
     training_data=sample_data,
     feature_names=selected_features,
+    class_names=["AMP", "Non-AMP"],
     mode="classification"
 )
 # --- Feature Extraction Function ---
 def extract_features(sequence: str) -> np.ndarray:
     cleaned_sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if not (10 <= len(cleaned_sequence) <= 100):
         raise gr.Error(f"Invalid sequence length ({len(cleaned_sequence)}). Must be between 10 and 100 characters and contain only standard amino acids.")
 # --- MIC Prediction Function ---
 def predictmic(sequence: str, selected_bacteria_keys: list) -> dict:
     cleaned_sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if not (10 <= len(cleaned_sequence) <= 100):
         raise gr.Error(f"Invalid sequence length for MIC prediction ({len(cleaned_sequence)}). Must be between 10 and 100 characters.")
 # --- LIME Plot Generation Helper ---
 def generate_lime_plot_base64(explanation_list: list) -> str:
     if not explanation_list:
         return ""
 # --- Gradio API Endpoints ---
 def classify_and_interpret_amp(sequence: str) -> dict:
     try:
         features = extract_features(sequence)
         top_features = []
         for feat_str, weight in explanation.as_list():
             parts = feat_str.split(" ", 1)
             feature_name = parts[0]
             condition = parts[1] if len(parts) > 1 else ""
         raise gr.Error(f"An unexpected error occurred during AMP classification: {e}")
 def get_mic_predictions_api(sequence: str, selected_bacteria_keys: list) -> dict:
     try:
         mic_results = predictmic(sequence, selected_bacteria_keys)
         return mic_results
             api_name="predict_mic"
         )
+# Launch without 'enable_queue': current Gradio releases no longer accept that launch() argument (queuing is configured via demo.queue()).
+demo.launch(share=True, show_api=True)