Update app.py
app.py (CHANGED)
@@ -2,22 +2,7 @@
 # -*- coding: utf-8 -*-
 
 # app.py - RLAnOxPeptide Gradio Web Application
-#
-# into a single, self-contained file for a Hugging Face Space.
-#
-# REQUIRED FILE STRUCTURE IN HUGGING FACE REPO:
-# .
-# ├── app.py (This file)
-# ├── feature_extract.py (CRITICAL: This file with your `extract_features` function MUST be present)
-# ├── checkpoints/
-# │   ├── final_rl_model_logitp0.1_calibrated_FINETUNED_PROTT5.pth
-# │   └── scaler_FINETUNED_PROTT5.pkl
-# ├── generator_checkpoints_v3.6/
-# │   └── final_generator_model.pth
-# ├── prott5/
-# │   └── model/
-# │       └── finetuned_prott5.bin (Your fine-tuned feature extractor weights)
-# └── requirements.txt
+# Final version incorporating user feedback on generator logic and UI controls.
 
 import os
 import torch
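The removed header referenced a requirements.txt at the repository root. A minimal sketch, covering only the packages actually imported by app.py; exact version pins are not given in this commit and are left out:

    # requirements.txt (illustrative; pins are assumptions)
    torch
    transformers
    sentencepiece   # typically needed by the slow T5Tokenizer
    gradio
    scikit-learn
    numpy
    pandas
    joblib
    tqdm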
@@ -29,15 +14,16 @@ import gradio as gr
 from sklearn.cluster import KMeans
 from tqdm import tqdm
 import transformers
+import time
 
-# Suppress verbose logging from transformers
+# Suppress verbose logging from transformers
 transformers.logging.set_verbosity_error()
 
 # --------------------------------------------------------------------------
 # SECTION 1: CORE CLASS AND FUNCTION DEFINITIONS
 # --------------------------------------------------------------------------
 
-# --- Vocabulary Definition
+# --- Vocabulary Definition ---
 AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"
 token2id = {aa: i + 2 for i, aa in enumerate(AMINO_ACIDS)}
 token2id["<PAD>"] = 0
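The hunk is cut off before the rest of the vocabulary setup; the next hunk header shows VOCAB_SIZE = len(token2id), and the generator later uses an id2token reverse map plus eos_token_id and pad_token_id. A plausible completion, stated as an assumption rather than the file's exact code:

    # Assumed continuation of the vocabulary block (not visible in this diff view)
    token2id["<EOS>"] = 1                       # amino acids then occupy ids 2..21
    id2token = {i: t for t, i in token2id.items()}
    VOCAB_SIZE = len(token2id)                  # presumably 22 = 20 amino acids + <PAD> + <EOS>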
@@ -47,19 +33,15 @@ VOCAB_SIZE = len(token2id)
 
 
 # --- Feature Extractor Model Class (For ProtT5) ---
-# MODIFIED: This class now loads the base model from the Hugging Face Hub ID
-# and then applies your local fine-tuned weights.
 class FeatureProtT5Model:
     def __init__(self, base_model_id, finetuned_weights_path=None):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Initializing ProtT5 for feature extraction on device: {self.device}")
 
-        # Load the base model architecture and tokenizer directly from the Hub ID.
         print(f"Loading base model and tokenizer from '{base_model_id}'...")
         self.tokenizer = transformers.T5Tokenizer.from_pretrained(base_model_id, do_lower_case=False)
         self.model = transformers.T5EncoderModel.from_pretrained(base_model_id)
 
-        # If a path to a fine-tuned weights file is provided, load and apply those weights.
         if finetuned_weights_path and os.path.exists(finetuned_weights_path):
             print(f"Applying local fine-tuned weights from: {finetuned_weights_path}")
             state_dict = torch.load(finetuned_weights_path, map_location=self.device)
@@ -71,46 +53,28 @@ class FeatureProtT5Model:
         self.model.to(self.device)
         self.model.eval()
 
-    # ✅ NEWLY ADDED METHOD: This provides the functionality to encode sequences.
     def encode(self, sequence):
-        """
-        Takes a peptide sequence string and returns its ProtT5 embedding.
-        """
-        # The extract_features function expects this method to exist.
         if not sequence or not isinstance(sequence, str):
-            # Return a zero vector of the correct shape if input is invalid
            return np.zeros((1, 1024), dtype=np.float32)
 
-        # ProtT5 expects amino acids to be separated by spaces.
         seq_spaced = " ".join(list(sequence))
-
-        # Tokenize the input sequence.
         encoded_input = self.tokenizer(seq_spaced, return_tensors='pt', padding=True, truncation=True)
         encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}
-
-        # Get embeddings from the model.
+
         with torch.no_grad():
             embedding = self.model(**encoded_input).last_hidden_state
 
-        # Move the embedding to CPU and convert to a NumPy array.
-        # Squeeze to remove the batch dimension.
         emb_np = embedding.squeeze(0).cpu().numpy()
-
-        # Handle cases where the embedding might be empty.
         return emb_np if emb_np.shape[0] > 0 else np.zeros((1, 1024), dtype=np.float32)
 
-
-
 # --- Predictor Model Architecture ---
-# This is the antioxidant activity predictor model. Its architecture must
-# exactly match the architecture used to save the checkpoint file.
 class AntioxidantPredictor(nn.Module):
     def __init__(self, input_dim=1914, transformer_layers=3, transformer_heads=4, transformer_dropout=0.1):
         super(AntioxidantPredictor, self).__init__()
         self.prott5_dim = 1024
         self.handcrafted_dim = input_dim - self.prott5_dim
         self.seq_len = 16
-        self.prott5_feature_dim = 64
+        self.prott5_feature_dim = 64
 
         encoder_layer = nn.TransformerEncoderLayer(d_model=self.prott5_feature_dim, nhead=transformer_heads, dropout=transformer_dropout, batch_first=True)
         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=transformer_layers)
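For reference, a minimal usage sketch of the encode() method above; it assumes the base model can be fetched from the Hub and the optional fine-tuned weights are absent:

    # Illustrative quick check of FeatureProtT5Model.encode (per-residue ProtT5 embeddings)
    extractor = FeatureProtT5Model("Rostlab/prot_t5_xl_uniref50")
    emb = extractor.encode("WYHT")   # peptide string, standard amino acids only
    print(emb.shape)                 # roughly (sequence length + special tokens, 1024)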
@@ -122,26 +86,20 @@ class AntioxidantPredictor(nn.Module):
 
     def forward(self, x):
         batch_size = x.size(0)
-        # The input 'x' is a flat 1914-dim vector from extract_features()
         prot_t5_features = x[:, :self.prott5_dim]
         handcrafted_features = x[:, self.prott5_dim:]
-
-        # Reshape the first 1024 features back into a sequence representation
         prot_t5_seq = prot_t5_features.view(batch_size, self.seq_len, self.prott5_feature_dim)
-
         encoded_seq = self.transformer_encoder(prot_t5_seq)
         refined_prott5 = encoded_seq.mean(dim=1)
-
         fused_features = torch.cat([refined_prott5, handcrafted_features], dim=1)
         fused_output = self.fusion_fc(fused_features)
         logits = self.classifier(fused_output)
-
         return logits / self.temperature
 
     def get_temperature(self):
         return self.temperature.item()
 
-# --- Generator Model Architecture
+# --- Generator Model Architecture ---
 class ProtT5Generator(nn.Module):
     def __init__(self, vocab_size, embed_dim=512, num_layers=6, num_heads=8, dropout=0.1):
         super(ProtT5Generator, self).__init__()
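The forward pass above assumes the 1914-dimensional input splits into 1024 ProtT5 dimensions, reshaped into a 16 x 64 pseudo-sequence, plus 890 handcrafted dimensions. A small shape-check sketch with random data; the size of the classifier head is not visible in this diff, so it is left unstated:

    # Illustrative shape check: 1914 = 1024 (ProtT5, viewed as 16 x 64) + 890 handcrafted
    model = AntioxidantPredictor(input_dim=1914)
    x = torch.randn(8, 1914)   # batch of 8 feature vectors
    logits = model(x)          # temperature-scaled logits; head size not shown in this view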
@@ -167,7 +125,6 @@ class ProtT5Generator(nn.Module):
             next_logits = logits[:, -1, :] / temperature
             if generated.size(1) < min_decoded_length:
                 next_logits[:, self.eos_token_id] = -float("inf")
-
             probs = torch.softmax(next_logits, dim=-1)
             next_token = torch.multinomial(probs, num_samples=1)
             generated = torch.cat((generated, next_token), dim=1)
@@ -177,7 +134,7 @@ class ProtT5Generator(nn.Module):
         sequences = []
         for ids_tensor in token_ids_batch:
             seq = ""
-            for token_id in ids_tensor.tolist()[1:]:
+            for token_id in ids_tensor.tolist()[1:]:
                 if token_id == self.eos_token_id: break
                 if token_id == self.pad_token_id: continue
                 seq += id2token.get(token_id, "")
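Taken together, sample() and decode() form the generation round trip used later in generate_peptide_wrapper. A minimal sketch, assuming the generator and DEVICE globals have already been created as in Section 2:

    # Illustrative round trip: sampled token ids -> peptide strings
    with torch.no_grad():
        tokens = GENERATOR_MODEL.sample(batch_size=4, max_length=20, device=DEVICE,
                                        temperature=1.5, min_decoded_length=3)
    peptides = GENERATOR_MODEL.decode(tokens)   # list of strings; EOS/PAD tokens stripped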
@@ -185,15 +142,12 @@ class ProtT5Generator(nn.Module):
         return sequences
 
 # --- CRITICAL DEPENDENCY: feature_extract.py ---
-# This application requires a function named `extract_features` to convert a peptide
-# sequence into a 1914-dimensional feature vector for the prediction model.
-# This function must be defined in a file named `feature_extract.py` in the repository root.
 try:
     from feature_extract import extract_features
 except ImportError:
-    raise gr.Error("Fatal Error: `feature_extract.py` not found. This file is required
+    raise gr.Error("Fatal Error: `feature_extract.py` not found. This file is required. Please upload it to your repository.")
 
-# --- Clustering Logic
+# --- Clustering Logic ---
 def cluster_sequences(generator, sequences, num_clusters, device):
     if not sequences or len(sequences) < num_clusters:
         return sequences[:num_clusters]
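The import guard above only documents that feature_extract.py must exist. The interface this app expects, inferred from the call in predict_peptide_wrapper further down, is roughly the following; this is an assumed sketch with placeholder internals, not the actual file:

    # feature_extract.py -- assumed interface, inferred from its call site in app.py
    import numpy as np

    def extract_features(sequence, prott5_model, L_fixed=29, d_model_pe=16):
        """Return a flat 1914-dim vector: 1024 ProtT5 dims + 890 handcrafted/positional dims."""
        emb = prott5_model.encode(sequence)      # (length, 1024) per-residue embeddings
        pooled = emb.mean(axis=0)                # placeholder pooling; the real file may differ
        handcrafted = np.zeros(1914 - 1024)      # placeholder for the descriptor block
        return np.concatenate([pooled, handcrafted]).astype(np.float32)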
@@ -203,7 +157,7 @@ def cluster_sequences(generator, sequences, num_clusters, device):
     max_len = max((len(seq) for seq in sequences), default=0) + 2
     for seq in sequences:
         ids = [token2id.get(aa, 0) for aa in seq] + [generator.eos_token_id]
-        ids = [np.random.randint(2, VOCAB_SIZE)] + ids
+        ids = [np.random.randint(2, VOCAB_SIZE)] + ids
         ids += [token2id["<PAD>"]] * (max_len - len(ids))
         token_ids_list.append(ids)
 
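This hunk only shows cluster_sequences padding each candidate to a fixed token length; the rest of the function (embedding the ids with the generator and clustering with KMeans to pick diverse representatives) lies outside the diff. A condensed sketch of that idea, under the assumption that the real implementation differs in detail:

    # Illustrative diversity selection with KMeans (the actual function body is only partly shown)
    import numpy as np
    from sklearn.cluster import KMeans

    def pick_diverse(embeddings, sequences, k):
        labels = KMeans(n_clusters=k, n_init=10).fit_predict(np.asarray(embeddings))
        chosen = {}
        for lab, seq in zip(labels, sequences):
            chosen.setdefault(lab, seq)          # keep the first sequence seen per cluster
        return list(chosen.values())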
@@ -233,12 +187,10 @@ print("--- Starting Application: Loading all models and dependencies ---")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 try:
-    # --- Define file paths
+    # --- Define file paths ---
     PREDICTOR_CHECKPOINT_PATH = "checkpoints/final_rl_model_logitp0.1_calibrated_FINETUNED_PROTT5.pth"
     SCALER_PATH = "checkpoints/scaler_FINETUNED_PROTT5.pkl"
     GENERATOR_CHECKPOINT_PATH = "generator_checkpoints_v3.6/final_generator_model.pth"
-
-    # Define the base model ID from the Hub and the path to your local fine-tuned weights.
     PROTT5_BASE_MODEL_ID = "Rostlab/prot_t5_xl_uniref50"
     FINETUNED_PROTT5_FOR_FEATURES_PATH = "prott5/model/finetuned_prott5.bin"
 
@@ -254,7 +206,6 @@ try:
     print(f"Loading Scaler from: {SCALER_PATH}")
     SCALER = joblib.load(SCALER_PATH)
     print("Loading ProtT5 Feature Extractor...")
-    # Pass the Hub ID to the updated class to load the base model.
     PROTT5_EXTRACTOR = FeatureProtT5Model(
         base_model_id=PROTT5_BASE_MODEL_ID,
         finetuned_weights_path=FINETUNED_PROTT5_FOR_FEATURES_PATH
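The lines that load the predictor itself fall between these two hunks and are not shown in this view. A typical loading pattern for the checkpoint paths defined above would look roughly like this; it is an assumption, not the file's exact code:

    # Assumed predictor loading pattern (the actual lines are outside the diff hunks)
    PREDICTOR_MODEL = AntioxidantPredictor(input_dim=1914)
    state = torch.load(PREDICTOR_CHECKPOINT_PATH, map_location=DEVICE)
    PREDICTOR_MODEL.load_state_dict(state)
    PREDICTOR_MODEL.to(DEVICE).eval()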
@@ -280,16 +231,11 @@ except Exception as e:
 # --------------------------------------------------------------------------
 
 def predict_peptide_wrapper(sequence_str):
-    """Handles the prediction for a single peptide sequence from the UI."""
     if not sequence_str or not isinstance(sequence_str, str) or any(c not in AMINO_ACIDS for c in sequence_str.upper()):
         return "0.0000", "Error: Please enter a valid peptide sequence using standard amino acids (ACDEFGHIKLMNPQRSTVWY)."
 
     try:
-        # Use the imported extract_features function.
-        # The L_fixed and d_model_pe values are taken from your original predictor.py arguments.
         features = extract_features(sequence_str.upper(), PROTT5_EXTRACTOR, L_fixed=29, d_model_pe=16)
-
-        # Scale the features using the loaded scaler
         scaled_features = SCALER.transform(features.reshape(1, -1))
 
         with torch.no_grad():
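Once the models are loaded, this wrapper can be exercised directly from a Python shell, independent of the Gradio UI. A small usage sketch; the example values are hypothetical:

    # Hypothetical direct call (bypassing the UI)
    prob_str, message = predict_peptide_wrapper("WYHT")
    print(prob_str, message)   # e.g. "0.9321" plus a human-readable status string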
@@ -304,57 +250,74 @@
         print(f"Prediction Error for sequence '{sequence_str}': {e}")
         return "N/A", f"An error occurred during prediction: {e}"
 
-def generate_peptide_wrapper(num_to_generate, min_len, max_len, temperature, diversity_factor, progress=gr.Progress(
-    """
+def generate_peptide_wrapper(num_to_generate, min_len, max_len, temperature, diversity_factor, progress=gr.Progress()):
+    """
+    Handles the full generation-validation-clustering pipeline with a loop to ensure
+    the target number of peptides is generated.
+    """
     num_to_generate = int(num_to_generate)
     min_len = int(min_len)
     max_len = int(max_len)
+
+    # Safety check for length
+    if min_len > max_len:
+        gr.Warning("Minimum Length cannot be greater than Maximum Length. Adjusting min_len = max_len.")
+        min_len = max_len
 
     try:
-
-
-
 
-
-
-        while len(unique_seqs) < target_pool_size:
-            batch_size = max(1, (target_pool_size - len(unique_seqs)))
-            with torch.no_grad():
-                generated_tokens = GENERATOR_MODEL.sample(
-                    batch_size=batch_size, max_length=max_len, device=DEVICE,
-                    temperature=temperature, min_decoded_length=min_len
-                )
-            decoded_sequences = GENERATOR_MODEL.decode(generated_tokens)
-
-            initial_count = len(unique_seqs)
-            for seq in decoded_sequences:
-                if min_len <= len(seq) <= max_len:
-                    unique_seqs.add(seq)
-            pbar.update(len(unique_seqs) - initial_count)
-
-        candidate_seqs = list(unique_seqs)
-
-        # Step 2: Validate the generated sequences and filter for high probability
-        validated_pool = {}
-        for seq in tqdm(candidate_seqs, desc="Step 2/3: Validating generated sequences"):
-            prob_str, _ = predict_peptide_wrapper(seq)
-            try:
-                prob = float(prob_str)
-                if prob > 0.90:
-                    validated_pool[seq] = prob
-            except (ValueError, TypeError):
-                continue
+        validated_pool = {}  # Use a dictionary to store unique sequences and their probabilities
+        attempts = 0
+        max_attempts = 20  # Safety break to prevent infinite loops
+        generation_batch_size = 200  # Number of sequences to generate in each attempt
+
+        while len(validated_pool) < num_to_generate and attempts < max_attempts:
+            progress(len(validated_pool) / num_to_generate, desc=f"Found {len(validated_pool)} / {num_to_generate} peptides. (Attempt {attempts+1}/{max_attempts})")
+
+            # Generate a batch of candidate sequences
+            with torch.no_grad():
+                generated_tokens = GENERATOR_MODEL.sample(
+                    batch_size=generation_batch_size, max_length=max_len, device=DEVICE,
+                    temperature=temperature, min_decoded_length=min_len
+                )
+            decoded_sequences = GENERATOR_MODEL.decode(generated_tokens)
+
+            # Filter for length and uniqueness
+            new_candidates = []
+            for seq in decoded_sequences:
+                if min_len <= len(seq) <= max_len:
+                    if seq not in validated_pool:
+                        new_candidates.append(seq)
+
+            # Validate the new, unique candidates
+            for seq in new_candidates:
+                prob_str, _ = predict_peptide_wrapper(seq)
+                try:
+                    prob = float(prob_str)
+                    if prob > 0.90:
+                        validated_pool[seq] = prob
+                    # Check if we have reached the target
+                    if len(validated_pool) >= num_to_generate:
+                        break
+                except (ValueError, TypeError):
+                    continue
+
+            attempts += 1
+            if len(validated_pool) >= num_to_generate:
+                break
 
+        progress(1.0, desc=f"Collected {len(validated_pool)} high-quality peptides. Clustering for diversity...")
+        time.sleep(1)
+
         if not validated_pool:
-            return pd.DataFrame([{"Sequence": "
+            return pd.DataFrame([{"Sequence": "Could not generate any high-activity peptides (>0.9 prob) with the current settings. Try different parameters.", "Predicted Probability": "N/A"}])
 
+        # --- Final Processing ---
         high_quality_sequences = list(validated_pool.keys())
 
-        #
-        progress(1.0, desc="Step 3/3: Clustering for diversity...")
+        # Cluster to ensure diversity, selecting up to the target number
         final_diverse_seqs = cluster_sequences(GENERATOR_MODEL, high_quality_sequences, num_to_generate, DEVICE)
 
-        #
+        # Format final results into a DataFrame
         final_results = [(seq, f"{validated_pool[seq]:.4f}") for seq in final_diverse_seqs]
         final_results.sort(key=lambda x: float(x[1]), reverse=True)
 
@@ -401,8 +364,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RLAnOxPeptide") as demo:
         with gr.Column():
             with gr.Row():
                 num_input = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="Number of Final Peptides to Generate")
-
-
+                # ✅ MODIFIED: Length sliders both have a range of 2-20
+                min_len_input = gr.Slider(minimum=2, maximum=20, value=3, step=1, label="Minimum Length")
+                max_len_input = gr.Slider(minimum=2, maximum=20, value=20, step=1, label="Maximum Length")
             with gr.Row():
                 temp_input = gr.Slider(minimum=0.5, maximum=3.0, value=2.5, step=0.1, label="Temperature (Higher = More random)")
                 diversity_input = gr.Slider(minimum=1.1, maximum=5.0, value=1.5, step=0.1, label="Diversity Factor (Larger initial pool for clustering)")
@@ -410,6 +374,15 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RLAnOxPeptide") as demo:
         generate_button = gr.Button("Generate Peptides", variant="primary")
         results_output = gr.DataFrame(headers=["Sequence", "Predicted Probability"], label="Generated & Validated Peptides (>90% Probability)", wrap=True)
 
+        # ✅ ADDED: Dynamic linking of min and max length sliders for better UX
+        def update_min_len_range(max_len):
+            return gr.Slider(maximum=max_len)
+        max_len_input.change(fn=update_min_len_range, inputs=max_len_input, outputs=min_len_input)
+
+        def update_max_len_range(min_len):
+            return gr.Slider(minimum=min_len)
+        min_len_input.change(fn=update_max_len_range, inputs=min_len_input, outputs=max_len_input)
+
         generate_button.click(
             fn=generate_peptide_wrapper,
             inputs=[num_input, min_len_input, max_len_input, temp_input, diversity_input],
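The slider-linking callbacks added in this commit rely on Gradio's pattern of returning a component with updated properties from an event handler. A stripped-down, standalone sketch of that pattern, assuming Gradio 4.x behaviour and kept separate from app.py itself:

    # Minimal two-slider linking demo (illustrative only)
    import gradio as gr

    with gr.Blocks() as linked_demo:
        lo = gr.Slider(minimum=2, maximum=20, value=3, step=1, label="Min")
        hi = gr.Slider(minimum=2, maximum=20, value=20, step=1, label="Max")
        # Returning gr.Slider(...) from a handler updates only the properties passed in
        hi.change(fn=lambda m: gr.Slider(maximum=m), inputs=hi, outputs=lo)
        lo.change(fn=lambda m: gr.Slider(minimum=m), inputs=lo, outputs=hi)
    # linked_demo.launch()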