Spaces:

chshan
/

RLAnOxPeptide

Sleeping

App Files Files Community

chshan committed on Jul 20

Commit

6a02daf

verified ·

1 Parent(s): 8a4f49a

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -0

app.py CHANGED Viewed

@@ -71,6 +71,36 @@ class FeatureProtT5Model:
         self.model.to(self.device)
         self.model.eval()
 # --- Predictor Model Architecture ---
 # This is the antioxidant activity predictor model. Its architecture must
 # exactly match the architecture used to save the checkpoint file.

         self.model.to(self.device)
         self.model.eval()
+    # ✅ NEWLY ADDED METHOD: This provides the functionality to encode sequences.
+    def encode(self, sequence):
+        """
+        Return the ProtT5 embedding of a peptide sequence as a NumPy array.
+
+        Args:
+            sequence: Peptide sequence as a string of residue letters.
+                Whitespace anywhere in the string is ignored.
+
+        Returns:
+            The model's ``last_hidden_state`` with the batch dimension
+            squeezed out, moved to CPU and converted to NumPy. If
+            ``sequence`` is not a string, or is empty or whitespace-only,
+            a float32 zero array of shape (1, 1024) is returned instead.
+        """
+        # The extract_features function expects this method to exist.
+        # NOTE(review): rows presumably include any special tokens the
+        # tokenizer appends (e.g. end-of-sequence) - confirm that the
+        # downstream pooling accounts for this.
+        if not isinstance(sequence, str):
+            # Return a zero vector of the fallback shape if input is invalid.
+            return np.zeros((1, 1024), dtype=np.float32)
+        # Strip every whitespace character so blank input takes the fallback
+        # path instead of reaching the model, and pasted sequences with
+        # stray spaces or newlines are treated as plain residue strings.
+        residues = "".join(sequence.split())
+        if not residues:
+            return np.zeros((1, 1024), dtype=np.float32)
+        # ProtT5 expects amino acids to be separated by spaces.
+        # NOTE(review): the ProtT5 model card also maps rare residues
+        # (U, Z, O, B) to X; not applied here - confirm against training.
+        seq_spaced = " ".join(residues)
+        # Tokenize the input sequence and move every tensor to the model device.
+        encoded_input = self.tokenizer(seq_spaced, return_tensors='pt', padding=True, truncation=True)
+        encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}
+        # Inference only: no gradients are needed for feature extraction.
+        with torch.no_grad():
+            embedding = self.model(**encoded_input).last_hidden_state
+        # Move the embedding to CPU, convert to NumPy and drop the batch dimension.
+        emb_np = embedding.squeeze(0).cpu().numpy()
+        # Defensive guard: never hand an empty array to the caller.
+        return emb_np if emb_np.shape[0] > 0 else np.zeros((1, 1024), dtype=np.float32)
 # --- Predictor Model Architecture ---
 # This is the antioxidant activity predictor model. Its architecture must
 # exactly match the architecture used to save the checkpoint file.