chshan committed on
Commit
6a02daf
·
verified ·
1 Parent(s): 8a4f49a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py CHANGED
@@ -71,6 +71,36 @@ class FeatureProtT5Model:
71
  self.model.to(self.device)
72
  self.model.eval()
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # --- Predictor Model Architecture ---
75
  # This is the antioxidant activity predictor model. Its architecture must
76
  # exactly match the architecture used to save the checkpoint file.
 
71
  self.model.to(self.device)
72
  self.model.eval()
73
 
74
+ # ✅ NEWLY ADDED METHOD: This provides the functionality to encode sequences.
75
def encode(self, sequence):
    """Return the ProtT5 embedding of a peptide sequence as a NumPy array.

    The sequence is cleaned, split into space-separated residues, tokenized
    with ``self.tokenizer`` and passed through ``self.model``; the model's
    ``last_hidden_state`` is returned with the batch dimension removed.

    Args:
        sequence: Peptide sequence as a string of one-letter amino-acid codes.
            Whitespace inside the string is ignored.

    Returns:
        A 2-D array with one row per token produced by the tokenizer.
        NOTE(review): the row count presumably includes any special tokens
        the tokenizer appends (e.g. end-of-sequence) -- confirm against the
        caller. For invalid input (empty, whitespace-only, or non-string) a
        ``(1, 1024)`` float32 zero array is returned instead.
    """
    # Width of the zero fallback; assumes the model's hidden size is 1024
    # -- TODO confirm against the loaded checkpoint.
    fallback_dim = 1024

    # The extract_features function expects this method to exist and to
    # always hand back an array, so invalid input yields zeros, not an error.
    if not sequence or not isinstance(sequence, str):
        return np.zeros((1, fallback_dim), dtype=np.float32)

    # Drop all whitespace so that whitespace-only input is treated as empty
    # and embedded blanks do not reach the tokenizer.
    residues = "".join(sequence.split())
    if not residues:
        return np.zeros((1, fallback_dim), dtype=np.float32)

    # Map rare/ambiguous residues (U, Z, O, B) to X, following the
    # preprocessing shown in the ProtT5 model card.
    residues = residues.translate(str.maketrans("UZOB", "XXXX"))

    # ProtT5 expects amino acids to be separated by spaces.
    seq_spaced = " ".join(residues)

    # Tokenize and move every tensor onto the same device as the model.
    encoded_input = self.tokenizer(seq_spaced, return_tensors='pt', padding=True, truncation=True)
    encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        embedding = self.model(**encoded_input).last_hidden_state

    # Remove the batch dimension, then move to CPU and convert to NumPy.
    emb_np = embedding.squeeze(0).cpu().numpy()

    # Guard against an empty embedding so callers always get at least one row.
    if emb_np.shape[0] > 0:
        return emb_np
    return np.zeros((1, fallback_dim), dtype=np.float32)
101
+
102
+
103
+
104
  # --- Predictor Model Architecture ---
105
  # This is the antioxidant activity predictor model. Its architecture must
106
  # exactly match the architecture used to save the checkpoint file.