Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -71,6 +71,36 @@ class FeatureProtT5Model:
|
|
71 |
self.model.to(self.device)
|
72 |
self.model.eval()
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# --- Predictor Model Architecture ---
|
75 |
# This is the antioxidant activity predictor model. Its architecture must
|
76 |
# exactly match the architecture used to save the checkpoint file.
|
|
|
71 |
self.model.to(self.device)
|
72 |
self.model.eval()
|
73 |
|
74 |
+
def encode(self, sequence):
    """Compute the ProtT5 embedding for a single peptide sequence.

    Parameters
    ----------
    sequence : str
        Peptide as a plain amino-acid string (e.g. "MKTAYIA").

    Returns
    -------
    numpy.ndarray
        Per-residue embedding of shape (length, 1024), float32 on CPU.
        A zero array of shape (1, 1024) is returned for invalid/empty
        input or if the model yields an empty embedding.
    """
    # Guard clause: anything that is not a non-empty string gets a
    # zero placeholder of the expected feature width.
    if not isinstance(sequence, str) or not sequence:
        return np.zeros((1, 1024), dtype=np.float32)

    # ProtT5 tokenization expects residues separated by spaces.
    spaced = " ".join(sequence)

    # Tokenize, then move every tensor onto the model's device.
    batch = self.tokenizer(spaced, return_tensors='pt', padding=True, truncation=True)
    batch = {name: tensor.to(self.device) for name, tensor in batch.items()}

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        hidden = self.model(**batch).last_hidden_state

    # Drop the batch dimension and hand back a CPU NumPy array.
    features = hidden.squeeze(0).cpu().numpy()

    # Defensive fallback mirroring the invalid-input case.
    if features.shape[0] > 0:
        return features
    return np.zeros((1, 1024), dtype=np.float32)
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
# --- Predictor Model Architecture ---
|
105 |
# This is the antioxidant activity predictor model. Its architecture must
|
106 |
# exactly match the architecture used to save the checkpoint file.
|