Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,26 @@ class QwenEmbedder:
|
|
26 |
if self.projection is not None:
|
27 |
self.projection = self.projection.to(device)
|
28 |
return self
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
@spaces.GPU(duration=120)
|
31 |
def initialize_embedder(embedding_dim=768):
|
|
|
26 |
if self.projection is not None:
|
27 |
self.projection = self.projection.to(device)
|
28 |
return self
|
29 |
+
|
30 |
+
def get_embeddings(self, texts: List[str], with_instruction: bool = False) -> Tensor:
    """Embed ``texts`` with the wrapped model and return L2-normalized vectors.

    Args:
        texts: Strings to embed.
        with_instruction: When true, every text is first wrapped by
            ``get_detailed_instruct`` using a fixed task description.

    Returns:
        The pooled embeddings (passed through ``self.projection`` when one
        is configured), normalized with ``p=2`` along ``dim=1``.
    """
    prompts = texts
    if with_instruction:
        task = 'Process and understand the following text'
        prompts = [get_detailed_instruct(task, item) for item in texts]

    encoded = tokenize(self.tokenizer, prompts, self.eod_id, self.max_length)
    # Every tokenizer output has to live on the same device as the model.
    target_device = self.model.device
    model_inputs = {name: value.to(target_device) for name, value in encoded.items()}

    # NOTE(review): the original indentation was lost in this view; pooling,
    # projection and normalization are assumed to run inside no_grad — confirm.
    with torch.no_grad():
        hidden_states = self.model(**model_inputs).last_hidden_state
        pooled = last_token_pool(hidden_states, model_inputs['attention_mask'])

        # Project to the desired dimension only when a projection exists.
        if self.projection is not None:
            pooled = self.projection(pooled)

        return F.normalize(pooled, p=2, dim=1)
|
49 |
|
50 |
@spaces.GPU(duration=120)
|
51 |
def initialize_embedder(embedding_dim=768):
|