Spaces:

Omartificial-Intelligence-Space
/

qwen-arabic-semantic-suite

Running on Zero

Omartificial-Intelligence-Space committed 11 days ago

Commit

187ab5b

verified ·

1 Parent(s): 7159e40

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,26 @@ class QwenEmbedder:
         if self.projection is not None:
             self.projection = self.projection.to(device)
         return self
 @spaces.GPU(duration=120)
 def initialize_embedder(embedding_dim=768):

         if self.projection is not None:
             self.projection = self.projection.to(device)
         return self
+    def get_embeddings(self, texts: List[str], with_instruction: bool = False) -> Tensor:
+        """Embed a batch of texts and return L2-normalized vectors.
+
+        Args:
+            texts: Strings to embed.
+            with_instruction: When True, each text is first wrapped by
+                ``get_detailed_instruct`` together with a fixed, generic
+                task description before tokenization.
+
+        Returns:
+            The pooled embeddings after the optional projection and after
+            normalization with p=2 along dim 1 (presumably one row per
+            input text -- confirm against ``last_token_pool``).
+        """
+        if with_instruction:
+            task = 'Process and understand the following text'
+            texts = [get_detailed_instruct(task, text) for text in texts]
+        # self.eod_id and self.max_length are passed straight through to the
+        # tokenize helper along with the instance's tokenizer.
+        batch_dict = tokenize(self.tokenizer, texts, self.eod_id, self.max_length)
+        # Move every batch entry to the device the model currently lives on.
+        batch_dict = {k: v.to(self.model.device) for k, v in batch_dict.items()}
+        # Inference only: no gradients are tracked inside this block.
+        with torch.no_grad():
+            outputs = self.model(**batch_dict)
+            # Pool the final hidden states using the attention mask.
+            embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
+            # Project to desired dimension if needed
+            if self.projection is not None:
+                embeddings = self.projection(embeddings)
+            # Normalize after projection so the returned vectors have unit L2 norm.
+            embeddings = F.normalize(embeddings, p=2, dim=1)
+        return embeddings
 @spaces.GPU(duration=120)
 def initialize_embedder(embedding_dim=768):