Sentence Similarity
sentence-transformers
Safetensors
qwen3
feature-extraction
Generated from Trainer
dataset_size:500000
loss:CachedGISTEmbedLoss
text-embeddings-inference
Instructions to use CocoRoF/POLAR-Q3-0.6b-gist with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use CocoRoF/POLAR-Q3-0.6b-gist with sentence-transformers:
from sentence_transformers import SentenceTransformer model = SentenceTransformer("CocoRoF/POLAR-Q3-0.6b-gist") sentences = [ "scramble z to retrieve negative samples, i.e. z values that should not be predicted by the model.", "def get_neg_z(self, z, cur_device):\n\n if self.opt.sampling_method == 0:\n \"\"\"carefully selecting negative samples, such that they never\n include positive samples; done individually for every time-step -->\n very slow.\"\"\"\n offset = 1\n # generate uncorrelated negative samples by using an individual random\n # offset for every index\n rand_neg_idx = torch.arange(z.size(0), device=cur_device)\n\n rand_offset = (\n torch.multinomial(\n torch.ones(z.size(0) - offset),\n self.neg_samples * z.size(0),\n replacement=True,\n )\n + offset\n )\n rand_offset = rand_offset.reshape(self.neg_samples, -1).to(cur_device)\n\n z_neg = torch.stack(\n [\n torch.index_select(\n z, 0, (rand_neg_idx + rand_offset[i]) % z.size(0)\n )\n for i in range(self.neg_samples)\n ],\n 2,\n )\n elif self.opt.sampling_method == 1:\n \"\"\"randomly selecting from all z values.\n\n can cause positive samples to be selected as negative\n samples as well (but probability is <0.1% in our\n experiments) done once for all time-steps, much faster.\n \"\"\"\n z = self.broadcast_batch_length(z)\n z_neg = torch.stack(\n [\n torch.index_select(\n z, 0, torch.randperm(z.size(0), device=cur_device)\n )\n for i in range(self.neg_samples)\n ],\n 2,\n )\n rand_neg_idx = None\n rand_offset = None\n\n elif self.opt.sampling_method == 2:\n \"\"\"randomly selecting from z values within the same sequence.\n\n can cause positive samples to be selected as negative\n samples as well done once for all time-steps, much faster.\n \"\"\"\n z_neg = []\n channel = z.size(-1)\n batch_dim = z.size(0)\n seq_len = z.size(1)\n\n for _ in range(self.neg_samples):\n rand_perm_index = torch.randperm(\n batch_dim * seq_len, device=cur_device\n ).remainder_(seq_len)\n rand_perm_index = rand_perm_index.reshape(batch_dim, seq_len)\n batch_index_offset = (\n torch.arange(0, batch_dim, device=cur_device) * seq_len\n )\n rand_perm_index += batch_index_offset[:, None]\n\n z_neg.append(\n z.reshape(-1, channel)[rand_perm_index.view(-1)].reshape(\n batch_dim, seq_len, channel\n )\n )\n\n z_neg = torch.stack(z_neg, 3)\n\n rand_neg_idx = None\n rand_offset = None\n\n else:\n raise Exception(\"Invalid sampling_method option\")\n\n return z_neg, rand_neg_idx, rand_offset", "마우스 전지방 3T3-L1세포주에 파이토케미칼을 조건에 따라 24시간 처리한 후 cell viability assay를 수행하였다.", "def _sample_neg(self, assign_result, num_expected):\n neg_inds = torch.nonzero(assign_result.gt_inds == 0)\n if neg_inds.numel() != 0:\n neg_inds = neg_inds.squeeze(1)\n if len(neg_inds) <= num_expected:\n return neg_inds\n elif self.neg_balance_thr <= 0:\n # uniform sampling among all negative samples\n return random_choice(neg_inds, num_expected)\n else:\n max_overlaps = assign_result.max_overlaps.cpu().numpy()\n # balance sampling for negative samples\n neg_set = set(neg_inds.cpu().numpy())\n easy_set = set(\n np.where(\n np.logical_and(max_overlaps >= 0,\n max_overlaps < self.neg_balance_thr))[0])\n hard_set = set(np.where(max_overlaps >= self.neg_balance_thr)[0])\n easy_neg_inds = list(easy_set & neg_set)\n hard_neg_inds = list(hard_set & neg_set)\n\n num_expected_hard = int(num_expected * self.neg_hard_fraction)\n if len(hard_neg_inds) > num_expected_hard:\n sampled_hard_inds = random_choice(hard_neg_inds,\n num_expected_hard)\n else:\n sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int)\n num_expected_easy = num_expected - len(sampled_hard_inds)\n if len(easy_neg_inds) > num_expected_easy:\n sampled_easy_inds = random_choice(easy_neg_inds,\n num_expected_easy)\n else:\n sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int)\n sampled_inds = np.concatenate((sampled_easy_inds,\n sampled_hard_inds))\n if len(sampled_inds) < num_expected:\n num_extra = num_expected - len(sampled_inds)\n extra_inds = np.array(list(neg_set - set(sampled_inds)))\n if len(extra_inds) > num_extra:\n extra_inds = random_choice(extra_inds, num_extra)\n sampled_inds = np.concatenate((sampled_inds, extra_inds))\n sampled_inds = torch.from_numpy(sampled_inds).long().to(\n assign_result.gt_inds.device)\n return sampled_inds" ] embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [4, 4] - Notebooks
- Google Colab
- Kaggle
File size: 133 Bytes
9946f61 | 1 2 3 4 | version https://git-lfs.github.com/spec/v1
oid sha256:def76fb086971c7867b829c23a26261e38d9d74e02139253b38aeb9df8b4b50a
size 11423705
|