---
library_name: transformers
tags: []
---

# Model Card for josh-oo/aspect-based-embeddings-v3

This is an aspect-based embedding model for clinical papers. It encodes paper text (for example, a title) together with four aspect prefix tokens, `<participants>`, `<intervention>`, `<condition>`, and `<outcome>`, and returns a single embedding taken from the `[CLS]` position.

## How to Use
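
The snippet below loads the model and tokenizer at a pinned revision, prepends the four aspect prefix tokens to the input text, registers position and token type id buffers so the prefixes share position 0 and are marked with token type 1, and takes the embedding from the `[CLS]` position.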

```python
from transformers import AutoTokenizer, AutoModel
import torch

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

PATH = "josh-oo/aspect-based-embeddings-v3"
REVISION = "4f43387343acaacd9bfafec0a304c51ed140f078"
PREFIXES = ["<participants>", "<intervention>", "<condition>", "<outcome>"]

tokenizer = AutoTokenizer.from_pretrained(PATH, revision=REVISION)
model = AutoModel.from_pretrained(PATH, revision=REVISION)

# Shift the position ids so that the [CLS] token and the four aspect prefixes all
# share position 0 and the actual text starts at position 1
model.register_buffer("position_ids", torch.relu(torch.arange(model.config.max_position_embeddings + len(PREFIXES)).expand((1, -1)) - len(PREFIXES)), persistent=False)

# Token type 0 for regular tokens, token type 1 for the aspect prefix tokens
model.register_buffer("token_type_ids", torch.zeros(model.position_ids.size(), dtype=torch.long), persistent=False)
model.token_type_ids[:, 1:1 + len(PREFIXES)] = 1

# Prepend all four aspect prefixes to the input text
dummy_text = "".join(PREFIXES) + "This is a title of a medical paper"
dummy_input = tokenizer([dummy_text], return_tensors="pt")
dummy_input.pop("token_type_ids")  # use the token_type_ids buffer registered above instead

dummy_input = dummy_input.to(DEVICE)
model.to(DEVICE)

with torch.no_grad():
    output = model(**dummy_input)

# The embedding is the hidden state at the [CLS] position
embeddings = output.last_hidden_state[:, 0]
```
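
For example, embeddings for several papers can be compared with cosine similarity. The sketch below reuses `tokenizer`, `model`, `DEVICE`, and `PREFIXES` from the snippet above; the example titles and the choice of cosine similarity are illustrative assumptions, not documented behaviour of this model.

```python
import torch
import torch.nn.functional as F

# Embed two example titles in one padded batch and compare them
titles = [
    "Effect of exercise on blood pressure in adults with hypertension",
    "Cognitive behavioural therapy for chronic insomnia",
]
batch = tokenizer(["".join(PREFIXES) + t for t in titles], return_tensors="pt", padding=True)
batch.pop("token_type_ids")  # use the registered token_type_ids buffer instead
batch = batch.to(DEVICE)

with torch.no_grad():
    batch_embeddings = model(**batch).last_hidden_state[:, 0]  # [CLS] embeddings, shape (2, hidden_size)

similarity = F.cosine_similarity(batch_embeddings[0], batch_embeddings[1], dim=0)
print(f"cosine similarity: {similarity.item():.3f}")
```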