import torch
import torch.nn as nn
from transformers import PreTrainedModel, BertConfig


class IndoBERTBiLSTM(PreTrainedModel):
    config_class = BertConfig

    def __init__(self, bert_config, bert_model, hidden_dim, num_classes,
                 n_layers, bidirectional, dropout):
        super().__init__(bert_config)
        self.output_dim = num_classes
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional
        # an already-loaded BertModel instance (the original parameter was
        # named bert_pretrained_path, but it is used as a model, not a path)
        self.bert = bert_model
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size,
                            hidden_size=hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            batch_first=True)
        self.dropout = nn.Dropout(dropout)
        # average over the time dimension -> one vector per sequence
        self.global_pooling = nn.AdaptiveAvgPool1d(1)
        lstm_out_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.hidden_layer = nn.Linear(lstm_out_dim, lstm_out_dim)
        self.output_layer = nn.Linear(lstm_out_dim, num_classes)
        self.relu = nn.ReLU()
    def forward(self, input_ids, attention_mask):
        hidden = self.init_hidden(input_ids.shape[0])
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # (batch, seq_len, bert_hidden_size)
        sequence_output = self.dropout(output.last_hidden_state)
        # (batch, seq_len, lstm_out_dim)
        lstm_output, (hidden_last, cn_last) = self.lstm(sequence_output, hidden)
        lstm_output = self.dropout(lstm_output)
        # global average pooling over the time dimension:
        # (batch, lstm_out_dim, seq_len) -> (batch, lstm_out_dim)
        lstm_output = lstm_output.permute(0, 2, 1)
        # squeeze(-1), not a bare squeeze(): the latter also drops the batch
        # dimension when batch_size == 1
        pooled_output = self.global_pooling(lstm_output).squeeze(-1)
        hidden_layer_output = self.relu(self.hidden_layer(pooled_output))
        # raw logits; apply softmax outside the model if probabilities are
        # needed (CrossEntropyLoss already includes it)
        logits = self.output_layer(hidden_layer_output)
        return logits
    def init_hidden(self, batch_size):
        # zero initial (h_0, c_0) states for the LSTM, created on the same
        # device and dtype as the model parameters; this replaces the old
        # USE_CUDA flag, which desynchronised from the model's actual device
        num_directions = 2 if self.bidirectional else 1
        shape = (self.n_layers * num_directions, batch_size, self.hidden_dim)
        weight = next(self.parameters())
        return (torch.zeros(shape, device=weight.device, dtype=weight.dtype),
                torch.zeros(shape, device=weight.device, dtype=weight.dtype))
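

# Usage sketch (illustrative, not from the original file): the class expects
# an already-loaded encoder; the checkpoint name and hyperparameter values
# below are assumptions.
#
#   from transformers import BertModel
#   bert = BertModel.from_pretrained("indobenchmark/indobert-base-p1")
#   model = IndoBERTBiLSTM(bert.config, bert, hidden_dim=256, num_classes=3,
#                          n_layers=2, bidirectional=True, dropout=0.3)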


class IndoBERTModel(PreTrainedModel):
    config_class = BertConfig

    def __init__(self, bert_config, bert_pretrained, num_classes):
        super().__init__(bert_config)
        # an already-loaded BertModel instance
        self.bert = bert_pretrained
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # pooler_output: the [CLS] representation passed through BERT's
        # pooler (dense + tanh), shape (batch, hidden_size)
        pooled_output = self.dropout(outputs.pooler_output)
        logits = self.fc(pooled_output)
        return logits
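

if __name__ == "__main__":
    # Minimal smoke test -- a sketch, not part of the original Space. It uses
    # a tiny randomly initialised BERT so it runs without downloading weights;
    # in practice you would load a real pretrained checkpoint instead.
    from transformers import BertModel

    config = BertConfig(hidden_size=64, num_hidden_layers=2,
                        num_attention_heads=2, intermediate_size=128)
    bert = BertModel(config)

    input_ids = torch.randint(0, config.vocab_size, (4, 16))
    attention_mask = torch.ones_like(input_ids)

    bilstm = IndoBERTBiLSTM(config, bert, hidden_dim=32, num_classes=3,
                            n_layers=2, bidirectional=True, dropout=0.3)
    print(bilstm(input_ids, attention_mask).shape)  # torch.Size([4, 3])

    plain = IndoBERTModel(config, bert, num_classes=3)
    print(plain(input_ids, attention_mask).shape)   # torch.Size([4, 3])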