import torch.nn as nn
import torch
from transformers import BertModel, BertConfig, PreTrainedModel

def get_device():
    """Return the CUDA device if one is available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

device = get_device()
USE_CUDA = device.type == 'cuda'

bert_path = 'indobenchmark/indobert-base-p2'
HIDDEN_DIM = 768        # hidden size of the LSTM (matches BERT's hidden size)
OUTPUT_DIM = 2          # number of classes (2 for binary classification)
N_LAYERS = 1            # number of stacked LSTM layers
BIDIRECTIONAL = True    # use a bidirectional LSTM (BiLSTM)
DROPOUT = 0.2           # dropout applied before the output layer

class IndoBERTBiLSTM(PreTrainedModel):
    """IndoBERT encoder followed by a BiLSTM and a linear classification head."""
    config_class = BertConfig

    def __init__(self, bert_config):
        super().__init__(bert_config)
        self.output_dim = OUTPUT_DIM
        self.n_layers = N_LAYERS
        self.hidden_dim = HIDDEN_DIM
        self.bidirectional = BIDIRECTIONAL

        # Pre-trained IndoBERT encoder
        self.bert = BertModel.from_pretrained(bert_path)
        # BiLSTM over the BERT token representations
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size,
                            hidden_size=self.hidden_dim,
                            num_layers=self.n_layers,
                            bidirectional=self.bidirectional,
                            batch_first=True)
        self.dropout = nn.Dropout(DROPOUT)
        # Classification head over the concatenated forward/backward hidden states
        self.output_layer = nn.Linear(self.hidden_dim * 2 if self.bidirectional else self.hidden_dim,
                                      self.output_dim)

    def forward(self, input_ids, attention_mask):
        # Zero-initialise the LSTM hidden and cell states for this batch
        hidden = self.init_hidden(input_ids.shape[0])

        # Contextual token embeddings from IndoBERT: [batch, seq_len, hidden_size]
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = output.last_hidden_state

        lstm_output, (hidden_last, cn_last) = self.lstm(sequence_output, hidden)

        # Concatenate the final forward and backward hidden states of the last LSTM layer
        hidden_last_L = hidden_last[-2]
        hidden_last_R = hidden_last[-1]
        hidden_last_out = torch.cat([hidden_last_L, hidden_last_R], dim=-1)  # [batch, 2 * hidden_dim]

        # Apply dropout
        out = self.dropout(hidden_last_out)

        # Output layer
        logits = self.output_layer(out)

        return logits

    def init_hidden(self, batch_size):
        """Create zero-valued (h_0, c_0) states with the same dtype as the model weights."""
        weight = next(self.parameters()).data

        # A bidirectional LSTM needs num_layers * 2 hidden/cell state slices
        number = 2 if self.bidirectional else 1

        if USE_CUDA:
            hidden = (weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float().cuda(),
                      weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float().cuda())
        else:
            hidden = (weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float(),
                      weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float())

        return hidden
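

# Usage sketch (not part of the original model file): a minimal, hedged example of
# running the classifier end to end. It assumes the matching IndoBERT tokenizer from
# 'indobenchmark/indobert-base-p2' and uses two made-up Indonesian sentences purely
# to illustrate the expected input/output shapes.
if __name__ == '__main__':
    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained(bert_path)
    model = IndoBERTBiLSTM(BertConfig.from_pretrained(bert_path)).to(device)
    model.eval()

    # Tokenise a small batch of sample sentences (hypothetical inputs)
    batch = tokenizer(["contoh kalimat pertama", "contoh kalimat kedua"],
                      padding=True, truncation=True, return_tensors='pt').to(device)

    with torch.no_grad():
        logits = model(batch['input_ids'], batch['attention_mask'])  # shape: [2, OUTPUT_DIM]
        preds = logits.argmax(dim=-1)                                # predicted class indices
    print(preds)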