# embeddings.py
# Forked from Helsinki-NLP/HBMP.
import time
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
class SentenceEmbedding(nn.Module):
    """
    Look up word embeddings for token indices and encode them into a
    sentence representation.

    Args:
        config: object providing ``embed_size`` and ``embed_dim`` (passed to
            ``nn.Embedding`` as the number of embeddings and the embedding
            dimension) and ``encoder_type``, the name of the encoder class
            to instantiate. The same ``config`` is handed to the encoder's
            constructor.
    """

    def __init__(self, config):
        super(SentenceEmbedding, self).__init__()
        self.config = config
        self.word_embedding = nn.Embedding(config.embed_size, config.embed_dim)
        # Use the GPU when one is present, but stay usable on CPU-only hosts
        # instead of crashing on an unconditional .cuda() call.
        if torch.cuda.is_available():
            self.word_embedding = self.word_embedding.cuda()
        # SECURITY NOTE: eval() executes an arbitrary expression taken from
        # the configuration. Only use trusted values for
        # ``config.encoder_type`` (e.g. the name of an encoder class that is
        # in scope in this module).
        self.encoder = eval(config.encoder_type)(config)

    def forward(self, input_sentence):
        """
        Embed ``input_sentence`` (a tensor of token indices) and run the
        configured encoder on the result.

        Returns:
            Whatever the configured encoder returns for the embedded input.
        """
        # Move the indices to wherever the embedding table lives so the
        # lookup works on both CPU and GPU.
        idx = input_sentence.to(self.word_embedding.weight.device)
        sentence = self.word_embedding(idx)
        embedding = self.encoder(sentence)
        return embedding

    def encode(self, input_sentence):
        """
        Run only the encoder on ``input_sentence``, skipping the embedding
        lookup.

        NOTE(review): the input is presumably already embedded (last
        dimension ``config.embed_dim``) -- confirm against callers.
        """
        # The previous code referenced an undefined name ``sentence`` here,
        # which raised NameError on every call.
        embedding = self.encoder(input_sentence)
        return embedding
class BiLSTMMaxPoolEncoder(nn.Module):
    """
    Bidirectional LSTM whose outputs are max-pooled over the time axis.

    ``config`` supplies ``embed_dim``, ``hidden_dim``, ``layers`` and
    ``dropout`` for the LSTM, plus ``cells``, the leading dimension of the
    zero initial state created in ``forward``.
    """

    def __init__(self, config):
        super(BiLSTMMaxPoolEncoder, self).__init__()
        self.config = config
        self.rnn1 = nn.LSTM(
            input_size=config.embed_dim,
            hidden_size=config.hidden_dim,
            num_layers=config.layers,
            dropout=config.dropout,
            bidirectional=True,
        )
        self.max_pool = nn.AdaptiveMaxPool1d(1)

    def forward(self, inputs):
        """
        Encode ``inputs`` of shape (seq_len, batch, embed_dim) into a tensor
        of shape (batch, 2 * hidden_dim).
        """
        # One zero tensor serves as both the initial hidden and cell state.
        state_shape = (self.config.cells, inputs.size(1), self.config.hidden_dim)
        zero_state = Variable(inputs.data.new(*state_shape).zero_())
        outputs, _ = self.rnn1(inputs, (zero_state, zero_state))
        # (seq, batch, feat) -> (batch, feat, seq) so pooling runs over time.
        pooled = self.max_pool(outputs.permute(1, 2, 0))
        return pooled.squeeze(2)
class LSTMEncoder(nn.Module):
    """
    Unidirectional LSTM encoder that returns the batch-normalised final
    hidden state.

    ``config`` supplies ``embed_dim``, ``hidden_dim``, ``layers`` and
    ``dropout`` for the LSTM, plus ``cells``, the leading dimension of the
    zero initial state created in ``forward`` (for this unidirectional LSTM
    it has to match ``config.layers``).
    """

    def __init__(self, config):
        super(LSTMEncoder, self).__init__()
        self.config = config
        self.rnn = nn.LSTM(input_size=config.embed_dim,
                           hidden_size=config.hidden_dim,
                           num_layers=config.layers,
                           dropout=config.dropout,
                           bidirectional=False)
        self.batch_norm = nn.BatchNorm1d(config.hidden_dim)

    def forward(self, inputs):
        """
        Encode ``inputs`` of shape (seq_len, batch, embed_dim).

        Returns:
            Tensor of shape (batch, hidden_dim): the top layer's final
            hidden state after batch normalisation.
        """
        batch_size = inputs.size()[1]
        h_0 = c_0 = Variable(inputs.data.new(self.config.cells,
                                             batch_size,
                                             self.config.hidden_dim).zero_())
        # The LSTM returns (output, (h_n, c_n)); h_n is
        # (num_layers, batch, hidden_dim).
        h_n = self.rnn(inputs, (h_0, c_0))[1][0]
        # Select the last layer explicitly. The previous ``squeeze(0)`` only
        # dropped the layer axis when there was exactly one layer, so stacked
        # LSTMs handed a 3-D tensor to BatchNorm1d and failed.
        embedding = h_n[-1]
        embedding = self.batch_norm(embedding)
        return embedding
class HBMP(nn.Module):
    """
    Hierarchical Bi-LSTM Max Pooling encoder.

    Three bidirectional LSTMs read the same input one after another; each
    one starts from the final (hidden, cell) state of the previous one. The
    time-wise max-pooled outputs of all three are concatenated.
    """

    def __init__(self, config):
        super(HBMP, self).__init__()
        self.config = config
        self.max_pool = nn.AdaptiveMaxPool1d(1)
        self.cells = config.cells
        self.hidden_dim = config.hidden_dim
        # All three LSTMs share the same hyper-parameters.
        lstm_kwargs = dict(
            input_size=config.embed_dim,
            hidden_size=config.hidden_dim,
            num_layers=config.layers,
            dropout=config.dropout,
            bidirectional=True,
        )
        self.rnn1 = nn.LSTM(**lstm_kwargs)
        self.rnn2 = nn.LSTM(**lstm_kwargs)
        self.rnn3 = nn.LSTM(**lstm_kwargs)

    def forward(self, inputs):
        """
        Encode ``inputs`` of shape (seq_len, batch, embed_dim) into a tensor
        of shape (batch, 6 * hidden_dim).
        """
        n_batch = inputs.size(1)
        zeros = Variable(
            inputs.data.new(self.config.cells, n_batch, self.config.hidden_dim).zero_()
        )
        # The first LSTM starts from an all-zero hidden and cell state.
        state = (zeros, zeros)
        pooled = []
        for rnn in (self.rnn1, self.rnn2, self.rnn3):
            rnn.flatten_parameters()
            # Each LSTM hands its final state on to the next one.
            outputs, state = rnn(inputs, state)
            # Pool over time: (seq, batch, feat) -> (batch, feat, 1) -> (1, batch, feat)
            pooled.append(self.max_pool(outputs.permute(1, 2, 0)).permute(2, 0, 1))
        return torch.cat(pooled, 2).squeeze(0)