import torch
from torch.nn import CrossEntropyLoss
from transformers.models.longformer.modeling_longformer import (
    LongformerLMHead,
    LongformerMaskedLMOutput,
    LongformerModel,
    LongformerPreTrainedModel,
)


class LongformerForMaskedLM(LongformerPreTrainedModel):
    """Longformer masked-LM variant that scores only the mask positions and reduces the
    full-vocabulary logits to a single yes-vs-no score per mask token."""

    _keys_to_ignore_on_load_unexpected = [r"pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.longformer = LongformerModel(config, add_pooling_layer=False)
        self.lm_head = LongformerLMHead(config)
        # Vocabulary ids of the yes/no verbalizer tokens and of the mask token,
        # read from the model config.
        self.label_yes_id = config.label_yes
        self.label_no_id = config.label_no
        self.mask_token_id = config.mask_token_id
        self.init_weights()

    def get_output_embeddings(self):
        return self.lm_head.decoder

    def set_output_embeddings(self, new_embeddings):
        self.lm_head.decoder = new_embeddings

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        global_attention_mask=None,
        head_mask=None,
        token_type_ids=None,
        position_ids=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.
Returns:
Examples::
>>> import torch
>>> from transformers import LongformerForMaskedLM, LongformerTokenizer
>>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
>>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
>>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
>>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
>>> loss = outputs.loss
>>> prediction_logits = output.logits
"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.longformer(
            input_ids,
            attention_mask=attention_mask,
            global_attention_mask=global_attention_mask,
            head_mask=head_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Unlike the stock masked-LM head, run the LM head only on the positions that
        # hold the mask token: outputs[0] is (batch, seq_len, hidden), and boolean
        # indexing flattens the selected positions to (num_mask_tokens, hidden).
        sequence_output = outputs[0][input_ids == self.mask_token_id]
        prediction_scores = self.lm_head(sequence_output)

        masked_lm_loss = None
        if labels is not None:
            # `labels` must flatten to one vocabulary id per selected mask position.
            loss_fct = CrossEntropyLoss()
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        # Reduce the vocabulary logits to a single yes-minus-no score per mask token,
        # then restore the batch dimension; the view assumes every example contains
        # the same number of mask tokens.
        prediction_scores = prediction_scores[:, self.label_yes_id] - prediction_scores[:, self.label_no_id]
        prediction_scores = prediction_scores.view(input_ids.shape[0], -1)

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

        return LongformerMaskedLMOutput(
            loss=masked_lm_loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
            global_attentions=outputs.global_attentions,
        )
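

# A minimal usage sketch, not part of the original module: it assumes the tokenizer's
# vocabulary contains single-token "Ġyes"/"Ġno" verbalizers and wires their ids into
# the config fields this class reads (`label_yes`, `label_no`, `mask_token_id`). The
# checkpoint name and the prompt are placeholders.
if __name__ == "__main__":
    from transformers import LongformerConfig, LongformerTokenizer

    name = "allenai/longformer-base-4096"
    tokenizer = LongformerTokenizer.from_pretrained(name)
    config = LongformerConfig.from_pretrained(name)
    # Assumption: "Ġyes"/"Ġno" (the tokens with a leading space) exist in the vocabulary.
    config.label_yes = tokenizer.convert_tokens_to_ids("Ġyes")
    config.label_no = tokenizer.convert_tokens_to_ids("Ġno")
    config.mask_token_id = tokenizer.mask_token_id

    model = LongformerForMaskedLM.from_pretrained(name, config=config)
    model.eval()

    prompt = f"Is the sky blue? {tokenizer.mask_token}"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # One yes-minus-no logit per mask token; positive favors "yes".
    print(outputs.logits)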