Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

似乎你们的剪枝相关的函数写的有点问题 #1890

Open
yedaotian9 opened this issue Jul 27, 2024 · 1 comment
Open

似乎你们的剪枝相关的函数写的有点问题 #1890

yedaotian9 opened this issue Jul 27, 2024 · 1 comment
Assignees

Comments

@yedaotian9
Copy link

该函数的引用方式:
from paddleslim.nas.ofa.utils import nlp_utils
该函数的原文:
def compute_neuron_head_importance(task_name,
                                   model,
                                   data_loader,
                                   num_layers,
                                   num_heads,
                                   loss_fct=paddle.nn.loss.CrossEntropyLoss(),
                                   intermediate_name='linear1',
                                   output_name='linear2'):
    """
    Score every attention head and feed-forward neuron by gradient magnitude.

    For each batch the model is run with a differentiable all-ones head mask,
    the loss is back-propagated, and two running totals are updated:
    the absolute gradient of the head mask, and the absolute value of
    ``parameter * gradient`` reduced over the feed-forward ``Linear`` layers.

    Args:
        task_name(str): task name. When it equals ``'mnli'`` (case-insensitive)
            `data_loader` is iterated as a collection of loaders; otherwise
            `data_loader` itself is treated as the single loader.
        model(paddle.nn.Layer): the instance of transformer model. It is called
            as ``model(input_ids, segment_ids, attention_mask=[None, head_mask])``.
        data_loader(DataLoader): iterable yielding batches. Each batch is either
            a dict with keys ``input_ids``, ``token_type_ids`` and ``labels``,
            or a 3-item sequence in that order.
        num_layers(int): number of transformer layers.
        num_heads(int): number of heads in each multi-head attention.
        loss_fct(Loss|optional): callable invoked as ``loss_fct(logits, labels)``.
            Default: `nn.loss.CrossEntropyLoss()`.
        intermediate_name(str|optional): substring identifying parameters of the
            intermediate `Linear` layer in feed-forward. Default: `linear1`.
        output_name(str|optional): substring identifying parameters of the
            output `Linear` layer in feed-forward. Default: `linear2`.

    Returns:
        tuple: ``(head_importance, neuron_importance)``. ``head_importance`` is a
        float32 paddle tensor of shape ``[num_layers, num_heads]``;
        ``neuron_importance`` is a list of float32 numpy arrays, one per matched
        intermediate weight, each of length ``weight.shape[1]``.
    """
    head_importance = paddle.zeros(
        shape=[num_layers, num_heads], dtype='float32')
    # The mask is all ones and only exists so that its gradient can be read
    # back after each backward pass.
    head_mask = paddle.ones(shape=[num_layers, num_heads], dtype='float32')
    head_mask.stop_gradient = False

    # Bucket the feed-forward parameters by substring match on their names.
    # Parameters with more than one dimension are treated as weights, the
    # rest as biases; biases of the output layer are not collected.
    ffn_in_weights, ffn_in_biases, ffn_out_weights = [], [], []
    for param_name, param in model.named_parameters():
        is_matrix = len(param.shape) > 1
        if intermediate_name in param_name:
            (ffn_in_weights if is_matrix else ffn_in_biases).append(param)
        if output_name in param_name and is_matrix:
            ffn_out_weights.append(param)

    # One accumulator per intermediate weight, sized by its second dimension.
    neuron_importance = [
        np.zeros(shape=[weight.shape[1]], dtype='float32')
        for weight in ffn_in_weights
    ]

    if task_name.lower() == 'mnli':
        loaders = data_loader
    else:
        loaders = (data_loader, )

    for loader in loaders:
        for batch in loader:
            if isinstance(batch, dict):
                input_ids = batch['input_ids']
                segment_ids = batch['token_type_ids']
                labels = batch['labels']
            else:
                input_ids, segment_ids, labels = batch

            # NOTE(review): attention_mask is passed as a two-element list, so
            # the model's forward must accept that form; a forward that treats
            # attention_mask as a tensor (e.g. reads `.ndim`) will raise
            # AttributeError on the list.
            logits = model(
                input_ids, segment_ids, attention_mask=[None, head_mask])
            loss = loss_fct(logits, labels)
            # NOTE(review): no gradient-clearing call appears in this loop;
            # confirm whether carrying gradients across batches is intended.
            loss.backward()
            head_importance += paddle.abs(
                paddle.to_tensor(head_mask.gradient()))

            # zip stops at the shortest of the four lists.
            for in_w, in_b, out_w, score in zip(ffn_in_weights, ffn_in_biases,
                                                ffn_out_weights,
                                                neuron_importance):
                in_term = np.sum(in_w.numpy() * in_w.gradient(), axis=0)
                bias_term = in_b.numpy() * in_b.gradient()
                score += np.abs(in_term + bias_term)

                out_term = np.sum(out_w.numpy() * out_w.gradient(), axis=1)
                score += np.abs(out_term)

    return head_importance, neuron_importance

在使用该函数时,我遇到了报错:
AttributeError Traceback (most recent call last)
Cell In[46], line 180
172 dev_batch_sampler = paddle.io.BatchSampler(
173 dev_ds, batch_size=4, shuffle=False)
174 dev_data_loader = DataLoader(
175 dataset=dev_ds,
176 #batch_sampler=dev_batch_sampler,
177 #collate_fn=batchify_fn
178 )
--> 180 head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
181 task_name='cluewsc2020',
182 model=ofa_model.model,
183 data_loader=dev_ds,
184 loss_fct=paddle.nn.loss.CrossEntropyLoss(
185 ) if [True,False] else paddle.nn.loss.MSELoss(),
186 num_layers=model.ppminilm.config['num_hidden_layers'],
187 num_heads=model.ppminilm.config['num_attention_heads'])
189 # 重新组合参数的顺序
190 reorder_neuron_head(ofa_model.model, head_importance, neuron_importance)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleslim/nas/ofa/utils/nlp_utils.py:76, in compute_neuron_head_importance(task_name, model, data_loader, num_layers, num_heads, loss_fct, intermediate_name, output_name)
74 else:
75 input_ids, segment_ids, labels = batch
---> 76 logits = model(
77 input_ids, segment_ids, attention_mask=[None, head_mask])
78 loss = loss_fct(logits, labels)
79 loss.backward()

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddle/nn/layer/layers.py:1426, in Layer.__call__(self, *inputs, **kwargs)
1417 if (
1418 (not in_to_static_mode())
1419 and (not self._forward_pre_hooks)
(...)
1423 and (not in_profiler_mode())
1424 ):
1425 self._build_once(*inputs, **kwargs)
-> 1426 return self.forward(*inputs, **kwargs)
1427 else:
1428 return self._dygraph_call_func(*inputs, **kwargs)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddlenlp/transformers/ppminilm/modeling.py:300, in PPMiniLMForSequenceClassification.forward(self, input_ids, token_type_ids, position_ids, attention_mask)
270 def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None):
271 r"""
272 Args:
273 input_ids (Tensor):
(...)
298
299 """
--> 300 _, pooled_output = self.ppminilm(
301 input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask
302 )
304 pooled_output = self.dropout(pooled_output)
305 logits = self.classifier(pooled_output)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddle/nn/layer/layers.py:1426, in Layer.__call__(self, *inputs, **kwargs)
1417 if (
1418 (not in_to_static_mode())
1419 and (not self._forward_pre_hooks)
(...)
1423 and (not in_profiler_mode())
1424 ):
1425 self._build_once(*inputs, **kwargs)
-> 1426 return self.forward(*inputs, **kwargs)
1427 else:
1428 return self._dygraph_call_func(*inputs, **kwargs)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddlenlp/transformers/ppminilm/modeling.py:230, in PPMiniLMModel.forward(self, input_ids, token_type_ids, position_ids, attention_mask)
226 attention_mask = paddle.unsqueeze(
227 (input_ids == self.pad_token_id).astype(self.pooler.dense.weight.dtype) * -1e4, axis=[1, 2]
228 )
229 else:
--> 230 if attention_mask.ndim == 2:
231 # attention_mask [batch_size, sequence_length] -> [batch_size, 1, 1, sequence_length]
232 attention_mask = attention_mask.unsqueeze(axis=[1, 2]).astype(paddle.get_default_dtype())
233 attention_mask = (1.0 - attention_mask) * -1e4

AttributeError: 'list' object has no attribute 'ndim'

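经过排查,我认为问题出在该函数传入 attention_mask 的方式上:
input_ids, segment_ids, attention_mask=[None, head_mask])
这一行把 attention_mask 以列表 [None, head_mask] 的形式传入,而 PPMiniLMModel.forward 会把 attention_mask 当作 Tensor 访问其 .ndim 属性,因此抛出了上面的 AttributeError: 'list' object has no attribute 'ndim'。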

@minghaoBD
Copy link
Collaborator

你好,抱歉回复不及时。这个是NAS的模块,直接用剪枝请参照 https://github.com/PaddlePaddle/PaddleSlim/blob/release/2.0.0/docs/zh_cn/tutorials/pruning/overview.md

另外,如果需要针对LLM做剪枝,需要适配下,PaddleSlim暂不支持。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants