对大模型进行微调(Fine-tune)时,通常需要冻结大部分低层网络的参数。以下代码根据参数名称设置各层的冻结状态(requires_grad),并打印参数名称,便于查看模型的冻结情况、进行调优。
# Selectively freeze parameters by name before fine-tuning.
#
# Each dotted parameter name is split on "." and the rule applied is:
#   * 3 segments or fewer (e.g. "F1.weight")
#       -> trainable
#   * 4th segment is an integer (e.g. "bert.encoder.layer.7.output.dense.bias")
#       -> trainable only when that integer is greater than 5
#   * 4th segment is anything else (e.g. "bert.embeddings.LayerNorm.weight")
#       -> trainable only when the 2nd segment is "pooler"
for name, parameter in model.named_parameters():
    # Parameter names are already strings; split them into their segments.
    parts = name.split('.')

    # Debug output: the split name and its Python type.
    print(parts)
    print(type(parts))

    if len(parts) <= 3:
        # Short names have no 4th segment to inspect; keep them trainable.
        parameter.requires_grad = True
        continue

    try:
        layer_index = int(parts[3])
    except ValueError:
        # The 4th segment is not a layer index (e.g. "weight", "bias", or a
        # name containing "_"). Previously a non-alphabetic, non-numeric
        # segment crashed the loop here; now it is handled like any other
        # non-layer parameter: only the pooler stays trainable.
        parameter.requires_grad = parts[1] == 'pooler'
    else:
        # Indices 0-5 are frozen; indices above 5 are fine-tuned.
        parameter.requires_grad = layer_index > 5

# To inspect the resulting flags, uncomment:
# print([p.requires_grad for _, p in model.named_parameters()])
输出结果如下(格式为「参数名 requires_grad」;此处所有参数均为 True,即尚未应用上述冻结规则时的状态):
bert.embeddings.word_embeddings.weight True bert.embeddings.position_embeddings.weight True bert.embeddings.token_type_embeddings.weight True bert.embeddings.LayerNorm.weight True bert.embeddings.LayerNorm.bias True bert.encoder.layer.0.attention.self.query.weight True bert.encoder.layer.0.attention.self.query.bias True bert.encoder.layer.0.attention.self.key.weight True bert.encoder.layer.0.attention.self.key.bias True bert.encoder.layer.0.attention.self.value.weight True bert.encoder.layer.0.attention.self.value.bias True bert.encoder.layer.0.attention.output.dense.weight True bert.encoder.layer.0.attention.output.dense.bias True bert.encoder.layer.0.attention.output.LayerNorm.weight True bert.encoder.layer.0.attention.output.LayerNorm.bias True bert.encoder.layer.0.intermediate.dense.weight True bert.encoder.layer.0.intermediate.dense.bias True bert.encoder.layer.0.output.dense.weight True bert.encoder.layer.0.output.dense.bias True bert.encoder.layer.0.output.LayerNorm.weight True bert.encoder.layer.0.output.LayerNorm.bias True bert.encoder.layer.1.attention.self.query.weight True bert.encoder.layer.1.attention.self.query.bias True bert.encoder.layer.1.attention.self.key.weight True bert.encoder.layer.1.attention.self.key.bias True bert.encoder.layer.1.attention.self.value.weight True bert.encoder.layer.1.attention.self.value.bias True bert.encoder.layer.1.attention.output.dense.weight True bert.encoder.layer.1.attention.output.dense.bias True bert.encoder.layer.1.attention.output.LayerNorm.weight True bert.encoder.layer.1.attention.output.LayerNorm.bias True bert.encoder.layer.1.intermediate.dense.weight True bert.encoder.layer.1.intermediate.dense.bias True bert.encoder.layer.1.output.dense.weight True bert.encoder.layer.1.output.dense.bias True bert.encoder.layer.1.output.LayerNorm.weight True bert.encoder.layer.1.output.LayerNorm.bias True bert.encoder.layer.2.attention.self.query.weight True bert.encoder.layer.2.attention.self.query.bias True 
bert.encoder.layer.2.attention.self.key.weight True bert.encoder.layer.2.attention.self.key.bias True bert.encoder.layer.2.attention.self.value.weight True bert.encoder.layer.2.attention.self.value.bias True bert.encoder.layer.2.attention.output.dense.weight True bert.encoder.layer.2.attention.output.dense.bias True bert.encoder.layer.2.attention.output.LayerNorm.weight True bert.encoder.layer.2.attention.output.LayerNorm.bias True bert.encoder.layer.2.intermediate.dense.weight True bert.encoder.layer.2.intermediate.dense.bias True bert.encoder.layer.2.output.dense.weight True bert.encoder.layer.2.output.dense.bias True bert.encoder.layer.2.output.LayerNorm.weight True bert.encoder.layer.2.output.LayerNorm.bias True bert.encoder.layer.3.attention.self.query.weight True bert.encoder.layer.3.attention.self.query.bias True bert.encoder.layer.3.attention.self.key.weight True bert.encoder.layer.3.attention.self.key.bias True bert.encoder.layer.3.attention.self.value.weight True bert.encoder.layer.3.attention.self.value.bias True bert.encoder.layer.3.attention.output.dense.weight True bert.encoder.layer.3.attention.output.dense.bias True bert.encoder.layer.3.attention.output.LayerNorm.weight True bert.encoder.layer.3.attention.output.LayerNorm.bias True bert.encoder.layer.3.intermediate.dense.weight True bert.encoder.layer.3.intermediate.dense.bias True bert.encoder.layer.3.output.dense.weight True bert.encoder.layer.3.output.dense.bias True bert.encoder.layer.3.output.LayerNorm.weight True bert.encoder.layer.3.output.LayerNorm.bias True bert.encoder.layer.4.attention.self.query.weight True bert.encoder.layer.4.attention.self.query.bias True bert.encoder.layer.4.attention.self.key.weight True bert.encoder.layer.4.attention.self.key.bias True bert.encoder.layer.4.attention.self.value.weight True bert.encoder.layer.4.attention.self.value.bias True bert.encoder.layer.4.attention.output.dense.weight True bert.encoder.layer.4.attention.output.dense.bias True 
bert.encoder.layer.4.attention.output.LayerNorm.weight True bert.encoder.layer.4.attention.output.LayerNorm.bias True bert.encoder.layer.4.intermediate.dense.weight True bert.encoder.layer.4.intermediate.dense.bias True bert.encoder.layer.4.output.dense.weight True bert.encoder.layer.4.output.dense.bias True bert.encoder.layer.4.output.LayerNorm.weight True bert.encoder.layer.4.output.LayerNorm.bias True bert.encoder.layer.5.attention.self.query.weight True bert.encoder.layer.5.attention.self.query.bias True bert.encoder.layer.5.attention.self.key.weight True bert.encoder.layer.5.attention.self.key.bias True bert.encoder.layer.5.attention.self.value.weight True bert.encoder.layer.5.attention.self.value.bias True bert.encoder.layer.5.attention.output.dense.weight True bert.encoder.layer.5.attention.output.dense.bias True bert.encoder.layer.5.attention.output.LayerNorm.weight True bert.encoder.layer.5.attention.output.LayerNorm.bias True bert.encoder.layer.5.intermediate.dense.weight True bert.encoder.layer.5.intermediate.dense.bias True bert.encoder.layer.5.output.dense.weight True bert.encoder.layer.5.output.dense.bias True bert.encoder.layer.5.output.LayerNorm.weight True bert.encoder.layer.5.output.LayerNorm.bias True bert.encoder.layer.6.attention.self.query.weight True bert.encoder.layer.6.attention.self.query.bias True bert.encoder.layer.6.attention.self.key.weight True bert.encoder.layer.6.attention.self.key.bias True bert.encoder.layer.6.attention.self.value.weight True bert.encoder.layer.6.attention.self.value.bias True bert.encoder.layer.6.attention.output.dense.weight True bert.encoder.layer.6.attention.output.dense.bias True bert.encoder.layer.6.attention.output.LayerNorm.weight True bert.encoder.layer.6.attention.output.LayerNorm.bias True bert.encoder.layer.6.intermediate.dense.weight True bert.encoder.layer.6.intermediate.dense.bias True bert.encoder.layer.6.output.dense.weight True bert.encoder.layer.6.output.dense.bias True 
bert.encoder.layer.6.output.LayerNorm.weight True bert.encoder.layer.6.output.LayerNorm.bias True bert.encoder.layer.7.attention.self.query.weight True bert.encoder.layer.7.attention.self.query.bias True bert.encoder.layer.7.attention.self.key.weight True bert.encoder.layer.7.attention.self.key.bias True bert.encoder.layer.7.attention.self.value.weight True bert.encoder.layer.7.attention.self.value.bias True bert.encoder.layer.7.attention.output.dense.weight True bert.encoder.layer.7.attention.output.dense.bias True bert.encoder.layer.7.attention.output.LayerNorm.weight True bert.encoder.layer.7.attention.output.LayerNorm.bias True bert.encoder.layer.7.intermediate.dense.weight True bert.encoder.layer.7.intermediate.dense.bias True bert.encoder.layer.7.output.dense.weight True bert.encoder.layer.7.output.dense.bias True bert.encoder.layer.7.output.LayerNorm.weight True bert.encoder.layer.7.output.LayerNorm.bias True bert.encoder.layer.8.attention.self.query.weight True bert.encoder.layer.8.attention.self.query.bias True bert.encoder.layer.8.attention.self.key.weight True bert.encoder.layer.8.attention.self.key.bias True bert.encoder.layer.8.attention.self.value.weight True bert.encoder.layer.8.attention.self.value.bias True bert.encoder.layer.8.attention.output.dense.weight True bert.encoder.layer.8.attention.output.dense.bias True bert.encoder.layer.8.attention.output.LayerNorm.weight True bert.encoder.layer.8.attention.output.LayerNorm.bias True bert.encoder.layer.8.intermediate.dense.weight True bert.encoder.layer.8.intermediate.dense.bias True bert.encoder.layer.8.output.dense.weight True bert.encoder.layer.8.output.dense.bias True bert.encoder.layer.8.output.LayerNorm.weight True bert.encoder.layer.8.output.LayerNorm.bias True bert.encoder.layer.9.attention.self.query.weight True bert.encoder.layer.9.attention.self.query.bias True bert.encoder.layer.9.attention.self.key.weight True bert.encoder.layer.9.attention.self.key.bias True 
bert.encoder.layer.9.attention.self.value.weight True bert.encoder.layer.9.attention.self.value.bias True bert.encoder.layer.9.attention.output.dense.weight True bert.encoder.layer.9.attention.output.dense.bias True bert.encoder.layer.9.attention.output.LayerNorm.weight True bert.encoder.layer.9.attention.output.LayerNorm.bias True bert.encoder.layer.9.intermediate.dense.weight True bert.encoder.layer.9.intermediate.dense.bias True bert.encoder.layer.9.output.dense.weight True bert.encoder.layer.9.output.dense.bias True bert.encoder.layer.9.output.LayerNorm.weight True bert.encoder.layer.9.output.LayerNorm.bias True bert.encoder.layer.10.attention.self.query.weight True bert.encoder.layer.10.attention.self.query.bias True bert.encoder.layer.10.attention.self.key.weight True bert.encoder.layer.10.attention.self.key.bias True bert.encoder.layer.10.attention.self.value.weight True bert.encoder.layer.10.attention.self.value.bias True bert.encoder.layer.10.attention.output.dense.weight True bert.encoder.layer.10.attention.output.dense.bias True bert.encoder.layer.10.attention.output.LayerNorm.weight True bert.encoder.layer.10.attention.output.LayerNorm.bias True bert.encoder.layer.10.intermediate.dense.weight True bert.encoder.layer.10.intermediate.dense.bias True bert.encoder.layer.10.output.dense.weight True bert.encoder.layer.10.output.dense.bias True bert.encoder.layer.10.output.LayerNorm.weight True bert.encoder.layer.10.output.LayerNorm.bias True bert.encoder.layer.11.attention.self.query.weight True bert.encoder.layer.11.attention.self.query.bias True bert.encoder.layer.11.attention.self.key.weight True bert.encoder.layer.11.attention.self.key.bias True bert.encoder.layer.11.attention.self.value.weight True bert.encoder.layer.11.attention.self.value.bias True bert.encoder.layer.11.attention.output.dense.weight True bert.encoder.layer.11.attention.output.dense.bias True bert.encoder.layer.11.attention.output.LayerNorm.weight True 
bert.encoder.layer.11.attention.output.LayerNorm.bias True bert.encoder.layer.11.intermediate.dense.weight True bert.encoder.layer.11.intermediate.dense.bias True bert.encoder.layer.11.output.dense.weight True bert.encoder.layer.11.output.dense.bias True bert.encoder.layer.11.output.LayerNorm.weight True bert.encoder.layer.11.output.LayerNorm.bias True bert.pooler.dense.weight True bert.pooler.dense.bias True F1.weight True F1.bias True F2.weight True F2.bias True



