Transfer a pre-trained Keras model to Pytorch
本篇记录了如何将预训练好的Keras model 的参数转化为pytorch的参数
起因:我有一个tensorflow框架的代码,里面含有一个已经训练好的Keras model,我需要利用这个model预测mnist数据集,但是我不想用Keras model,所以就希望建立一个相同的pytorch model,并把参数迁移过来。
参考文献:
https://github.com/gzuidhof/nn-transfer/blob/master/example.ipynb
https://discuss.pytorch.org/t/transferring-weights-from-keras-to-pytorch/9889
准备工作:
建立两个相同的model:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import keras
import os
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.models import load_model
from keras.datasets import mnist
class pytorch_Net(nn.Module):
    """PyTorch CNN with four 3x3 convolutions and three dense layers.

    The attribute names of the layers (``conv2d_1`` ... ``dense_3``) become
    the prefixes of the ``state_dict`` keys, so they must not be renamed
    if weights are loaded by key.
    """

    def __init__(self):
        super(pytorch_Net, self).__init__()
        self.num_channels = 1
        self.image_size = 28
        self.num_labels = 10

        # Two unpadded 3x3 convolutions per stage, stride 1.
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=32,
                                  kernel_size=3, stride=1)
        self.conv2d_2 = nn.Conv2d(in_channels=32, out_channels=32,
                                  kernel_size=3, stride=1)
        self.conv2d_3 = nn.Conv2d(in_channels=32, out_channels=64,
                                  kernel_size=3, stride=1)
        self.conv2d_4 = nn.Conv2d(in_channels=64, out_channels=64,
                                  kernel_size=3, stride=1)

        # Classifier head; the first layer consumes the flattened
        # 4 x 4 x 64 feature map.
        self.dense_1 = nn.Linear(in_features=4 * 4 * 64, out_features=200)
        self.dense_2 = nn.Linear(in_features=200, out_features=200)
        self.dense_3 = nn.Linear(in_features=200, out_features=10)

    def forward(self, x):
        """Return per-class softmax probabilities for a batch of images.

        Args:
            x: channels-first image batch; the first convolution expects
                a single input channel.

        Returns:
            Tensor with one row per sample and 10 columns, each row
            normalised by softmax.
        """
        # Stage 1: conv -> relu -> conv -> relu -> 2x2 max-pool.
        stage_one = F.relu(self.conv2d_1(x))
        stage_one = F.relu(self.conv2d_2(stage_one))
        stage_one = F.max_pool2d(stage_one, kernel_size=2, stride=2)

        # Stage 2: same pattern with 64 filters.
        stage_two = F.relu(self.conv2d_3(stage_one))
        stage_two = F.relu(self.conv2d_4(stage_two))
        stage_two = F.max_pool2d(stage_two, kernel_size=2, stride=2)

        # Move channels to the last axis before flattening. The original
        # author notes this is required to match the ordering produced by
        # the Keras Flatten layer, so the dense weights line up.
        channels_last = stage_two.permute((0, 2, 3, 1))
        flat_width = 4 * 4 * 64
        flattened = channels_last.contiguous().view(-1, flat_width)

        hidden = F.relu(self.dense_1(flattened))
        hidden = F.relu(self.dense_2(hidden))
        logits = self.dense_3(hidden)
        return F.softmax(logits, dim=1)
def keras_Net():
    """Build and compile the Keras CNN whose weights are to be transferred.

    The network is: two 32-filter 3x3 convolutions, 2x2 max-pooling, two
    64-filter 3x3 convolutions, 2x2 max-pooling, Flatten, two 200-unit
    dense layers and a 10-unit softmax output. Every convolution and hidden
    dense layer is followed by a ReLU activation. The input shape is
    ``(28, 28, 1)`` (channels last).

    The layers that carry weights are named explicitly. ``keras_to_pyt``
    builds PyTorch ``state_dict`` keys from these names, so they must equal
    the attribute names used in ``pytorch_Net``. Relying on Keras'
    auto-generated names would break as soon as another model had been
    created earlier in the same process, because the automatic numbering
    would no longer start at 1.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy loss,
        Adadelta optimizer).
    """
    model = Sequential()

    # Convolutional stage 1.
    model.add(Conv2D(32, (3, 3),
                     input_shape=(28, 28, 1),
                     name='conv2d_1'))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3), name='conv2d_2'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Convolutional stage 2.
    model.add(Conv2D(64, (3, 3), name='conv2d_3'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), name='conv2d_4'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(200, name='dense_1'))
    model.add(Activation('relu'))
    model.add(Dense(200, name='dense_2'))
    model.add(Activation('relu'))
    model.add(Dense(10, name='dense_3'))
    model.add(Activation('softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta())
    return model
建立转换参数的函数:
def keras_to_pyt(km, pm):
    """Copy Conv2D and Dense weights from a Keras model into a PyTorch model.

    Each Keras layer named ``<name>`` contributes the entries
    ``<name>.weight`` and ``<name>.bias``, which are then matched against the
    keys of ``pm.state_dict()``. The PyTorch module attributes must therefore
    carry the same names as the Keras layers.

    Args:
        km: Keras model providing the trained weights.
        pm: PyTorch module that receives them (modified in place).

    Returns:
        ``pm``, with the transferred weights loaded.

    Raises:
        KeyError: if a key of ``pm.state_dict()`` has no counterpart among
            the Conv2D/Dense layers of ``km``.
    """
    weight_dict = {}
    for layer in km.layers:
        # Exact type match on purpose: a subclass may store its kernel in a
        # different axis order, for which the transposes below would be
        # wrong.
        if type(layer) is keras.layers.Conv2D:
            # Reorder the kernel axes with (3, 2, 0, 1), as in the original
            # transfer code.
            kernel_axes = (3, 2, 0, 1)
        elif type(layer) is keras.layers.Dense:
            # Swap the two kernel axes.
            kernel_axes = (1, 0)
        else:
            continue

        prefix = layer.get_config()['name']
        # Fetch the arrays once instead of once per parameter.
        layer_weights = layer.get_weights()
        weight_dict[prefix + '.weight'] = np.transpose(layer_weights[0],
                                                       kernel_axes)
        # A layer may hold only a kernel; then there is no second array.
        if len(layer_weights) > 1:
            weight_dict[prefix + '.bias'] = layer_weights[1]

    pyt_state_dict = pm.state_dict()
    missing = [key for key in pyt_state_dict if key not in weight_dict]
    if missing:
        raise KeyError(
            "no Keras weights found for PyTorch state_dict keys: {} "
            "(available: {})".format(', '.join(missing),
                                     ', '.join(sorted(weight_dict))))

    for key in pyt_state_dict:
        pyt_state_dict[key] = torch.from_numpy(weight_dict[key])
    pm.load_state_dict(pyt_state_dict)
    return pm
建立主函数,转换model的参数并进行测试
def main():
    """Transfer the trained Keras weights to PyTorch and compare predictions.

    Steps:
        1. Build the Keras model and load its weights from ``models/mnist``.
        2. Build the PyTorch model, copy the weights over and save its
           ``state_dict`` to ``pyt_model.pt``.
        3. Run both models on the first 100 MNIST training images and print
           a message for every image where the two predictions disagree, or
           where they agree but differ from the label.
    """
    # define the model
    keras_network = keras_Net()
    keras_network.load_weights("models/mnist")
    pytorch_network = pytorch_Net()

    # transfer keras model to pytorch
    pytorch_network = keras_to_pyt(keras_network, pytorch_network)
    torch.save(pytorch_network.state_dict(), "pyt_model.pt")

    # ---- test the performance of the two models ----
    num_samples = 100
    (x_train, y_train), _ = mnist.load_data()
    # Slice before converting so only the samples in use are cast to float.
    images = x_train[:num_samples].astype('float32')
    labels = y_train[:num_samples]

    # Insert a channel axis at position 1 for PyTorch.
    inp = np.expand_dims(images, axis=1)
    print('inp.shape', inp.shape)
    inp_pyt = torch.from_numpy(inp).float()
    # The Keras model takes the channel axis last.
    inp_keras = np.transpose(inp, (0, 2, 3, 1))

    # Inference only: no gradient tracking is needed.
    pytorch_network.eval()
    with torch.no_grad():
        pyt_res = pytorch_network(inp_pyt).numpy()
    keras_res = keras_network.predict(x=inp_keras, verbose=1)

    for i, (pyt_row, keras_row, label) in enumerate(
            zip(pyt_res, keras_res, labels)):
        predict1 = np.argmax(pyt_row)
        predict2 = np.argmax(keras_row)
        if predict1 != predict2:
            print("ERROR: Two model ooutput are different!")
        elif predict1 != label:
            print("The model predict for {}th image is wrong".format(i+1))
附赠两个功能:
打印Keras model的参数:
def print_keras_model(keras_model):
    """Print every layer name, plus weight shapes for two-array layers.

    For each layer in ``keras_model.layers`` the configured name is printed.
    If the layer holds exactly two weight arrays, their shapes are printed
    on a second line; layers with any other number of arrays get no shape
    line.

    Args:
        keras_model: object exposing ``layers``, each with ``get_config()``
            and ``get_weights()``.
    """
    for keras_layer in keras_model.layers:
        layer_name = keras_layer.get_config()['name']
        print("layer.get_config():", layer_name)

        arrays = keras_layer.get_weights()
        if len(arrays) != 2:
            continue
        first, second = arrays
        print("layer.get_weights():", first.shape, second.shape)
对比Keras和Pytorch model的参数是否一致
def compare_weight(keras_model, pytorch_model, weight_name='conv2d_1.weight'):
    """Print and return the difference between one PyTorch and Keras parameter.

    ``weight_name`` has the form ``<layer>.<kind>`` where ``<kind>`` is
    ``weight`` or ``bias``. The PyTorch parameter is looked up under the full
    name, and the Keras layer under ``<layer>``. Before subtracting, a Keras
    kernel is transposed the same way the weight transfer does it: axes
    ``(3, 2, 0, 1)`` for a 4-D kernel and ``(1, 0)`` for a 2-D kernel.

    Args:
        keras_model: object exposing ``layers``, each with ``get_config()``
            and ``get_weights()``.
        pytorch_model: module exposing ``named_parameters()``.
        weight_name: parameter to compare, e.g. ``'dense_1.bias'``.

    Returns:
        ``numpy.ndarray`` holding ``pytorch_value - keras_value``; all zeros
        when the two parameters are identical.

    Raises:
        ValueError: if ``weight_name`` is malformed, if either model lacks
            the requested parameter, or if the Keras kernel is neither 2-D
            nor 4-D.
    """
    # Split on the last dot: everything before it is the layer name.
    layer_name, _, kind = weight_name.rpartition('.')
    if not layer_name or kind not in ('weight', 'bias'):
        raise ValueError(
            "weight_name must look like '<layer>.weight' or '<layer>.bias', "
            "got {!r}".format(weight_name))

    pyt_weight = None
    for name, param in pytorch_model.named_parameters():
        if name == weight_name:
            pyt_weight = param.detach().cpu().numpy()
            break
    if pyt_weight is None:
        raise ValueError(
            "PyTorch model has no parameter named {!r}".format(weight_name))
    print("pyt_weight.shape:", pyt_weight.shape)

    keras_weight = None
    for layer in keras_model.layers:
        if layer.get_config()['name'] != layer_name:
            continue
        layer_weights = layer.get_weights()
        index = 0 if kind == 'weight' else 1
        if len(layer_weights) <= index:
            raise ValueError(
                "Keras layer {!r} has no {}".format(layer_name, kind))
        keras_weight = layer_weights[index]
        if kind == 'weight':
            if keras_weight.ndim == 4:
                keras_weight = np.transpose(keras_weight, (3, 2, 0, 1))
            elif keras_weight.ndim == 2:
                keras_weight = np.transpose(keras_weight, (1, 0))
            else:
                raise ValueError(
                    "unsupported kernel rank {} for layer {!r}".format(
                        keras_weight.ndim, layer_name))
        break
    if keras_weight is None:
        raise ValueError(
            "Keras model has no layer named {!r}".format(layer_name))

    weight_dis = pyt_weight - keras_weight
    print("weight_dis", weight_dis)
    return weight_dis
网友评论