[Repost] caffe python layer

Author: dopami | Posted 2018-09-21 14:11

    Original post: https://blog.csdn.net/thesby/article/details/51264439 (thesby's CSDN blog)

    Most Caffe layers are written in C++, and thanks to C++'s efficiency, networks train quickly. Sometimes, though, you need to write your own input layer to handle a particular kind of data input — for example, when you want to sample patches from images rather than convert everything to LMDB. In that case, consider writing the layer directly in Python. Input layers don't need GPU acceleration, so they are also fairly easy to write.

    How to use a Python layer

    First, an example from the web (from http://chrischoy.github.io/research/caffe-python-layer/):

    layer {
      type: 'Python'
      name: 'loss'
      top: 'loss'
      bottom: 'ipx'
      bottom: 'ipy'
      python_param {
        # the module name -- usually the filename -- that needs to be in $PYTHONPATH
        module: 'pyloss'
        # the layer name -- the class name in the module
        layer: 'EuclideanLossLayer'
      }
      # set loss weight so Caffe knows this is a loss layer
      loss_weight: 1
    }


    Here type is always Python; top and bottom work exactly as in ordinary layers. module is the name of your Python module — usually the filename, which must be on $PYTHONPATH — and layer is the name of the class defined in that module. Note that Caffe must be compiled with WITH_PYTHON_LAYER := 1 for the Python layer type to be available.
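    The same definition can also be generated from Python with pycaffe's net specification API instead of hand-written prototxt. A minimal sketch (assuming the pyloss module shown later in this post; in net_spec, layer-specific keyword arguments such as module and layer are routed into python_param):

    from caffe import layers as L, to_proto

    # two dummy bottoms standing in for 'ipx' and 'ipy'
    x = L.DummyData(shape=dict(dim=[10, 3, 2]))
    y = L.DummyData(shape=dict(dim=[10, 3, 2]))

    # type 'Python': module/layer end up in python_param;
    # loss_weight marks this as a loss layer
    loss = L.Python(x, y, module='pyloss', layer='EuclideanLossLayer',
                    loss_weight=1)

    print(to_proto(loss))  # prints prototxt equivalent to the block above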

    How to write a Python layer

    Here, the code released with the paper Fully Convolutional Networks for Semantic Segmentation serves as the example of how to write a Python layer.

    import caffe
    import numpy as np
    from PIL import Image
    import random

    class VOCSegDataLayer(caffe.Layer):
        """
        Load (input image, label image) pairs from PASCAL VOC
        one-at-a-time while reshaping the net to preserve dimensions.

        Use this to feed data to a fully convolutional network.
        """

        def setup(self, bottom, top):
            """
            Setup data layer according to parameters:

            - voc_dir: path to PASCAL VOC year dir
            - split: train / val / test
            - mean: tuple of mean values to subtract
            - randomize: load in random order (default: True)
            - seed: seed for randomization (default: None / current time)

            for PASCAL VOC semantic segmentation.

            example

            params = dict(voc_dir="/path/to/PASCAL/VOC2011",
                mean=(104.00698793, 116.66876762, 122.67891434),
                split="val")
            """
            # config
            params = eval(self.param_str)
            self.voc_dir = params['voc_dir']
            self.split = params['split']
            self.mean = np.array(params['mean'])
            self.random = params.get('randomize', True)
            self.seed = params.get('seed', None)

            # two tops: data and label
            if len(top) != 2:
                raise Exception("Need to define two tops: data and label.")
            # data layers have no bottoms
            if len(bottom) != 0:
                raise Exception("Do not define a bottom.")

            # load indices for images and labels
            split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                    self.split)
            self.indices = open(split_f, 'r').read().splitlines()
            self.idx = 0

            # make eval deterministic
            if 'train' not in self.split:
                self.random = False

            # randomization: seed and pick
            if self.random:
                random.seed(self.seed)
                self.idx = random.randint(0, len(self.indices)-1)

        def reshape(self, bottom, top):
            # load image + label image pair
            self.data = self.load_image(self.indices[self.idx])
            self.label = self.load_label(self.indices[self.idx])
            # reshape tops to fit (leading 1 is for batch dimension)
            top[0].reshape(1, *self.data.shape)
            top[1].reshape(1, *self.label.shape)

        def forward(self, bottom, top):
            # assign output
            top[0].data[...] = self.data
            top[1].data[...] = self.label

            # pick next input
            if self.random:
                self.idx = random.randint(0, len(self.indices)-1)
            else:
                self.idx += 1
                if self.idx == len(self.indices):
                    self.idx = 0

        def backward(self, top, propagate_down, bottom):
            pass

        def load_image(self, idx):
            """
            Load input image and preprocess for Caffe:
            - cast to float
            - switch channels RGB -> BGR
            - subtract mean
            - transpose to channel x height x width order
            """
            im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
            in_ = np.array(im, dtype=np.float32)
            in_ = in_[:,:,::-1]
            in_ -= self.mean
            in_ = in_.transpose((2,0,1))
            return in_

        def load_label(self, idx):
            """
            Load label image as 1 x height x width integer array of label indices.
            The leading singleton dimension is required by the loss.
            """
            im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
            label = np.array(im, dtype=np.uint8)
            label = label[np.newaxis, ...]
            return label

    class SBDDSegDataLayer(caffe.Layer):
        """
        Load (input image, label image) pairs from the SBDD extended labeling
        of PASCAL VOC for semantic segmentation
        one-at-a-time while reshaping the net to preserve dimensions.

        Use this to feed data to a fully convolutional network.
        """

        def setup(self, bottom, top):
            """
            Setup data layer according to parameters:

            - sbdd_dir: path to SBDD `dataset` dir
            - split: train / seg11valid
            - mean: tuple of mean values to subtract
            - randomize: load in random order (default: True)
            - seed: seed for randomization (default: None / current time)

            for SBDD semantic segmentation.

            N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
            Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

            example

            params = dict(sbdd_dir="/path/to/SBDD/dataset",
                mean=(104.00698793, 116.66876762, 122.67891434),
                split="valid")
            """
            # config
            params = eval(self.param_str)
            self.sbdd_dir = params['sbdd_dir']
            self.split = params['split']
            self.mean = np.array(params['mean'])
            self.random = params.get('randomize', True)
            self.seed = params.get('seed', None)

            # two tops: data and label
            if len(top) != 2:
                raise Exception("Need to define two tops: data and label.")
            # data layers have no bottoms
            if len(bottom) != 0:
                raise Exception("Do not define a bottom.")

            # load indices for images and labels
            split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
            self.indices = open(split_f, 'r').read().splitlines()
            self.idx = 0

            # make eval deterministic
            if 'train' not in self.split:
                self.random = False

            # randomization: seed and pick
            if self.random:
                random.seed(self.seed)
                self.idx = random.randint(0, len(self.indices)-1)

        def reshape(self, bottom, top):
            # load image + label image pair
            self.data = self.load_image(self.indices[self.idx])
            self.label = self.load_label(self.indices[self.idx])
            # reshape tops to fit (leading 1 is for batch dimension)
            top[0].reshape(1, *self.data.shape)
            top[1].reshape(1, *self.label.shape)

        def forward(self, bottom, top):
            # assign output
            top[0].data[...] = self.data
            top[1].data[...] = self.label

            # pick next input
            if self.random:
                self.idx = random.randint(0, len(self.indices)-1)
            else:
                self.idx += 1
                if self.idx == len(self.indices):
                    self.idx = 0

        def backward(self, top, propagate_down, bottom):
            pass

        def load_image(self, idx):
            """
            Load input image and preprocess for Caffe:
            - cast to float
            - switch channels RGB -> BGR
            - subtract mean
            - transpose to channel x height x width order
            """
            im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
            in_ = np.array(im, dtype=np.float32)
            in_ = in_[:,:,::-1]
            in_ -= self.mean
            in_ = in_.transpose((2,0,1))
            return in_

        def load_label(self, idx):
            """
            Load label image as 1 x height x width integer array of label indices.
            The leading singleton dimension is required by the loss.
            """
            import scipy.io
            mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
            label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
            label = label[np.newaxis, ...]
            return label


    Each class is a layer, and the class name is exactly what goes into the layer parameter. Both classes here are data input layers: since they must produce a data blob and a label blob, they have two tops and no bottom.

    The class inherits directly from caffe.Layer and must override setup(), reshape(), forward(), and backward(); beyond those four, you are free to define whatever helper methods you like.

    setup() runs once when the layer is created, e.g. to initialize whatever data the layer needs.

    reshape() fetches the data and shapes it into the four-dimensional blob Caffe expects; it is called every time data is fetched.

    forward() is the forward pass; here it just hands the loaded data onward, since there is no other computation.

    backward() is backpropagation; a data layer has nothing to propagate, so it simply passes.
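    To plug such a data layer into a net, its parameters go into python_param's param_str field as the string form of a Python dict, which setup() above recovers with eval(self.param_str). A sketch, assuming the two classes above are saved as voc_layers.py somewhere on $PYTHONPATH (the paths are placeholders):

    from caffe import layers as L, to_proto

    params = dict(voc_dir='/path/to/PASCAL/VOC2011', split='train',
                  mean=(104.00698793, 116.66876762, 122.67891434), seed=1337)

    # ntop=2 because the layer fills two tops: data and label
    data, label = L.Python(module='voc_layers', layer='VOCSegDataLayer',
                           ntop=2, param_str=str(params))

    print(to_proto(data, label))  # the data-layer portion of the train net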

    PS

    Here I collect some related material for reference.

    1. The official Caffe site now has a little pycaffe material, but since Caffe is updated frequently and the pages could disappear at any time, it is excerpted here.

    File: pyloss.py

    import caffe
    import numpy as np

    class EuclideanLossLayer(caffe.Layer):
        """
        Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
        to demonstrate the class interface for developing layers in Python.
        """

        def setup(self, bottom, top):
            # check input pair
            if len(bottom) != 2:
                raise Exception("Need two inputs to compute distance.")

        def reshape(self, bottom, top):
            # check input dimensions match
            if bottom[0].count != bottom[1].count:
                raise Exception("Inputs must have the same dimension.")
            # difference is shape of inputs
            self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
            # loss output is scalar
            top[0].reshape(1)

        def forward(self, bottom, top):
            self.diff[...] = bottom[0].data - bottom[1].data
            top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

        def backward(self, top, propagate_down, bottom):
            for i in range(2):
                if not propagate_down[i]:
                    continue
                if i == 0:
                    sign = 1
                else:
                    sign = -1
                bottom[i].diff[...] = sign * self.diff / bottom[i].num
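    forward() computes loss = sum((x - y)**2) / (2 * num) and backward() claims the gradient is +/-(x - y) / num, so the math can be sanity-checked with a numpy-only finite-difference test, no Caffe required (an added check, not part of the original example):

    import numpy as np

    np.random.seed(0)
    x = np.random.randn(4, 3).astype(np.float32)  # stands in for bottom[0]; num = 4
    y = np.random.randn(4, 3).astype(np.float32)  # stands in for bottom[1]
    num = x.shape[0]

    def loss(x, y):
        return np.sum((x - y) ** 2) / num / 2.

    analytic = (x - y) / num  # gradient w.r.t. bottom[0] (sign = +1)
    numeric = np.zeros_like(x)
    eps = 1e-3
    for i in np.ndindex(*x.shape):
        xp = x.copy(); xp[i] += eps
        xm = x.copy(); xm[i] -= eps
        numeric[i] = (loss(xp, y) - loss(xm, y)) / (2 * eps)

    print(np.max(np.abs(analytic - numeric)))  # should be tiny, around 1e-4 or less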


    The following shows how to use this layer:

    linreg.prototxt

    name: 'LinearRegressionExample'
    # define a simple network for linear regression on dummy data
    # that computes the loss by a PythonLayer.
    layer {
      type: 'DummyData'
      name: 'x'
      top: 'x'
      dummy_data_param {
        shape: { dim: 10 dim: 3 dim: 2 }
        data_filler: { type: 'gaussian' }
      }
    }
    layer {
      type: 'DummyData'
      name: 'y'
      top: 'y'
      dummy_data_param {
        shape: { dim: 10 dim: 3 dim: 2 }
        data_filler: { type: 'gaussian' }
      }
    }
    # include InnerProduct layers for parameters
    # so the net will need backward
    layer {
      type: 'InnerProduct'
      name: 'ipx'
      top: 'ipx'
      bottom: 'x'
      inner_product_param {
        num_output: 10
        weight_filler { type: 'xavier' }
      }
    }
    layer {
      type: 'InnerProduct'
      name: 'ipy'
      top: 'ipy'
      bottom: 'y'
      inner_product_param {
        num_output: 10
        weight_filler { type: 'xavier' }
      }
    }
    layer {
      type: 'Python'
      name: 'loss'
      top: 'loss'
      bottom: 'ipx'
      bottom: 'ipy'
      python_param {
        # the module name -- usually the filename -- that needs to be in $PYTHONPATH
        module: 'pyloss'
        # the layer name -- the class name in the module
        layer: 'EuclideanLossLayer'
      }
      # set loss weight so Caffe knows this is a loss layer.
      # since PythonLayer inherits directly from Layer, this isn't automatically
      # known to Caffe
      loss_weight: 1
    }
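    With pyloss.py importable, the net can be loaded and exercised directly from Python. A sketch of one forward/backward pass (assuming a Caffe build with WITH_PYTHON_LAYER enabled and pyloss.py on $PYTHONPATH):

    import caffe

    caffe.set_mode_cpu()
    net = caffe.Net('linreg.prototxt', caffe.TEST)

    out = net.forward()   # calls EuclideanLossLayer.forward()
    print(out['loss'])

    net.backward()        # calls EuclideanLossLayer.backward()
    print(net.blobs['ipx'].diff.shape)  # gradients w.r.t. the loss inputs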


    pascal_multilabel_datalayers.py

    # imports
    import json
    import time
    import pickle
    import scipy.misc
    import scipy.sparse  # needed by load_pascal_annotation below
    import skimage.io
    import caffe

    import numpy as np
    import os.path as osp

    from xml.dom import minidom
    from random import shuffle
    from threading import Thread
    from PIL import Image

    from tools import SimpleTransformer

    class PascalMultilabelDataLayerSync(caffe.Layer):
        """
        This is a simple synchronous datalayer for training a multilabel model on
        PASCAL.
        """

        def setup(self, bottom, top):
            self.top_names = ['data', 'label']

            # === Read input parameters ===
            # params is a python dictionary with layer parameters.
            params = eval(self.param_str)

            # Check the parameters for validity.
            check_params(params)

            # store input as class variables
            self.batch_size = params['batch_size']

            # Create a batch loader to load the images.
            self.batch_loader = BatchLoader(params, None)

            # === reshape tops ===
            # since we use a fixed input image size, we can shape the data layer
            # once. Else, we'd have to do it in the reshape call.
            top[0].reshape(
                self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
            # Note the 20 channels (because PASCAL has 20 classes.)
            top[1].reshape(self.batch_size, 20)

            print_info("PascalMultilabelDataLayerSync", params)

        def forward(self, bottom, top):
            """
            Load data.
            """
            for itt in range(self.batch_size):
                # Use the batch loader to load the next image.
                im, multilabel = self.batch_loader.load_next_image()
                # Add directly to the caffe data layer
                top[0].data[itt, ...] = im
                top[1].data[itt, ...] = multilabel

        def reshape(self, bottom, top):
            """
            There is no need to reshape the data, since the input is of fixed size
            (rows and columns)
            """
            pass

        def backward(self, top, propagate_down, bottom):
            """
            This layer does not back propagate
            """
            pass

    class BatchLoader(object):
        """
        This class abstracts away the loading of images.
        Images can either be loaded singly, or in a batch. The latter is used for
        the asynchronous data layer to preload batches while other processing is
        performed.
        """

        def __init__(self, params, result):
            self.result = result
            self.batch_size = params['batch_size']
            self.pascal_root = params['pascal_root']
            self.im_shape = params['im_shape']
            # get list of image indexes.
            list_file = params['split'] + '.txt'
            self.indexlist = [line.rstrip('\n') for line in open(
                osp.join(self.pascal_root, 'ImageSets/Main', list_file))]
            self._cur = 0  # current image
            # this class does some simple data-manipulations
            self.transformer = SimpleTransformer()

            print("BatchLoader initialized with {} images".format(
                len(self.indexlist)))

        def load_next_image(self):
            """
            Load the next image in a batch.
            """
            # Did we finish an epoch?
            if self._cur == len(self.indexlist):
                self._cur = 0
                shuffle(self.indexlist)

            # Load an image
            index = self.indexlist[self._cur]  # Get the image index
            image_file_name = index + '.jpg'
            im = np.asarray(Image.open(
                osp.join(self.pascal_root, 'JPEGImages', image_file_name)))
            im = scipy.misc.imresize(im, self.im_shape)  # resize

            # do a simple horizontal flip as data augmentation
            flip = np.random.choice(2)*2-1
            im = im[:, ::flip, :]

            # Load and prepare ground truth
            multilabel = np.zeros(20).astype(np.float32)
            anns = load_pascal_annotation(index, self.pascal_root)
            for label in anns['gt_classes']:
                # in the multilabel problem we don't care how MANY instances
                # there are of each class. Only if they are present.
                # The "-1" is b/c we are not interested in the background
                # class.
                multilabel[label - 1] = 1

            self._cur += 1
            return self.transformer.preprocess(im), multilabel

    def load_pascal_annotation(index, pascal_root):
        """
        This code is borrowed from Ross Girshick's FAST-RCNN code
        (https://github.com/rbgirshick/fast-rcnn).
        It parses the PASCAL .xml metadata files.
        See publication for further details: (http://arxiv.org/abs/1504.08083).

        Thanks Ross!
        """
        classes = ('__background__',  # always index 0
                   'aeroplane', 'bicycle', 'bird', 'boat',
                   'bottle', 'bus', 'car', 'cat', 'chair',
                   'cow', 'diningtable', 'dog', 'horse',
                   'motorbike', 'person', 'pottedplant',
                   'sheep', 'sofa', 'train', 'tvmonitor')
        class_to_ind = dict(zip(classes, range(21)))

        filename = osp.join(pascal_root, 'Annotations', index + '.xml')

        # print('Loading: {}'.format(filename))
        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, 21), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            cls = class_to_ind[
                str(get_data_from_tag(obj, "name")).lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes': boxes,
                'gt_classes': gt_classes,
                'gt_overlaps': overlaps,
                'flipped': False,
                'index': index}

    def check_params(params):
        """
        A utility function to check the parameters for the data layers.
        """
        assert 'split' in params.keys(
        ), 'Params must include split (train, val, or test).'

        required = ['batch_size', 'pascal_root', 'im_shape']
        for r in required:
            assert r in params.keys(), 'Params must include {}'.format(r)

    def print_info(name, params):
        """
        Output some info regarding the class
        """
        print("{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
            name,
            params['split'],
            params['batch_size'],
            params['im_shape']))
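    The loading pipeline can be tested outside Caffe by driving BatchLoader directly. A sketch, assuming the definitions above are importable and a local PASCAL VOC copy exists (the path is a placeholder):

    params = dict(batch_size=8, im_shape=[227, 227],
                  split='train', pascal_root='/path/to/VOCdevkit/VOC2007')
    check_params(params)

    loader = BatchLoader(params, None)
    im, multilabel = loader.load_next_image()
    print(im.shape)                  # (3, 227, 227), preprocessed for Caffe
    print(multilabel.nonzero()[0])   # indices of the classes present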


    caffenet.py

    from __future__ import print_function

    from caffe import layers as L, params as P, to_proto
    from caffe.proto import caffe_pb2

    # helper function for common structures
    def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
        conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                             num_output=nout, pad=pad, group=group)
        return conv, L.ReLU(conv, in_place=True)

    def fc_relu(bottom, nout):
        fc = L.InnerProduct(bottom, num_output=nout)
        return fc, L.ReLU(fc, in_place=True)

    def max_pool(bottom, ks, stride=1):
        return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

    def caffenet(lmdb, batch_size=256, include_acc=False):
        data, label = L.Data(source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
            transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True))

        # the net itself
        conv1, relu1 = conv_relu(data, 11, 96, stride=4)
        pool1 = max_pool(relu1, 3, stride=2)
        norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
        conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
        pool2 = max_pool(relu2, 3, stride=2)
        norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
        conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
        conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
        conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
        pool5 = max_pool(relu5, 3, stride=2)
        fc6, relu6 = fc_relu(pool5, 4096)
        drop6 = L.Dropout(relu6, in_place=True)
        fc7, relu7 = fc_relu(drop6, 4096)
        drop7 = L.Dropout(relu7, in_place=True)
        fc8 = L.InnerProduct(drop7, num_output=1000)
        loss = L.SoftmaxWithLoss(fc8, label)

        if include_acc:
            acc = L.Accuracy(fc8, label)
            return to_proto(loss, acc)
        else:
            return to_proto(loss)

    def make_net():
        with open('train.prototxt', 'w') as f:
            print(caffenet('/path/to/caffe-train-lmdb'), file=f)

        with open('test.prototxt', 'w') as f:
            print(caffenet('/path/to/caffe-val-lmdb', batch_size=50, include_acc=True), file=f)

    if __name__ == '__main__':
        make_net()


    tools.py

    import numpy as np

    class SimpleTransformer:
        """
        SimpleTransformer is a simple class for preprocessing and deprocessing
        images for caffe.
        """

        def __init__(self, mean=[128, 128, 128]):
            self.mean = np.array(mean, dtype=np.float32)
            self.scale = 1.0

        def set_mean(self, mean):
            """
            Set the mean to subtract for centering the data.
            """
            self.mean = mean

        def set_scale(self, scale):
            """
            Set the data scaling.
            """
            self.scale = scale

        def preprocess(self, im):
            """
            preprocess() emulates the pre-processing occurring in the vgg16 caffe
            prototxt.
            """
            im = np.float32(im)
            im = im[:, :, ::-1]  # change to BGR
            im -= self.mean
            im *= self.scale
            im = im.transpose((2, 0, 1))
            return im

        def deprocess(self, im):
            """
            inverse of preprocess()
            """
            im = im.transpose(1, 2, 0)
            im /= self.scale
            im += self.mean
            im = im[:, :, ::-1]  # change to RGB

            return np.uint8(im)
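    A quick round trip shows that deprocess() inverts preprocess() for uint8 input with integer mean values (numpy only; the copy() guards against in-place modification of the blob):

    import numpy as np

    t = SimpleTransformer(mean=[104, 117, 123])
    rgb = np.random.randint(0, 256, (8, 8, 3)).astype(np.uint8)

    blob = t.preprocess(rgb)           # float32, BGR, mean-subtracted, C x H x W
    back = t.deprocess(blob.copy())    # back to uint8 RGB, H x W x C
    print(blob.shape, np.array_equal(rgb, back))  # (3, 8, 8) True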

    class CaffeSolver:
        """
        CaffeSolver is a class for creating a solver.prototxt file. It sets default
        values and can export a solver parameter file.
        Note that all parameters are stored as strings; string-valued parameters
        must themselves be quoted, i.e. a string within a string.
        """

        def __init__(self, testnet_prototxt_path="testnet.prototxt",
                     trainnet_prototxt_path="trainnet.prototxt", debug=False):

            self.sp = {}

            # critical:
            self.sp['base_lr'] = '0.001'
            self.sp['momentum'] = '0.9'

            # speed:
            self.sp['test_iter'] = '100'
            self.sp['test_interval'] = '250'

            # looks:
            self.sp['display'] = '25'
            self.sp['snapshot'] = '2500'
            self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!

            # learning rate policy
            self.sp['lr_policy'] = '"fixed"'

            # important, but rare:
            self.sp['gamma'] = '0.1'
            self.sp['weight_decay'] = '0.0005'
            self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
            self.sp['test_net'] = '"' + testnet_prototxt_path + '"'

            # pretty much never change these.
            self.sp['max_iter'] = '100000'
            self.sp['test_initialization'] = 'false'
            self.sp['average_loss'] = '25'  # this has to do with the display.
            self.sp['iter_size'] = '1'  # this is for accumulating gradients

            if (debug):
                self.sp['max_iter'] = '12'
                self.sp['test_iter'] = '1'
                self.sp['test_interval'] = '4'
                self.sp['display'] = '1'

        def add_from_file(self, filepath):
            """
            Reads a caffe solver prototxt file and updates the CaffeSolver
            instance parameters.
            """
            with open(filepath, 'r') as f:
                for line in f:
                    if line[0] == '#':
                        continue
                    splitLine = line.split(':')
                    self.sp[splitLine[0].strip()] = splitLine[1].strip()

        def write(self, filepath):
            """
            Export solver parameters to INPUT "filepath". Sorted alphabetically.
            """
            with open(filepath, 'w') as f:
                for key, value in sorted(self.sp.items()):
                    if not(type(value) is str):
                        raise TypeError('All solver parameters must be strings')
                    f.write('%s: %s\n' % (key, value))
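    Typical use is to instantiate a CaffeSolver, tweak a few fields, write the file out, and hand it to pycaffe. A sketch (the prototxt paths are placeholders and must exist):

    import caffe

    s = CaffeSolver(trainnet_prototxt_path='trainnet.prototxt',
                    testnet_prototxt_path='testnet.prototxt')
    s.sp['base_lr'] = '0.01'   # remember: all values are strings
    s.write('solver.prototxt')

    solver = caffe.SGDSolver('solver.prototxt')
    solver.step(100)           # run 100 training iterations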
