美文网首页
【darknet训练细节】一个隐藏的超参数:scale

【darknet训练细节】一个隐藏的超参数:scale

作者: yuanCruise | 来源:发表于2019-06-16 14:44 被阅读0次

    整体源码如下,该源码是利用yolo训练检测网络时,对输入数据作前处理的源代码,该代码段中包含了从外部传入的超参数(jitter),也有内部写死的超参数(scale)。下面将对该代码做详细的解析。

    /*
     * Build one batch of n detection training samples.
     *
     * For each sample an image is loaded at its native size, drawn onto a
     * w x h canvas pre-filled with 0.5 using a randomly jittered aspect ratio
     * and a random offset, colour-distorted, and flipped with probability 1/2.
     * The matching truth row is filled in by fill_truth_detection, which is
     * given the same flip flag and the offset/size expressed as fractions of
     * the canvas.
     *
     * jitter is the only geometric knob exposed to the caller; scale is fixed
     * at 1 inside the loop.
     *
     * Returns a data struct whose X rows are the canvas pixel buffers and whose
     * y rows hold 5*boxes floats each.
     *
     * NOTE(review): none of the allocations made here are checked for failure.
     */
    data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
    {
        // presumably picks n paths at random out of the m supplied — confirm in get_random_paths
        char **random_paths = get_random_paths(paths, n, m);
        int i;
        data d = {0};
        d.shallow = 0;
    
        // X: one row pointer per sample; the rows themselves are attached inside the loop.
        d.X.rows = n;
        d.X.vals = calloc(d.X.rows, sizeof(float*));
        d.X.cols = h*w*3;
    
        // y: n rows of 5*boxes floats.
        d.y = make_matrix(n, 5*boxes);
        for(i = 0; i < n; ++i){
            // w = h = 0 is passed, so load_image's resize condition (h && w) is false.
            image orig = load_image_color(random_paths[i], 0, 0);
            // Canvas of the requested size; every value starts at 0.5.
            image sized = make_image(w, h, orig.c);
            fill_image(sized, .5);
    
            // Jitter range, as a fraction of the original width/height.
            float dw = jitter * orig.w;
            float dh = jitter * orig.h;
    
            // Aspect ratio after perturbing width and height independently.
            float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
            // Random scaling is disabled; scale is hard-coded to 1 below.
            //float scale = rand_uniform(.25, 2);
            float scale = 1;
    
            float nw, nh;
    
            // The longer side of the jittered shape is set to scale * the canvas
            // dimension; the other side follows from new_ar.
            if(new_ar < 1){
                nh = scale * h;
                nw = nh * new_ar;
            } else {
                nw = scale * w;
                nh = nw / new_ar;
            }
    
            // Offset of the placed image inside the canvas, drawn between 0 and
            // the leftover space (which is negative if the image is larger).
            float dx = rand_uniform(0, w - nw);
            float dy = rand_uniform(0, h - nh);
    
            // place_image takes int arguments, so nw, nh, dx, dy are converted
            // from float to int at this call.
            place_image(orig, nw, nh, dx, dy, sized);
    
            // presumably randomises hue/saturation/exposure in place — confirm in random_distort_image
            random_distort_image(sized, hue, saturation, exposure);
    
            int flip = rand()%2;
            if(flip) flip_image(sized);
            // The canvas buffer becomes row i of X; sized is not freed here.
            d.X.vals[i] = sized.data;
    
    
            // Pass the same flip flag plus the placement as fractions of the canvas.
            fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h);
    
            free_image(orig);
        }
        // Only the array of pointers is released here, not the strings it points to.
        free(random_paths);
        return d;
    }
    

    1.新建data数据,data是包含了图片数据和其对应的标签文件,下述代码初始化了一个data数据结构,并根据初始化大小分配了内存空间。

        // Start from an all-zero batch and explicitly clear the shallow flag.
        data d = {0};
        d.shallow = 0;
    
        // X holds one row pointer per sample; cols is recorded as h*w*3.
        d.X.rows = n;
        d.X.vals = calloc(d.X.rows, sizeof(float*));
        d.X.cols = h*w*3;
    
        // y is an n x (5*boxes) matrix allocated by make_matrix.
        d.y = make_matrix(n, 5*boxes);
    

    上述代码段中用到的make_matrix函数用来分配指定行列数的矩阵,并将其内存清零。本例中矩阵共n行(n为本批次的图片数量),每行存放该图片对应的boxes个标签框;因为每个box包含x、y、w、h、id五个值,所以列数为5*boxes。

    /*
     * Allocate a matrix with the given number of rows and columns.
     *
     * The row-pointer array and each row are obtained from calloc, so every
     * element starts at zero. The calloc results are used without being
     * checked.
     */
    matrix make_matrix(int rows, int cols)
    {
        matrix result;
        result.rows = rows;
        result.cols = cols;
        result.vals = calloc(result.rows, sizeof(float *));

        int r = 0;
        while(r < result.rows){
            result.vals[r] = calloc(result.cols, sizeof(float));
            ++r;
        }
        return result;
    }
    

    2.导入图片数据,如下所示函数将图片读入,创建内存空间。

    // Load the source image; w = h = 0 means load_image will not resize it.
    image orig = load_image_color(random_paths[i], 0, 0);
    // Allocate the w x h canvas with the same channel count, then set every value to 0.5.
    image sized = make_image(w, h, orig.c);
    fill_image(sized, .5);
    

    上述代码中先用load_image_color将图片导入。注意这里传入的w和h均为0,从如下源码可以看到,此时load_image中的条件(h && w)不成立,因此导入过程中图片并不会被resize,而是保持原始尺寸(真正的缩放在后面的place_image中完成)。随后用make_image分配一块w×h的空间,再用fill_image把其中的每个值初始化为0.5。

    /*
     * Load an image through load_image, always requesting 3 channels.
     * w and h are forwarded unchanged.
     */
    image load_image_color(char *filename, int w, int h)
    {
        image loaded = load_image(filename, w, h, 3);
        return loaded;
    }
    -------------------------------------------------------------
    /*
     * Load an image with c channels, using the OpenCV loader when OPENCV is
     * defined and the stb loader otherwise.
     *
     * The image is resized to w x h only when both w and h are non-zero and
     * the loaded size differs from the request; otherwise it is returned as
     * loaded.
     */
    image load_image(char *filename, int w, int h, int c)
    {
    #ifdef OPENCV
        image loaded = load_image_cv(filename, c);
    #else
        image loaded = load_image_stb(filename, c);
    #endif

        /* No target size requested: hand back the image untouched. */
        if(!(h && w)) return loaded;

        /* Already the requested size: nothing to do. */
        if(h == loaded.h && w == loaded.w) return loaded;

        image scaled = resize_image(loaded, w, h);
        free_image(loaded);
        return scaled;
    }
    ------------------------------------------------------------
    /*
     * Create a w x h image with c channels and a zero-filled pixel buffer of
     * h*w*c floats. The calloc result is not checked.
     */
    image make_image(int w, int h, int c)
    {
        image img = make_empty_image(w,h,c);
        int count = h*w*c;
        img.data = calloc(count, sizeof(float));
        return img;
    }
    ------------------------------------------------------------
    /*
     * Set every value in the image's buffer (h*w*c floats) to s.
     */
    void fill_image(image m, float s)
    {
        int total = m.h*m.w*m.c;
        int k = 0;
        while(k < total){
            m.data[k] = s;
            ++k;
        }
    }
    

    3.设置超参数,设置部分超参数,对后续图片进行处理,部分超参数由外部cfg设置,部分超参数内部写死(比如scale参数)。

    // Jitter range, as a fraction of the original width/height.
    float dw = jitter * orig.w;
    float dh = jitter * orig.h;
    
    // Aspect ratio after perturbing width and height independently.
    float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
    // Random scaling is disabled; scale is hard-coded to 1 below.
    //float scale = rand_uniform(.25, 2);
    float scale = 1;
    
    float nw, nh;
    
    // The longer side of the jittered shape is set to scale * the canvas
    // dimension; the other side follows from new_ar.
    if(new_ar < 1){
          nh = scale * h;
          nw = nh * new_ar;
    } else {
         nw = scale * w;
         nh = nw / new_ar;
    }
    
    // Random offset inside the canvas, then draw, then colour-distort.
    float dx = rand_uniform(0, w - nw);
    float dy = rand_uniform(0, h - nh);
    place_image(orig, nw, nh, dx, dy, sized);
    random_distort_image(sized, hue, saturation, exposure);
    
    

    如上述代码所示,jitter控制对宽高的随机抖动,从而随机改变图片的宽高比new_ar。scale是一个在内部写死的超参数(此处固定为1,随机取值的版本已被注释掉),表示对输入图片的缩放比例:通过源码可以发现,它作用在抖动之后较长的那条边上,另一条边再按new_ar推算。其中place_image函数保证了不论上面做了多大尺度的scale缩放,最终图片都会被放到sized大小的范围内。

    /*
     * Resample im to w x h pixels and write it into canvas, offset by
     * (dx, dy).
     *
     * Each destination pixel (x, y) in the w x h region is mapped back to the
     * source coordinate (x/w * im.w, y/h * im.h), sampled with
     * bilinear_interpolate, and stored with set_pixel at (x + dx, y + dy).
     * set_pixel ignores coordinates outside the canvas, so any part of the
     * placed image that falls outside is dropped.
     *
     * The loops do not run when w or h is <= 0, so the divisions by w and h
     * are never reached with a zero divisor.
     */
    void place_image(image im, int w, int h, int dx, int dy, image canvas)
    {
        int x, y, c;
        for(c = 0; c < im.c; ++c){
            for(y = 0; y < h; ++y){
                /* The source row depends only on y: compute it once per row
                   rather than once per pixel. */
                float ry = ((float)y / h) * im.h;
                for(x = 0; x < w; ++x){
                    float rx = ((float)x / w) * im.w;
                    float val = bilinear_interpolate(im, rx, ry, c);
                    set_pixel(canvas, x + dx, y + dy, c, val);
                }
            }
        }
    }
    ----------------------------------------------------------------------------
    /*
     * Sample channel c of im at the fractional position (x, y).
     *
     * The four pixels around (x, y) are read with get_pixel_extend and
     * blended with weights given by the fractional parts of x and y.
     */
    static float bilinear_interpolate(image im, float x, float y, int c)
    {
        int x0 = (int) floorf(x);
        int y0 = (int) floorf(y);

        /* Fractional distance from the (x0, y0) corner. */
        float fx = x - x0;
        float fy = y - y0;

        float p00 = get_pixel_extend(im, x0, y0, c);
        float p01 = get_pixel_extend(im, x0, y0+1, c);
        float p10 = get_pixel_extend(im, x0+1, y0, c);
        float p11 = get_pixel_extend(im, x0+1, y0+1, c);

        float val = (1-fy) * (1-fx) * p00 +
            fy * (1-fx) * p01 +
            (1-fy) * fx * p10 +
            fy * fx * p11;
        return val;
    }
    ----------------------------------------------------------------------------
    /*
     * Store val at (x, y) in channel c of m.
     *
     * Coordinates outside the image are ignored without reporting an error.
     * The buffer is indexed channel by channel, then row by row.
     */
    static void set_pixel(image m, int x, int y, int c, float val)
    {
        int in_bounds = x >= 0 && y >= 0 && c >= 0 && x < m.w && y < m.h && c < m.c;
        if (!in_bounds) return;
        assert(x < m.w && y < m.h && c < m.c);

        int index = c*m.h*m.w + y*m.w + x;
        m.data[index] = val;
    }
    
    

    仔细剖析place_image函数:它遍历目标区域(w×h×通道数)中的每个像素,用双线性插值从原图im中采样,再写入canvas。如果缩放后的图比canvas小,未被覆盖的周边区域就保持fill_image填入的0.5(即灰色);如果缩放后的图比canvas大,超出范围的部分就直接丢弃(通过set_pixel中的条件判断可以得出该结论)。
    4.备注,data、box、matrix、image等数据结构的定义如下。

    // A batch of samples: input rows in X and the matching label rows in y.
    typedef struct{
        int w, h;           // NOTE(review): presumably the sample width/height; load_data_detection leaves them 0 — confirm
        matrix X;           // inputs; load_data_detection stores one image buffer per row
        matrix y;           // labels; load_data_detection allocates 5*boxes floats per row
        int shallow;        // load_data_detection sets this to 0; NOTE(review): meaning not shown here — confirm
        int *num_boxes;     // NOTE(review): presumably a per-sample box count; unused in the code shown — confirm
        box **boxes;        // NOTE(review): presumably per-sample box arrays; unused in the code shown — confirm
    } data;
    
    
    // A bounding box.
    typedef struct{
        float x, y, w, h;// x, y: center point; w, h: width and height (all expressed as fractions)
    } box;
    
    
    // A 2-D float matrix stored as an array of row pointers.
    typedef struct matrix{
        int rows, cols;     // number of rows and columns in the matrix
        float **vals;       // the matrix data, as a two-dimensional array (one pointer per row)
    } matrix;
    
    
    // An image stored as floats.
    typedef struct {
        int h;          // height in pixels
        int w;          // width in pixels
        int c;          // number of channels
        float *data;    // h*w*c values; set_pixel indexes it as c*h*w + y*w + x
    } image;
    

    相关文章

      网友评论

          本文标题:【darknet训练细节】一个隐藏的超参数:scale

          本文链接:https://www.haomeiwen.com/subject/iybwfctx.html