美文网首页数仓实战2(美团架构)
数仓实战06:数仓搭建-DWT层

数仓实战06:数仓搭建-DWT层

作者: 勇于自信 | 来源:发表于2020-08-06 18:37 被阅读0次
    1.设备主题宽表

    1)建表语句

    hive (gmall) > DROP TABLE
    IF EXISTS dwt_uv_topic;
    
    CREATE external TABLE dwt_uv_topic (
        `mid_id` string COMMENT '设备唯一标识',
        `user_id` string COMMENT '用户标识',
        `version_code` string COMMENT '程序版本号',
        `version_name` string COMMENT '程序版本名',
        `lang` string COMMENT '系统语言',
        `source` string COMMENT '渠道号',
        `os` string COMMENT '安卓系统版本',
        `area` string COMMENT '区域',
        `model` string COMMENT '手机型号',
        `brand` string COMMENT '手机品牌',
        `sdk_version` string COMMENT 'sdkVersion',
        `gmail` string COMMENT 'gmail',
        `height_width` string COMMENT '屏幕宽高',
        `app_time` string COMMENT '客户端日志产生时的时间',
        `network` string COMMENT '网络模式',
        `lng` string COMMENT '经度',
        `lat` string COMMENT '纬度',
        `login_date_first` string COMMENT '首次活跃时间',
        `login_date_last` string COMMENT '末次活跃时间',
        `login_day_count` BIGINT COMMENT '当日活跃次数',
        `login_count` BIGINT COMMENT '累积活跃天数'
    ) stored AS parquet location '/warehouse/gmall/dwt/dwt_uv_topic';
    

    2)数据装载

    hive (gmall) > INSERT overwrite TABLE dwt_uv_topic SELECT
        nvl (new.mid_id, old.mid_id),
        nvl (new.user_id, old.user_id),
        nvl (
            new.version_code,
            old.version_code
        ),
        nvl (
            new.version_name,
            old.version_name
        ),
        nvl (new.lang, old.lang),
        nvl (new.source, old.source),
        nvl (new.os, old.os),
        nvl (new.area, old.area),
        nvl (new.model, old.model),
        nvl (new.brand, old.brand),
        nvl (
            new.sdk_version,
            old.sdk_version
        ),
        nvl (new.gmail, old.gmail),
        nvl (
            new.height_width,
            old.height_width
        ),
        nvl (new.app_time, old.app_time),
        nvl (new.network, old.network),
        nvl (new.lng, old.lng),
        nvl (new.lat, old.lat),
    
    IF (
        old.mid_id IS NULL,
        '2020-03-10',
        old.login_date_first
    ),
    
    IF (
        new.mid_id IS NOT NULL,
        '2020-03-10',
        old.login_date_last
    ),
    
    IF (
        new.mid_id IS NOT NULL,
        new.login_count,
        0
    ),
     nvl (old.login_count, 0) +
    IF (new.login_count > 0, 1, 0)
    FROM
        (SELECT * FROM dwt_uv_topic) old
    FULL OUTER JOIN (
        SELECT
            *
        FROM
            dws_uv_detail_daycount
        WHERE
            dt = '2020-03-10'
    ) new ON old.mid_id = new.mid_id;
    

    3)查询加载结果
    hive (gmall)> select * from dwt_uv_topic limit 5;

    2.会员主题宽表

    宽表字段怎么来?维度关联的事实表度量值+开头、结尾+累积+累积一个时间段。


    1)建表语句

    hive (gmall) > DROP TABLE
    IF EXISTS dwt_user_topic;
    
    CREATE external TABLE dwt_user_topic (
        user_id string COMMENT '用户 id',
        login_date_first string COMMENT '首次登录时间',
        login_date_last string COMMENT '末次登录时间',
        login_count BIGINT COMMENT '累积登录天数',
        login_last_30d_count BIGINT COMMENT '最近 30 日登录天数',
        order_date_first string COMMENT '首次下单时间',
        order_date_last string COMMENT '末次下单时间',
        order_count BIGINT COMMENT '累积下单次数',
        order_amount DECIMAL (16, 2) COMMENT '累积下单金额',
        order_last_30d_count BIGINT COMMENT '最近 30 日下单次数',
        order_last_30d_amount BIGINT COMMENT '最近 30 日下单金额',
        payment_date_first string COMMENT '首次支付时间',
        payment_date_last string COMMENT '末次支付时间',
        payment_count DECIMAL (16, 2) COMMENT '累积支付次数',
        payment_amount DECIMAL (16, 2) COMMENT '累积支付金额',
        payment_last_30d_count DECIMAL (16, 2) COMMENT '最近 30 日支付次数',
        payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日支付金额'
    ) COMMENT '用户主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_user_topic/' tblproperties (
        "parquet.compression" = "lzo"
    );
    

    2)数据装载

    hive (gmall) > INSERT overwrite TABLE dwt_user_topic SELECT
        nvl (new.user_id, old.user_id),
    
    IF (
        old.login_date_first IS NULL
        AND new.login_count > 0,
        '2020-03-10',
        old.login_date_first
    ),
    
    IF (
        new.login_count > 0,
        '2020-03-10',
        old.login_date_last
    ),
     nvl (old.login_count, 0) +
    IF (new.login_count > 0, 1, 0),
     nvl (new.login_last_30d_count, 0),
    
    IF (
        old.order_date_first IS NULL
        AND new.order_count > 0,
        '2020-03-10',
        old.order_date_first
    ),
    
    IF (
        new.order_count > 0,
        '2020-03-10',
        old.order_date_last
    ),
     nvl (old.order_count, 0) + nvl (new.order_count, 0),
     nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
     nvl (new.order_last_30d_count, 0),
     nvl (
        new.order_last_30d_amount,
        0
    ),
    
    IF (
        old.payment_date_first IS NULL
        AND new.payment_count > 0,
        '2020-03-10',
        old.payment_date_first
    ),
    
    IF (
        new.payment_count > 0,
        '2020-03-10',
        old.payment_date_last
    ),
     nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
     nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
     nvl (
        new.payment_last_30d_count,
        0
    ),
     nvl (
        new.payment_last_30d_amount,
        0
    )
    FROM
        dwt_user_topic old
    FULL OUTER JOIN (
        SELECT
            user_id,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    login_count,
                    0
                )
            ) login_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    order_count,
                    0
                )
            ) order_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    order_amount,
                    0
                )
            ) order_amount,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    payment_count,
                    0
                )
            ) payment_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    payment_amount,
                    0
                )
            ) payment_amount,
            sum(IF(login_count > 0, 1, 0)) login_last_30d_count,
            sum(order_count) order_last_30d_count,
            sum(order_amount) order_last_30d_amount,
            sum(payment_count) payment_last_30d_count,
            sum(payment_amount) payment_last_30d_amount
        FROM
            dws_user_action_daycount
        WHERE
            dt >= date_add('2020-03-10' ,- 30)
        GROUP BY
            user_id
    ) new ON old.user_id = new.user_id;
    

    3)查询加载结果
    hive (gmall)> select * from dwt_user_topic limit 5;

    3.商品主题宽表

    1)建表语句

    hive (gmall) > DROP TABLE
    IF EXISTS dwt_sku_topic;
    
    CREATE external TABLE dwt_sku_topic (
        sku_id string COMMENT 'sku_id',
        spu_id string COMMENT 'spu_id',
        order_last_30d_count BIGINT COMMENT '最近 30 日被下单次数',
        order_last_30d_num BIGINT COMMENT '最近 30 日被下单件数',
        order_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被下单金额',
        order_count BIGINT COMMENT '累积被下单次数',
        order_num BIGINT COMMENT '累积被下单件数',
        order_amount DECIMAL (16, 2) COMMENT '累积被下单金额',
        payment_last_30d_count BIGINT COMMENT '最近 30 日被支付次数',
        payment_last_30d_num BIGINT COMMENT '最近 30 日被支付件数',
        payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被支付金额',
        payment_count BIGINT COMMENT '累积被支付次数',
        payment_num BIGINT COMMENT '累积被支付件数',
        payment_amount DECIMAL (16, 2) COMMENT '累积被支付金额',
        refund_last_30d_count BIGINT COMMENT '最近三十日退款次数',
        refund_last_30d_num BIGINT COMMENT '最近三十日退款件数',
        refund_last_30d_amount DECIMAL (10, 2) COMMENT '最近三十日退款金额',
        refund_count BIGINT COMMENT '累积退款次数',
        refund_num BIGINT COMMENT '累积退款件数',
        refund_amount DECIMAL (10, 2) COMMENT '累积退款金额',
        cart_last_30d_count BIGINT COMMENT '最近 30 日被加入购物车次数',
        cart_last_30d_num BIGINT COMMENT '最近 30 日被加入购物车件数',
        cart_count BIGINT COMMENT '累积被加入购物车次数',
        cart_num BIGINT COMMENT '累积被加入购物车件数',
        favor_last_30d_count BIGINT COMMENT '最近 30 日被收藏次数',
        favor_count BIGINT COMMENT '累积被收藏次数',
        appraise_last_30d_good_count BIGINT COMMENT '最近 30 日好评数',
        appraise_last_30d_mid_count BIGINT COMMENT '最近 30 日中评数',
        appraise_last_30d_bad_count BIGINT COMMENT '最近 30 日差评数',
        appraise_last_30d_default_count BIGINT COMMENT '最近 30 日默认评价数',
        appraise_good_count BIGINT COMMENT '累积好评数',
        appraise_mid_count BIGINT COMMENT '累积中评数',
        appraise_bad_count BIGINT COMMENT '累积差评数',
        appraise_default_count BIGINT COMMENT '累积默认评价数'
    ) COMMENT '商品主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_sku_topic/' tblproperties (
        "parquet.compression" = "lzo"
    );
    

    2)数据装载

    hive (gmall) > INSERT overwrite TABLE dwt_sku_topic SELECT
        nvl (new.sku_id, old.sku_id),
        sku_info.spu_id,
        nvl (new.order_count30, 0),
        nvl (new.order_num30, 0),
        nvl (new.order_amount30, 0),
        nvl (old.order_count, 0) + nvl (new.order_count, 0),
        nvl (old.order_num, 0) + nvl (new.order_num, 0),
        nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
        nvl (new.payment_count30, 0),
        nvl (new.payment_num30, 0),
        nvl (new.payment_amount30, 0),
        nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
        nvl (old.payment_num, 0) + nvl (new.payment_count, 0),
        nvl (old.payment_amount, 0) + nvl (new.payment_count, 0),
        nvl (new.refund_count30, 0),
        nvl (new.refund_num30, 0),
        nvl (new.refund_amount30, 0),
        nvl (old.refund_count, 0) + nvl (new.refund_count, 0),
        nvl (old.refund_num, 0) + nvl (new.refund_num, 0),
        nvl (old.refund_amount, 0) + nvl (new.refund_amount, 0),
        nvl (new.cart_count30, 0),
        nvl (new.cart_num30, 0),
        nvl (old.cart_count, 0) + nvl (new.cart_count, 0),
        nvl (old.cart_num, 0) + nvl (new.cart_num, 0),
        nvl (new.favor_count30, 0),
        nvl (old.favor_count, 0) + nvl (new.favor_count, 0),
        nvl (
            new.appraise_good_count30,
            0
        ),
        nvl (new.appraise_mid_count30, 0),
        nvl (new.appraise_bad_count30, 0),
        nvl (
            new.appraise_default_count30,
            0
        ),
        nvl (old.appraise_good_count, 0) + nvl (new.appraise_good_count, 0),
        nvl (old.appraise_mid_count, 0) + nvl (new.appraise_mid_count, 0),
        nvl (old.appraise_bad_count, 0) + nvl (new.appraise_bad_count, 0),
        nvl (
            old.appraise_default_count,
            0
        ) + nvl (
            new.appraise_default_count,
            0
        )
    FROM
        (
            SELECT
                sku_id,
                spu_id,
                order_last_30d_count,
                order_last_30d_num,
                order_last_30d_amount,
                order_count,
                order_num,
                order_amount,
                payment_last_30d_count,
                payment_last_30d_num,
                payment_last_30d_amount,
                payment_count,
                payment_num,
                payment_amount,
                refund_last_30d_count,
                refund_last_30d_num,
                refund_last_30d_amount,
                refund_count,
                refund_num,
                refund_amount,
                cart_last_30d_count,
                cart_last_30d_num,
                cart_count,
                cart_num,
                favor_last_30d_count,
                favor_count,
                appraise_last_30d_good_count,
                appraise_last_30d_mid_count,
                appraise_last_30d_bad_count,
                appraise_last_30d_default_count,
                appraise_good_count,
                appraise_mid_count,
                appraise_bad_count,
                appraise_default_count
            FROM
                dwt_sku_topic
        ) old
    FULL OUTER JOIN (
        SELECT
            sku_id,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    order_count,
                    0
                )
            ) order_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    order_num,
                    0
                )
            ) order_num,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    order_amount,
                    0
                )
            ) order_amount,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    payment_count,
                    0
                )
            ) payment_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    payment_num,
                    0
                )
            ) payment_num,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    payment_amount,
                    0
                )
            ) payment_amount,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    refund_count,
                    0
                )
            ) refund_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    refund_num,
                    0
                )
            ) refund_num,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    refund_amount,
                    0
                )
            ) refund_amount,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    cart_count,
                    0
                )
            ) cart_count,
            sum(
    
                IF (dt = '2020-03-10', cart_num, 0)
            ) cart_num,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    favor_count,
                    0
                )
            ) favor_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    appraise_good_count,
                    0
                )
            ) appraise_good_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    appraise_mid_count,
                    0
                )
            ) appraise_mid_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    appraise_bad_count,
                    0
                )
            ) appraise_bad_count,
            sum(
    
                IF (
                    dt = '2020-03-10',
                    appraise_default_count,
                    0
                )
            ) appraise_default_count,
            sum(order_count) order_count30,
            sum(order_num) order_num30,
            sum(order_amount) order_amount30,
            sum(payment_count) payment_count30,
            sum(payment_num) payment_num30,
            sum(payment_amount) payment_amount30,
            sum(refund_count) refund_count30,
            sum(refund_num) refund_num30,
            sum(refund_amount) refund_amount30,
            sum(cart_count) cart_count30,
            sum(cart_num) cart_num30,
            sum(favor_count) favor_count30,
            sum(appraise_good_count) appraise_good_count30,
            sum(appraise_mid_count) appraise_mid_count30,
            sum(appraise_bad_count) appraise_bad_count30,
            sum(appraise_default_count) appraise_default_count30
        FROM
            dws_sku_action_daycount
        WHERE
            dt >= date_add('2020-03-10', - 30)
        GROUP BY
            sku_id
    ) new ON new.sku_id = old.sku_id
    LEFT JOIN (
        SELECT
            *
        FROM
            dwd_dim_sku_info
        WHERE
            dt = '2020-03-10'
    ) sku_info ON nvl (new.sku_id, old.sku_id) = sku_info.id;
    

    3)查询加载结果
    hive (gmall)> select * from dwt_sku_topic limit 5;

    4.优惠券主题宽表

    1)建表语句

    hive (gmall) > DROP TABLE
    IF EXISTS dwt_coupon_topic;
    
    CREATE external TABLE dwt_coupon_topic (
        `coupon_id` string COMMENT '优惠券 ID',
        `get_day_count` BIGINT COMMENT '当日领用次数',
        `using_day_count` BIGINT COMMENT '当日使用(下单)次数',
        `used_day_count` BIGINT COMMENT '当日使用(支付)次数',
        `get_count` BIGINT COMMENT '累积领用次数',
        `using_count` BIGINT COMMENT '累积使用(下单)次数',
        `used_count` BIGINT COMMENT '累积使用(支付)次数'
    ) COMMENT '购物券主题宽表' stored AS parquetlocation '/warehouse/gmall/dwt/dwt_coupon_topic/' tblproperties (
        "parquet.compression" = "lzo"
    );
    

    2)数据装载

    hive (gmall)> insert overwrite table dwt_coupon_topic select nvl(new.coupon_id,old.coupon_id), nvl(new.get_count,0), nvl(new.using_count,0), nvl(new.used_count,0), nvl(old.get_count,0)+nvl(new.get_count,0), nvl(old.using_count,0)+nvl(new.using_count,0), nvl(old.used_count,0)+nvl(new.used_count,0) from ( select * from dwt_coupon_topic )old full outer join ( select coupon_id, get_count, using_count, used_count from dws_coupon_use_daycount where dt='2020-03-10' )new on old.coupon_id=new.coupon_id;
    

    3)查询加载结果
    hive (gmall)> select * from dwt_coupon_topic limit 5;

    5.活动主题宽表

    1)建表语句

    hive (gmall)> drop table if exists dwt_activity_topic;
    create external table dwt_activity_topic( `id` string COMMENT '活动 id', `activity_name` string COMMENT '活动名称', `order_day_count` bigint COMMENT '当日日下单次数', `payment_day_count` bigint COMMENT '当日支付次数', `order_count` bigint COMMENT '累积下单次数', `payment_count` bigint COMMENT '累积支付次数' ) COMMENT '活动主题宽表' row format delimited fields terminated by '\t' location '/warehouse/gmall/dwt/dwt_activity_topic/' tblproperties ("parquet.compression"="lzo");
    

    2)数据装载

    hive (gmall)> insert overwrite table dwt_activity_topic select nvl(new.id,old.id), nvl(new.activity_name,old.activity_name), nvl(new.order_count,0), nvl(new.payment_count,0), nvl(old.order_count,0)+nvl(new.order_count,0), nvl(old.payment_count,0)+nvl(new.payment_count,0) from ( select * from dwt_activity_topic )old full outer join ( select id, activity_name, order_count, payment_count from dws_activity_info_daycount where dt='2020-03-10' )new on old.id=new.id;
    

    3)查询加载结果
    hive (gmall)> select * from dwt_activity_topic limit 5;

    6.DWT层数据导入脚本

    1)在/home/atguigu/bin 目录下创建脚本 dws_to_dwt.sh

    [atguigu@hadoop102 bin]$ vim dws_to_dwt.sh
    

    在脚本中填写如下内容

    #!/bin/bash 
    APP=gmall 
    hive=/opt/module/hive/bin/hive 
    # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 
    if [ -n "$1" ] ;
    then 
    do
    _date=$1 
    else
    do
    _date=`date -d "-1 day" +%F` 
    fi
    sql="
    INSERT overwrite TABLE $ { APP }.dwt_uv_topic SELECT
        nvl (new.mid_id, old.mid_id),
        nvl (new.user_id, old.user_id),
        nvl (
            new.version_code,
            old.version_code
        ),
        nvl (
            new.version_name,
            old.version_name
        ),
        nvl (new.lang, old.lang),
        nvl (new.source, old.source),
        nvl (new.os, old.os),
        nvl (new.area, old.area),
        nvl (new.model, old.model),
        nvl (new.brand, old.brand),
        nvl (
            new.sdk_version,
            old.sdk_version
        ),
        nvl (new.gmail, old.gmail),
        nvl (
            new.height_width,
            old.height_width
        ),
        nvl (new.app_time, old.app_time),
        nvl (new.network, old.network),
        nvl (new.lng, old.lng),
        nvl (new.lat, old.lat),
        nvl (
            old.login_date_first,
            '$do_date'
        ),
    
    IF (
        new.login_count > 0,
        '$do_date',
        old.login_date_last
    ),
     nvl (new.login_count, 0),
     nvl (new.login_count, 0) + nvl (old.login_count, 0)
    FROM
        (
            SELECT
                *
            FROM
                $ { APP }.dwt_uv_topic
        ) old
    FULL OUTER JOIN (
        SELECT
            *
        FROM
            $ { APP }.dws_uv_detail_daycount
        WHERE
            dt = '$do_date'
    ) new ON old.mid_id = new.mid_id;
    
    INSERT overwrite TABLE $ { APP }.dwt_user_topic SELECT
        nvl (new.user_id, old.user_id),
    
    IF (
        old.login_date_first IS NULL
        AND new.login_count > 0,
        '$do_date',
        old.login_date_first
    ),
    
    IF (
        new.login_count > 0,
        '$do_date',
        old.login_date_last
    ),
     nvl (old.login_count, 0) +
    IF (new.login_count > 0, 1, 0),
     nvl (new.login_last_30d_count, 0),
    
    IF (
        old.order_date_first IS NULL
        AND new.order_count > 0,
        '$do_date',
        old.order_date_first
    ),
    
    IF (
        new.order_count > 0,
        '$do_date',
        old.order_date_last
    ),
     nvl (old.order_count, 0) + nvl (new.order_count, 0),
     nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
     nvl (new.order_last_30d_count, 0),
     nvl (
        new.order_last_30d_amount,
        0
    ),
    
    IF (
        old.payment_date_first IS NULL
        AND new.payment_count > 0,
        '$do_date',
        old.payment_date_first
    ),
    
    IF (
        new.payment_count > 0,
        '$do_date',
        old.payment_date_last
    ),
     nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
     nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
     nvl (
        new.payment_last_30d_count,
        0
    ),
     nvl (
        new.payment_last_30d_amount,
        0
    )
    FROM
        (
            SELECT
                *
            FROM
                $ { APP }.dwt_user_topic
        ) old
    FULL OUTER JOIN (
        SELECT
            user_id,
            sum(
    
                IF (
                    dt = '$do_date',
                    login_count,
                    0
                )
            ) login_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    order_count,
                    0
                )
            ) order_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    order_amount,
                    0
                )
            ) order_amount,
            sum(
    
                IF (
                    dt = '$do_date',
                    payment_count,
                    0
                )
            ) payment_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    payment_amount,
                    0
                )
            ) payment_amount,
            sum(IF(order_count > 0, 1, 0)) login_last_30d_count,
            sum(order_count) order_last_30d_count,
            sum(order_amount) order_last_30d_amount,
            sum(payment_count) payment_last_30d_count,
            sum(payment_amount) payment_last_30d_amount
        FROM
            $ { APP }.dws_user_action_daycount
        WHERE
            dt >= date_add('$do_date' ,- 30)
        GROUP BY
            user_id
    ) new ON old.user_id = new.user_id;
    
    WITH sku_act AS (
        SELECT
            sku_id,
            sum(
    
                IF (
                    dt = '$do_date',
                    order_count,
                    0
                )
            ) order_count,
            sum(
    
                IF (dt = '$do_date', order_num, 0)
            ) order_num,
            sum(
    
                IF (
                    dt = '$do_date',
                    order_amount,
                    0
                )
            ) order_amount,
            sum(
    
                IF (
                    dt = '$do_date',
                    payment_count,
                    0
                )
            ) payment_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    payment_num,
                    0
                )
            ) payment_num,
            sum(
    
                IF (
                    dt = '$do_date',
                    payment_amount,
                    0
                )
            ) payment_amount,
            sum(
    
                IF (
                    dt = '$do_date',
                    refund_count,
                    0
                )
            ) refund_count,
            sum(
    
                IF (dt = '$do_date', refund_num, 0)
            ) refund_num,
            sum(
    
                IF (
                    dt = '$do_date',
                    refund_amount,
                    0
                )
            ) refund_amount,
            sum(
    
                IF (dt = '$do_date', cart_count, 0)
            ) cart_count,
            sum(
    
                IF (dt = '$do_date', cart_num, 0)
            ) cart_num,
            sum(
    
                IF (
                    dt = '$do_date',
                    favor_count,
                    0
                )
            ) favor_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    appraise_good_count,
                    0
                )
            ) appraise_good_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    appraise_mid_count,
                    0
                )
            ) appraise_mid_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    appraise_bad_count,
                    0
                )
            ) appraise_bad_count,
            sum(
    
                IF (
                    dt = '$do_date',
                    appraise_default_count,
                    0
                )
            ) appraise_default_count,
            sum(order_count) order_count30,
            sum(order_num) order_num30,
            sum(order_amount) order_amount30,
            sum(payment_count) payment_count30,
            sum(payment_num) payment_num30,
            sum(payment_amount) payment_amount30,
            sum(refund_count) refund_count30,
            sum(refund_num) refund_num30,
            sum(refund_amount) refund_amount30,
            sum(cart_count) cart_count30,
            sum(cart_num) cart_num30,
            sum(favor_count) favor_count30,
            sum(appraise_good_count) appraise_good_count30,
            sum(appraise_mid_count) appraise_mid_count30,
            sum(appraise_bad_count) appraise_bad_count30,
            sum(appraise_default_count) appraise_default_count30
        FROM
            $ { APP }.dws_sku_action_daycount
        WHERE
            dt >= date_add('$do_date' ,- 30)
        GROUP BY
            sku_id
    ),
     sku_topic AS (
        SELECT
            sku_id,
            spu_id,
            order_last_30d_count,
            order_last_30d_num,
            order_last_30d_amount,
            order_count,
            order_num,
            order_amount,
            payment_last_30d_count,
            payment_last_30d_num,
            payment_last_30d_amount,
            payment_count,
            payment_num,
            payment_amount,
            refund_last_30d_count,
            refund_last_30d_num,
            refund_last_30d_amount,
            refund_count,
            refund_num,
            refund_amount,
            cart_last_30d_count,
            cart_last_30d_num,
            cart_count,
            cart_num,
            favor_last_30d_count,
            favor_count,
            appraise_last_30d_good_count,
            appraise_last_30d_mid_count,
            appraise_last_30d_bad_count,
            appraise_last_30d_default_count,
            appraise_good_count,
            appraise_mid_count,
            appraise_bad_count,
            appraise_default_count
        FROM
            $ { APP }.dwt_sku_topic
    ) INSERT overwrite TABLE $ { APP }.dwt_sku_topic SELECT
        nvl (
            sku_act.sku_id,
            sku_topic.sku_id
        ),
        sku_info.spu_id,
        nvl (sku_act.order_count30, 0),
        nvl (sku_act.order_num30, 0),
        nvl (sku_act.order_amount30, 0),
        nvl (sku_topic.order_count, 0) + nvl (sku_act.order_count, 0),
        nvl (sku_topic.order_num, 0) + nvl (sku_act.order_num, 0),
        nvl (sku_topic.order_amount, 0) + nvl (sku_act.order_amount, 0),
        nvl (sku_act.payment_count30, 0),
        nvl (sku_act.payment_num30, 0),
        nvl (sku_act.payment_amount30, 0),
        nvl (sku_topic.payment_count, 0) + nvl (sku_act.payment_count, 0),
        nvl (sku_topic.payment_num, 0) + nvl (sku_act.payment_count, 0),
        nvl (sku_topic.payment_amount, 0) + nvl (sku_act.payment_count, 0),
        nvl (refund_count30, 0),
        nvl (sku_act.refund_num30, 0),
        nvl (sku_act.refund_amount30, 0),
        nvl (sku_topic.refund_count, 0) + nvl (sku_act.refund_count, 0),
        nvl (sku_topic.refund_num, 0) + nvl (sku_act.refund_num, 0),
        nvl (sku_topic.refund_amount, 0) + nvl (sku_act.refund_amount, 0),
        nvl (sku_act.cart_count30, 0),
        nvl (sku_act.cart_num30, 0),
        nvl (sku_topic.cart_count, 0) + nvl (sku_act.cart_count, 0),
        nvl (sku_topic.cart_num, 0) + nvl (sku_act.cart_num, 0),
        nvl (sku_act.favor_count30, 0),
        nvl (sku_topic.favor_count, 0) + nvl (sku_act.favor_count, 0),
        nvl (
            sku_act.appraise_good_count30,
            0
        ),
        nvl (
            sku_act.appraise_mid_count30,
            0
        ),
        nvl (
            sku_act.appraise_bad_count30,
            0
        ),
        nvl (
            sku_act.appraise_default_count30,
            0
        ),
        nvl (
            sku_topic.appraise_good_count,
            0
        ) + nvl (
            sku_act.appraise_good_count,
            0
        ),
        nvl (
            sku_topic.appraise_mid_count,
            0
        ) + nvl (
            sku_act.appraise_mid_count,
            0
        ),
        nvl (
            sku_topic.appraise_bad_count,
            0
        ) + nvl (
            sku_act.appraise_bad_count,
            0
        ),
        nvl (
            sku_topic.appraise_default_count,
            0
        ) + nvl (
            sku_act.appraise_default_count,
            0
        )
    FROM
        sku_act
    FULL OUTER JOIN sku_topic ON sku_act.sku_id = sku_topic.sku_id
    LEFT JOIN (
        SELECT
            *
        FROM
            $ { APP }.dwd_dim_sku_info
        WHERE
            dt = '$do_date'
    ) sku_info ON nvl (
        sku_topic.sku_id,
        sku_act.sku_id
    ) = sku_info.id;
    
    INSERT overwrite TABLE $ { APP }.dwt_coupon_topic SELECT
        nvl (
            new.coupon_id,
            old.coupon_id
        ),
        nvl (new.get_count, 0),
        nvl (new.using_count, 0),
        nvl (new.used_count, 0),
        nvl (old.get_count, 0) + nvl (new.get_count, 0),
        nvl (old.using_count, 0) + nvl (new.using_count, 0),
        nvl (old.used_count, 0) + nvl (new.used_count, 0)
    FROM
        (
            SELECT
                *
            FROM
                $ { APP }.dwt_coupon_topic
        ) old
    FULL OUTER JOIN (
        SELECT
            coupon_id,
            get_count,
            using_count,
            used_count
        FROM
            $ { APP }.dws_coupon_use_daycount
        WHERE
            dt = '$do_date'
    ) new ON old.coupon_id = new.coupon_id;
    
    INSERT overwrite TABLE $ { APP }.dwt_activity_topic SELECT
        nvl (new.id, old.id),
        nvl (
            new.activity_name,
            old.activity_name
        ),
        nvl (new.order_count, 0),
        nvl (new.payment_count, 0),
        nvl (old.order_count, 0) + nvl (new.order_count, 0),
        nvl (old.payment_count, 0) + nvl (new.payment_count, 0)
    FROM
        (
            SELECT
                *
            FROM
                $ { APP }.dwt_activity_topic
        ) old
    FULL OUTER JOIN (
        SELECT
            id,
            activity_name,
            order_count,
            payment_count
        FROM
            $ { APP }.dws_activity_info_daycount
        WHERE
            dt = '$do_date'
    ) new ON old.id = new.id;
    "
    $hive -e "$sql"
    

    2)增加脚本执行权限

    [atguigu@hadoop102 bin]$ chmod 777 dws_to_dwt.sh
    

    3)执行脚本导入数据

    [atguigu@hadoop102 bin]$ dws_to_dwt.sh 2020-03-11
    

    4)查看导入数据

    hive (gmall)> 
    select * from dwt_uv_topic limit 5; 
    select * from dwt_user_topic limit 5; 
    select * from dwt_sku_topic limit 5; 
    select * from dwt_coupon_topic limit 5; 
    select * from dwt_activity_topic limit 5;
    

    相关文章

      网友评论

        本文标题:数仓实战06:数仓搭建-DWT层

        本文链接:https://www.haomeiwen.com/subject/imizrktx.html