美文网首页
百问中台:数据中台ODDA之DWS层建模

百问中台:数据中台ODDA之DWS层建模

作者: sknfie | 来源:发表于2021-03-30 14:30 被阅读0次

    概述

    DWD完成后,接下来就是进行DWS建模。将具有相同分析主题的DWD层数据,聚合层宽表模型,便于数据分析和计算。

    ODDA建模

    DWS模型

    从订单表、用户表和支付流水表,用户行为表
    从订单表、订单详情表、商品表、用户表和支付流水表,用户购买商品明细表

    1. 建表

    -- 进入数据库
    use mall;
    
    -- 创建用户行为宽表
    drop table if exists dws_user_action;
    create  external table dws_user_action
    (
        user_id         string      comment '用户 id',
        order_count     bigint      comment '下单次数 ',
        order_amount    decimal(16,2)  comment '下单金额 ',
        payment_count   bigint      comment '支付次数',
        payment_amount  decimal(16,2) comment '支付金额 '
    ) COMMENT '每日用户行为宽表'
    PARTITIONED BY ( `dt` string)
    stored as  parquet
    location '/warehouse/mall/dws/dws_user_action/'
    tblproperties ("parquet.compression"="snappy");
    
    -- 创建用户购买商品明细表
    drop table if exists  dws_sale_detail_daycount;
    create external table  dws_sale_detail_daycount
    (   user_id   string  comment '用户 id',
        sku_id    string comment '商品 Id',
        user_gender  string comment '用户性别',
        user_age string  comment '用户年龄',
        user_level string comment '用户等级',
        order_price decimal(10,2) comment '订单价格',
        sku_name string   comment '商品名称',
        sku_tm_id string   comment '品牌id',
        sku_category3_id string comment '商品三级品类id',
        sku_category2_id string comment '商品二级品类id',
        sku_category1_id string comment '商品一级品类id',
        sku_category3_name string comment '商品三级品类名称',
        sku_category2_name string comment '商品二级品类名称',
        sku_category1_name string comment '商品一级品类名称',
        spu_id  string comment '商品 spu',
        sku_num  int comment '购买个数',
        order_count string comment '当日下单单数',
        order_amount string comment '当日下单金额'
    ) COMMENT '用户购买商品明细表'
    PARTITIONED BY ( `dt` string)
    stored as  parquet
    location '/warehouse/mall/dws/dws_user_sale_detail_daycount/'
    tblproperties ("parquet.compression"="snappy");
    

    执行建表语句:

    hive -f /home/warehouse/sql/dws_ddl.sql
    

    2. 脚本

    #!/bin/bash
    
    # 定义变量方便修改
    APP=mall
    hive=hive
    
    # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
    if [ -n $1 ] ;then
            log_date=$1
    else 
            log_date=`date  -d "-1 day"  +%F`  
    fi 
    
    # 用户行为宽表
    function user_actions()
    {
        # 定义变量
        APP=$1
        hive=$2
        log_date=$3
    
        sql="
        
        with  
        tmp_order as
        (
            select 
            user_id, 
            sum(oc.total_amount) order_amount, 
            count(*)  order_count
            from "$APP".dwd_order_info  oc
            where date_format(oc.create_time,'yyyy-MM-dd')='$log_date'
            group by user_id
        ),
        tmp_payment as
        (
            select 
            user_id, 
            sum(pi.total_amount) payment_amount, 
            count(*) payment_count 
            from "$APP".dwd_payment_info pi 
            where date_format(pi.payment_time,'yyyy-MM-dd')='$log_date'
            group by user_id
        )
    
        insert  overwrite table "$APP".dws_user_action partition(dt='$log_date')
        select 
            user_actions.user_id, 
            sum(user_actions.order_count), 
            sum(user_actions.order_amount),
            sum(user_actions.payment_count), 
            sum(user_actions.payment_amount)
        from 
        (
            select 
            user_id, 
            order_count,
            order_amount ,
            0 payment_count , 
            0 payment_amount
            from tmp_order 
    
            union all
            select 
            user_id, 
            0,
            0, 
            payment_count, 
            payment_amount
            from tmp_payment
        ) user_actions
        group by user_id;
    
        "
    
        $hive -e "$sql"
    }
    
    function user_sales()
    {
        # 定义变量
        APP=$1
        hive=$2
        log_date=$3
    
        sql="
    
        set hive.exec.dynamic.partition.mode=nonstrict;
    
        with
        tmp_detail as
        (
            select 
                user_id,
                sku_id, 
                sum(sku_num) sku_num ,   
                count(*) order_count , 
                sum(od.order_price*sku_num)  order_amount 
            from "$APP".dwd_order_detail od
            where od.dt='$log_date' and user_id is not null
            group by user_id, sku_id
        )  
        insert overwrite table  "$APP".dws_sale_detail_daycount partition(dt='$log_date')
        select 
            tmp_detail.user_id,
            tmp_detail.sku_id,
            u.gender,
            months_between('$log_date', u.birthday)/12  age, 
            u.user_level,
            price,
            sku_name,
            tm_id,
            category3_id ,  
            category2_id ,  
            category1_id ,  
            category3_name ,  
            category2_name ,  
            category1_name ,  
            spu_id,
            tmp_detail.sku_num,
            tmp_detail.order_count,
            tmp_detail.order_amount 
        from tmp_detail 
        left join "$APP".dwd_user_info u 
        on u.id=tmp_detail.user_id  and u.dt='$log_date'
        left join "$APP".dwd_sku_info s on tmp_detail.sku_id =s.id  and s.dt='$log_date';
    
        "
        $hive -e "$sql"
    }
    
    user_actions $APP $hive $log_date
    

    3. 结果

    hive> select * from dws_user_action limit 2;
    OK
    SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
    SLF4J: Defaulting to no-operation (NOP) logger implementation
    SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
    1       1       402     0       0       2021-03-24
    10      1       205     1       205     2021-03-24
    Time taken: 0.843 seconds, Fetched: 2 row(s)
    hive> select * from dws_sale_detail_daycount limit 2;
    OK
    1       135     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2       1       8958.0  2021-03-24
    1       141     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    1       1       571.0   2021-03-24
    Time taken: 0.119 seconds, Fetched: 2 row(s)
    

    相关文章

      网友评论

          本文标题:百问中台:数据中台ODDA之DWS层建模

          本文链接:https://www.haomeiwen.com/subject/pktbhltx.html