美文网首页
百问中台:数据中台ODDA之DWS层建模

百问中台:数据中台ODDA之DWS层建模

作者: sknfie | 来源:发表于2021-03-30 14:30 被阅读0次

概述

DWD完成后,接下来就是进行DWS建模。将具有相同分析主题的DWD层数据,聚合层宽表模型,便于数据分析和计算。

ODDA建模

DWS模型

从订单表、用户表和支付流水表,用户行为表
从订单表、订单详情表、商品表、用户表和支付流水表,用户购买商品明细表

1. 建表

-- 进入数据库
use mall;

-- 创建用户行为宽表
drop table if exists dws_user_action;
create  external table dws_user_action
(
    user_id         string      comment '用户 id',
    order_count     bigint      comment '下单次数 ',
    order_amount    decimal(16,2)  comment '下单金额 ',
    payment_count   bigint      comment '支付次数',
    payment_amount  decimal(16,2) comment '支付金额 '
) COMMENT '每日用户行为宽表'
PARTITIONED BY ( `dt` string)
stored as  parquet
location '/warehouse/mall/dws/dws_user_action/'
tblproperties ("parquet.compression"="snappy");

-- 创建用户购买商品明细表
drop table if exists  dws_sale_detail_daycount;
create external table  dws_sale_detail_daycount
(   user_id   string  comment '用户 id',
    sku_id    string comment '商品 Id',
    user_gender  string comment '用户性别',
    user_age string  comment '用户年龄',
    user_level string comment '用户等级',
    order_price decimal(10,2) comment '订单价格',
    sku_name string   comment '商品名称',
    sku_tm_id string   comment '品牌id',
    sku_category3_id string comment '商品三级品类id',
    sku_category2_id string comment '商品二级品类id',
    sku_category1_id string comment '商品一级品类id',
    sku_category3_name string comment '商品三级品类名称',
    sku_category2_name string comment '商品二级品类名称',
    sku_category1_name string comment '商品一级品类名称',
    spu_id  string comment '商品 spu',
    sku_num  int comment '购买个数',
    order_count string comment '当日下单单数',
    order_amount string comment '当日下单金额'
) COMMENT '用户购买商品明细表'
PARTITIONED BY ( `dt` string)
stored as  parquet
location '/warehouse/mall/dws/dws_user_sale_detail_daycount/'
tblproperties ("parquet.compression"="snappy");

执行建表语句:

hive -f /home/warehouse/sql/dws_ddl.sql

2. 脚本

#!/bin/bash

# 定义变量方便修改
APP=mall
hive=hive

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n $1 ] ;then
        log_date=$1
else 
        log_date=`date  -d "-1 day"  +%F`  
fi 

# 用户行为宽表
function user_actions()
{
    # 定义变量
    APP=$1
    hive=$2
    log_date=$3

    sql="
    
    with  
    tmp_order as
    (
        select 
        user_id, 
        sum(oc.total_amount) order_amount, 
        count(*)  order_count
        from "$APP".dwd_order_info  oc
        where date_format(oc.create_time,'yyyy-MM-dd')='$log_date'
        group by user_id
    ),
    tmp_payment as
    (
        select 
        user_id, 
        sum(pi.total_amount) payment_amount, 
        count(*) payment_count 
        from "$APP".dwd_payment_info pi 
        where date_format(pi.payment_time,'yyyy-MM-dd')='$log_date'
        group by user_id
    )

    insert  overwrite table "$APP".dws_user_action partition(dt='$log_date')
    select 
        user_actions.user_id, 
        sum(user_actions.order_count), 
        sum(user_actions.order_amount),
        sum(user_actions.payment_count), 
        sum(user_actions.payment_amount)
    from 
    (
        select 
        user_id, 
        order_count,
        order_amount ,
        0 payment_count , 
        0 payment_amount
        from tmp_order 

        union all
        select 
        user_id, 
        0,
        0, 
        payment_count, 
        payment_amount
        from tmp_payment
    ) user_actions
    group by user_id;

    "

    $hive -e "$sql"
}

function user_sales()
{
    # 定义变量
    APP=$1
    hive=$2
    log_date=$3

    sql="

    set hive.exec.dynamic.partition.mode=nonstrict;

    with
    tmp_detail as
    (
        select 
            user_id,
            sku_id, 
            sum(sku_num) sku_num ,   
            count(*) order_count , 
            sum(od.order_price*sku_num)  order_amount 
        from "$APP".dwd_order_detail od
        where od.dt='$log_date' and user_id is not null
        group by user_id, sku_id
    )  
    insert overwrite table  "$APP".dws_sale_detail_daycount partition(dt='$log_date')
    select 
        tmp_detail.user_id,
        tmp_detail.sku_id,
        u.gender,
        months_between('$log_date', u.birthday)/12  age, 
        u.user_level,
        price,
        sku_name,
        tm_id,
        category3_id ,  
        category2_id ,  
        category1_id ,  
        category3_name ,  
        category2_name ,  
        category1_name ,  
        spu_id,
        tmp_detail.sku_num,
        tmp_detail.order_count,
        tmp_detail.order_amount 
    from tmp_detail 
    left join "$APP".dwd_user_info u 
    on u.id=tmp_detail.user_id  and u.dt='$log_date'
    left join "$APP".dwd_sku_info s on tmp_detail.sku_id =s.id  and s.dt='$log_date';

    "
    $hive -e "$sql"
}

user_actions $APP $hive $log_date

3. 结果

hive> select * from dws_user_action limit 2;
OK
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
1       1       402     0       0       2021-03-24
10      1       205     1       205     2021-03-24
Time taken: 0.843 seconds, Fetched: 2 row(s)
hive> select * from dws_sale_detail_daycount limit 2;
OK
1       135     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2       1       8958.0  2021-03-24
1       141     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    1       1       571.0   2021-03-24
Time taken: 0.119 seconds, Fetched: 2 row(s)

相关文章

网友评论

      本文标题:百问中台:数据中台ODDA之DWS层建模

      本文链接:https://www.haomeiwen.com/subject/pktbhltx.html