美文网首页数仓实战1(阿里架构)
数仓项目02:DWD明细粒度事实层

数仓项目02:DWD明细粒度事实层

作者: 勇于自信 | 来源:发表于2020-06-24 11:44 被阅读0次

    2. 第二层DW主题层

    2.1 DWD明细层

    2.1.1 用户主题

    DWD明细粒度事实层Hive数据库建库建表:
    创建Hive库并进入:
    create database if not exists dwd_nshop;
    use dwd_nshop;

    在DWD层开始入库之前,我们有一张通用字典表,在mysql已经导入,为后面的数据筛选做条件过滤:


    2.1.1.1 用户启动日志表【DWD】
    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_launch (
    user_id string COMMENT '用户id',
    device_num string COMMENT '设备号',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    network_type string COMMENT '网络类型',
    area_code string COMMENT '地区编码',
    launch_time_segment string COMMENT '启动时间段',
    ct BIGINT COMMENT '产生时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_launch/';
    

    从ods层事件表里选择action=02(启动)的事件数据导入即可:

    insert overwrite table dwd_nshop.dwd_nshop_actlog_launch partition(bdp_day="20200618")
    select
    customer_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    from_unixtime(cast(ct/1000 as int),'HH') as launch_time_segment,
    ct
    from
    ods_nshop.ods_nshop_01_useractlog
    where
    bdp_day = "20200618"
    and 
    action = "02"
    
    2.1.1.2 用户产品浏览表【DWD】
    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtview (
    user_id string COMMENT '用户id',
    device_num string COMMENT '设备号',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    network_type string COMMENT '网络类型',
    area_code string COMMENT '地区编码',
    target_id string COMMENT '产品ID',
    ct BIGINT COMMENT '产生时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtview/'
    

    从ods层的事件表里筛选出action为07或08的数据,即为用户浏览数据:

    insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtview partition(bdp_day="20200618")
    select
    customer_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    get_json_object(extinfo,'$.target_id')as target_id,
    ct
    from
    ods_nshop.ods_nshop_01_useractlog
    where
    bdp_day = "20200618"
    and 
    action in('07','08')
    
    2.1.1.3 用户产品查询表【DWD】
    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtsearch (
    user_id string COMMENT '用户id',
    device_num string COMMENT '设备号',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    network_type string COMMENT '网络类型',
    area_code string COMMENT '地区编码',
    target_order string COMMENT '查询排序方式',
    target_keys string COMMENT '查询内容',
    target_id string COMMENT '产品ID',
    ct BIGINT COMMENT '产生时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtsearch/';
    

    查看公共字典表:




    我们知道,选择action为05(交互)和event_type为01(浏览)或04(滑动)的行为是用户查询行为,因此写出下列sql语句

    insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtsearch partition(bdp_day="20200618")
    select
    customer_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    get_json_object(extinfo,'$.target_order')as target_order,
    get_json_object(extinfo,'$.target_keys')as target_keys,
    get_json_object(extinfo,'$.target_id')as target_id,
    ct
    from
    ods_nshop.ods_nshop_01_useractlog
    --lateral view explode(split(regexp_replace(get_json_object(extinfo,'$target_ids'),'[\\[\\"\\]]',''),','))t as target_id
    where
    bdp_day = "20200618"
    and 
    action = '05'
    and
    event_type in('01','04') 
    
    2.1.1.4 用户产品关注表【DWD】
    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_actlog_product_comment (
    user_id string COMMENT '用户id',
    device_num string COMMENT '设备号',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    network_type string COMMENT '网络类型',
    area_code string COMMENT '地区编码',
    target_id string COMMENT '产品ID',
    ct BIGINT COMMENT '产生时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_actlog_product_comment/';
    

    选择action ='05'(交互),event_type='02'(点击),并且target_action='01'(店铺关注)的事件即可:

    with log_attend as(
    select
    customer_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    get_json_object(extinfo,'$.target_action')as target_action,
    get_json_object(extinfo,'$.target_id')as target_id,
    extinfo,
    ct,
    bdp_day
    from
    ods_nshop.ods_nshop_01_useractlog
    where
    bdp_day = "20200618"
    and 
    action ='05'
    and 
    event_type='02'
    )
    
    insert overwrite table dwd_nshop.dwd_actlog_product_comment partition(bdp_day="20200618")
    select
    customer_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    target_id,
    ct
    from 
    log_attend where target_action='01'
    
    2.1.2 交易主题
    2.1.2.1 交易订单明细流水表【DWD】
    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_nshop_orders_details (
    order_id string COMMENT '订单ID',
    order_status INT COMMENT '订单状态:5已收货(完成)|6投诉 7退货',
    supplier_code string COMMENT '店铺ID',
    product_code string COMMENT '商品ID',
    customer_id string COMMENT '用户id',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    user_areacode string COMMENT '用户所在地区',
    consignee_zipcode string COMMENT '收货人地址',
    pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支 付(货到付款) 20 ',
    pay_count INT COMMENT '支付次数',
    product_price DECIMAL (5, 1) COMMENT '购买商品单价',
    weighing_cost DECIMAL (2, 1) COMMENT '商品加权价格',
    district_money DECIMAL (4, 1) COMMENT '优惠金额',
    is_activity int COMMENT '1:参加活动|0:没有参加活动',
    order_ctime BIGINT COMMENT '创建时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_nshop_orders_details/';
    

    订单明细流水表的数据主要是从ods层的订单详细表里抽取,结合公共产品维度表查出产品相关信息,另外还结合订单支付记录表的数据,查出支付相关信息,sql语句如下:

    with tborder as(
      select
        order_id,
        order_status,
        customer_id,
        consignee_zipcode,
        pay_type,
        order_ctime,
        from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
      from ods_nshop.ods_02_orders
      where 
        from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') = '20200618'
    ),
    tbdetail as(
      select 
        a.order_id,
        a.product_id,
        a.product_cnt,
        a.weighing_cost,
        a.district_money,
        a.is_activity,
        b.supplier_code,
        b.product_price,
        from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
      from ods_nshop.ods_02_order_detail a
      join ods_nshop.dim_pub_product b 
      on a.product_id=b.product_code 
      where   
    from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd')='20200618'
    ),
    pays as (
        select 
            count(*) as pay_count,
            order_id,
            from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
        from  ods_nshop.ods_02_orders_pay_records 
        where 
           from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')='20200618'
        group by 
         order_id,
        from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')
    )
    insert overwrite table dwd_nshop.dwd_nshop_orders_details partition (bdp_day)
    select 
        tborder.order_id,
        tborder.order_status,
        tbdetail.supplier_code,
        tbdetail.product_id,
        tborder.customer_id,
        tborder.consignee_zipcode,
        tborder.pay_type,
        pays.pay_count,
        tbdetail.product_price,
        tbdetail.product_cnt,
        tbdetail.weighing_cost,
        tbdetail.district_money,
        tbdetail.is_activity,
        tborder.order_ctime,
        tborder.bdp_day
    from tborder join tbdetail 
    on tborder.order_id=tbdetail.order_id
    join pays 
    on tbdetail.order_id=pays.order_id
    

    2.2.1 订单支付表【DWD】

    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_order_pay (
    pay_id string COMMENT '订单支付记录ID',
    order_id string COMMENT '订单ID',
    customer_id string COMMENT '用户ID',
    user_id string COMMENT '用户id',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    carrier string COMMENT '电信运营商',
    network_type string COMMENT '网络类型',
    area_code string COMMENT '地区编码',
    longitude string COMMENT '经度',
    latitude string COMMENT '纬度',
    page_id string COMMENT '行为所在页',
    target_action string COMMENT '参考点击目标类型说明',
    target_id string COMMENT '订单id',
    target_price DOUBLE COMMENT '订单价格',
    target_status string COMMENT '支付状态:0 未支付| 1 支付中 | 2 已支付',
    target_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线 下支付(货到付款) 20 ',
    ct BIGINT COMMENT '产生时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_order_pay/'
    
    2.1.3 营销活动主题

    广告投放数据表【DWD】

    CREATE external TABLE
    IF NOT EXISTS dwd_nshop.dwd_nshop_releasedatas (
    customer_id string COMMENT '用户id',
    device_num string COMMENT '设备号',
    device_type string COMMENT '设备类型',
    os string COMMENT '手机系统',
    os_version string COMMENT '手机系统版本',
    manufacturer string COMMENT '手机制造商',
    area_code string COMMENT '地区编码',
    release_sid string COMMENT '投放请求id',
    release_ip string COMMENT '投放方ip',
    release_session string COMMENT '投放会话id',
    release_sources string COMMENT '投放渠道',
    release_category string COMMENT '投放浏览产品分类',
    release_product string COMMENT '投放浏览产品',
    release_product_page string COMMENT '投放浏览产品页',
    ct BIGINT COMMENT '创建时间'
    ) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/release/dwd_nshop_releasedatas/'
    

    这里需要关联两张中间表,一个是产品表,查出产品分类,一个是页面布局表,选择产品页,两张表结构如下:


    页面布局表

    插入数据:

    with tbrelease as(
    select
    c.customer_id,
    r.device_num ,
    r.device_type ,
    r.os , 
    r.os_version ,
    r.manufacturer ,
    r.area_code ,
    r.release_sid ,
    parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','ip')as release_ip,
    r.release_session,
    r.release_sources,   
    parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','productPage')as release_product_page,
    r.ct
    from ods_nshop.ods_01_releasedatas r
    join ods_nshop.ods_02_customer c
    on r.customer_id=c.customer_id 
    where 
    bdp_day='20200618'
    )
    insert overwrite table dwd_nshop.dwd_nshop_releasedatas partition(bdp_day='20200618')
    select 
    a.customer_id,
    a.device_num ,
    a.device_type ,
    a.os , 
    a.os_version ,
    a.manufacturer, 
    a.area_code ,
    a.release_sid ,
    a.release_ip,
    a.release_session,
    a.release_sources, 
    f.category_code release_category,
    p.page_target release_product,
    a.release_product_page,
    a.ct
    from tbrelease a
    join dim_nshop.dim_pub_page p 
    on a.release_product_page=p.page_code and p.page_type='4'
    join dim_nshop.dim_pub_product f
    on p.page_code=f.product_code;
    

    相关文章

      网友评论

        本文标题:数仓项目02:DWD明细粒度事实层

        本文链接:https://www.haomeiwen.com/subject/qhkrfktx.html