2. 第二层DW主题层
2.1 DWD明细层
2.1.1 用户主题
DWD明细粒度事实层Hive数据库建库建表:
创建Hive库并进入:
create database if not exists dwd_nshop;
use dwd_nshop;
在DWD层开始入库之前,我们有一张通用字典表,在mysql已经导入,为后面的数据筛选做条件过滤:
2.1.1.1 用户启动日志表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_launch (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
launch_time_segment string COMMENT '启动时间段',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_launch/';
从ods层事件表里选择action=02(启动)的事件数据导入即可:
insert overwrite table dwd_nshop.dwd_nshop_actlog_launch partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
from_unixtime(cast(ct/1000 as int),'HH') as launch_time_segment,
ct
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action = "02"
2.1.1.2 用户产品浏览表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtview (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
target_id string COMMENT '产品ID',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtview/'
从ods层的事件表里筛选出action为07或08的数据,即为用户浏览数据:
insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtview partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_id')as target_id,
ct
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action in('07','08')
2.1.1.3 用户产品查询表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtsearch (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
target_order string COMMENT '查询排序方式',
target_keys string COMMENT '查询内容',
target_id string COMMENT '产品ID',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtsearch/';
查看公共字典表:
我们知道,选择action为05(交互)和event_type为01(浏览)或04(滑动)的行为是用户查询行为,因此写出下列sql语句
insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtsearch partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_order')as target_order,
get_json_object(extinfo,'$.target_keys')as target_keys,
get_json_object(extinfo,'$.target_id')as target_id,
ct
from
ods_nshop.ods_nshop_01_useractlog
--lateral view explode(split(regexp_replace(get_json_object(extinfo,'$target_ids'),'[\\[\\"\\]]',''),','))t as target_id
where
bdp_day = "20200618"
and
action = '05'
and
event_type in('01','04')
2.1.1.4 用户产品关注表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_actlog_product_comment (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
target_id string COMMENT '产品ID',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_actlog_product_comment/';
选择action ='05'(交互),event_type='02'(点击),并且target_action='01'(店铺关注)的事件即可:
with log_attend as(
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_action')as target_action,
get_json_object(extinfo,'$.target_id')as target_id,
extinfo,
ct,
bdp_day
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action ='05'
and
event_type='02'
)
insert overwrite table dwd_nshop.dwd_actlog_product_comment partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
target_id,
ct
from
log_attend where target_action='01'
2.1.2 交易主题
2.1.2.1 交易订单明细流水表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_orders_details (
order_id string COMMENT '订单ID',
order_status INT COMMENT '订单状态:5已收货(完成)|6投诉 7退货',
supplier_code string COMMENT '店铺ID',
product_code string COMMENT '商品ID',
customer_id string COMMENT '用户id',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
user_areacode string COMMENT '用户所在地区',
consignee_zipcode string COMMENT '收货人地址',
pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支 付(货到付款) 20 ',
pay_count INT COMMENT '支付次数',
product_price DECIMAL (5, 1) COMMENT '购买商品单价',
weighing_cost DECIMAL (2, 1) COMMENT '商品加权价格',
district_money DECIMAL (4, 1) COMMENT '优惠金额',
is_activity int COMMENT '1:参加活动|0:没有参加活动',
order_ctime BIGINT COMMENT '创建时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_nshop_orders_details/';
订单明细流水表的数据主要是从ods层的订单详细表里抽取,结合公共产品维度表查出产品相关信息,另外还结合订单支付记录表的数据,查出支付相关信息,sql语句如下:
with tborder as(
select
order_id,
order_status,
customer_id,
consignee_zipcode,
pay_type,
order_ctime,
from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_orders
where
from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') = '20200618'
),
tbdetail as(
select
a.order_id,
a.product_id,
a.product_cnt,
a.weighing_cost,
a.district_money,
a.is_activity,
b.supplier_code,
b.product_price,
from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_order_detail a
join ods_nshop.dim_pub_product b
on a.product_id=b.product_code
where
from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd')='20200618'
),
pays as (
select
count(*) as pay_count,
order_id,
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_orders_pay_records
where
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')='20200618'
group by
order_id,
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')
)
insert overwrite table dwd_nshop.dwd_nshop_orders_details partition (bdp_day)
select
tborder.order_id,
tborder.order_status,
tbdetail.supplier_code,
tbdetail.product_id,
tborder.customer_id,
tborder.consignee_zipcode,
tborder.pay_type,
pays.pay_count,
tbdetail.product_price,
tbdetail.product_cnt,
tbdetail.weighing_cost,
tbdetail.district_money,
tbdetail.is_activity,
tborder.order_ctime,
tborder.bdp_day
from tborder join tbdetail
on tborder.order_id=tbdetail.order_id
join pays
on tbdetail.order_id=pays.order_id
2.2.1 订单支付表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_order_pay (
pay_id string COMMENT '订单支付记录ID',
order_id string COMMENT '订单ID',
customer_id string COMMENT '用户ID',
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
longitude string COMMENT '经度',
latitude string COMMENT '纬度',
page_id string COMMENT '行为所在页',
target_action string COMMENT '参考点击目标类型说明',
target_id string COMMENT '订单id',
target_price DOUBLE COMMENT '订单价格',
target_status string COMMENT '支付状态:0 未支付| 1 支付中 | 2 已支付',
target_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线 下支付(货到付款) 20 ',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_order_pay/'
2.1.3 营销活动主题
广告投放数据表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_releasedatas (
customer_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
area_code string COMMENT '地区编码',
release_sid string COMMENT '投放请求id',
release_ip string COMMENT '投放方ip',
release_session string COMMENT '投放会话id',
release_sources string COMMENT '投放渠道',
release_category string COMMENT '投放浏览产品分类',
release_product string COMMENT '投放浏览产品',
release_product_page string COMMENT '投放浏览产品页',
ct BIGINT COMMENT '创建时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/release/dwd_nshop_releasedatas/'
这里需要关联两张中间表,一个是产品表,查出产品分类,一个是页面布局表,选择产品页,两张表结构如下:
页面布局表
插入数据:
with tbrelease as(
select
c.customer_id,
r.device_num ,
r.device_type ,
r.os ,
r.os_version ,
r.manufacturer ,
r.area_code ,
r.release_sid ,
parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','ip')as release_ip,
r.release_session,
r.release_sources,
parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','productPage')as release_product_page,
r.ct
from ods_nshop.ods_01_releasedatas r
join ods_nshop.ods_02_customer c
on r.customer_id=c.customer_id
where
bdp_day='20200618'
)
insert overwrite table dwd_nshop.dwd_nshop_releasedatas partition(bdp_day='20200618')
select
a.customer_id,
a.device_num ,
a.device_type ,
a.os ,
a.os_version ,
a.manufacturer,
a.area_code ,
a.release_sid ,
a.release_ip,
a.release_session,
a.release_sources,
f.category_code release_category,
p.page_target release_product,
a.release_product_page,
a.ct
from tbrelease a
join dim_nshop.dim_pub_page p
on a.release_product_page=p.page_code and p.page_type='4'
join dim_nshop.dim_pub_product f
on p.page_code=f.product_code;
网友评论