表一:orders(订单表)
create table orders(
order_id string,
user_id string,
eval_set string,
order_number string,
order_dow string,
order_hour_of_day string,
days_since_prior_order string
)
表二:prior(商品表)
create table priors(
order_id string,
product_id string,
add_to_cart_order string,
reordered string
)
row format delimited fields terminated by ',' lines terminated by '\n';
业务1:
查询每个用户有多少个订单?
select user_id,count(1) as order_cnt
from orders
group by user_id
order by order_cnt desc
limit 10;
业务2:
每个用户平均每个订单平均是多少商品 ?
思路,某个用户平均每个订单平均的商品数量=某个用户一共购买的商品数量/一共购买的订单数量
1.先在商品表算出一个订单有多少个商品
select order_id,count(1) as pri_cnt
from priors
group by order_id
2.将商品表的order_id和订单表的order_id关联起来,把订单中的产品数量信息带到用户信息里
select odr.user_id,pri.order_id,pri_cnt
from orders odr
join
(
select order_id,count(1) as pri_cnt
from priors
group by order_id) pri
on odr.order_id=pri.order_id
limit 10;
3.求和,算出每个用户总共购买多少商品
select user_id,sum(pri_cnt)
from(
select odr.user_id,pri.order_id,pri_cnt
from orders odr
join
(
select order_id,count(1) as pri_cnt
from priors
group by order_id) pri
on odr.order_id=pri.order_id
)t
group by user_id
limit 10;
4.求和,算出某个用户一共购买多少订单
每个用户有多少个订单:
select user_id,count(1)
from orders
group by user_id;
5.求平均
select user_id,
sum(prod_cnt)/count(1) as sc_prod,
avg(prod_cnt) as avg_prod
from (select * from orders where eval_set='prior')od --如果不是prior统计为0
join (
select order_id,count(1) as prod_cnt
from priors
group by order_id
limit 10000)pro
on od.order_id=pro.order_id
group by user_id
limit 10;
业务3:每个用户在一周中的购买订单的分布(列转行)
user_id,dow0,dow1,dow2,dow3,dow4...dow6
1 0 0 1 2 2 0
select
user_id,
sum(case order_dow when '0' then 1 else 0 end) as dow0,
sum(case order_dow when '1' then 1 else 0 end) as dow1,
sum(case order_dow when '2' then 1 else 0 end) as dow2,
sum(case order_dow when '3' then 1 else 0 end) as dow3,
sum(case order_dow when '4' then 1 else 0 end) as dow4,
sum(case order_dow when '5' then 1 else 0 end) as dow5,
sum(case order_dow when '6' then 1 else 0 end) as dow6
from orders
group by user_id
limit 10;
业务4:
每个用户平均每个购买天中,购买的商品数量是多少?
比如有一个user ,有两天是购买商品的 :
day 1 :两个订单,1订单:10个product,2订单:15个product , 共25个
day 2:一个订单, 1订单:12个product 共12个
day 3: 没有购买
这个user 平均每个购买天的商品数量:
(10+15+12)/2 ,这里是2天有购买行为,不是三天,也不是3个订单。
select ord.user_id,sum(pri.products_cnt)/ count(distinct ord.days_since_prior_order) as avg_prod
--date: 2离第一个商品10天,3离第二个10天
from
(select order_id,
user_id,
if(days_since_prior_order='','-1',days_since_prior_order) as days_since_prior_order
from orders where eval_set='prior')ord
join
(select order_id,count(1) as products_cnt from priors group by order_id)pri
on ord.order_id=pri.order_id
group by ord.user_id
limit 10;
业务5:每个用户最喜爱购买的三个product是什么,最终表结构可以是三个列,也可以是一个字符串/数组
select user_id,
collect_list(concat_ws('_',product_id,cast(row_num as string))) as pro_top3,
size(collect_list(product_id)) as top_size
from
(
select user_id,
product_id,
row_number() over(partition by user_id order by top_cnt desc) as row_num
from
(
-- user_id,product_id,count(1)这个用户其中一个商品购买了几次
select
ord.user_id as user_id,
pri.product_id as product_id,
count(1) over(partition by user_id,product_id) as top_cnt
from
(select * from orders where eval_set='prior')ord
join
(select * from priors limit 10000)pri
on ord.order_id=pri.order_id
-- group by ord.user_id,pri.product_id
)t
)t1 where row_num <=3 group by user_id limit 10;
网友评论