##有50W个店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:
1)每个店铺的UV(访客数)
2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
## 建表语句
1. 建表并导入数据
create table test02_visit (shop_name string, user_id string, visit_data string) row format delimited fields terminated by '\t' location '/data/test02';
2. 求每个店铺的UV(访客数)
select shop_name, count(distinct user_id) as uv from test02_visit group by shop_name;
+------------+-----+--+
| shop_name | uv |
+------------+-----+--+
| apple | 1 |
| huawei | 5 |
| icbc | 2 |
| jingdong | 1 |
+------------+-----+--+
select shop_name, count(*) as uv from (select shop_name, user_id from test02_visit group by shop_name, user_id) tmp group by shop_name;
+------------+-----+--+
| shop_name | uv |
+------------+-----+--+
| apple | 1 |
| huawei | 5 |
| icbc | 2 |
| jingdong | 1 |
+------------+-----+--+
3. 每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
select shop_name, user_id, count(*) as count_visit from test02_visit group by shop_name, user_id;
+------------+----------+--------------+--+
| shop_name | user_id | count_visit |
+------------+----------+--------------+--+
| apple | 1001 | 1 |
| huawei | 1001 | 4 |
| huawei | 1002 | 2 |
| huawei | 1003 | 1 |
| huawei | 1004 | 1 |
| huawei | 1005 | 1 |
| icbc | 1001 | 1 |
| icbc | 1002 | 1 |
| jingdong | 1006 | 1 |
+------------+----------+--------------+--+
select t1.shop_name, t1.user_id, t1.count_visit, row_number() over(partition by t1.shop_name order by t1.count_visit desc) rk from ( select shop_name, user_id, count(*) as count_visit from test02_visit group by shop_name, user_id) t1;
+---------------+-------------+-----------------+-----+--+
| t1.shop_name | t1.user_id | t1.count_visit | rk |
+---------------+-------------+-----------------+-----+--+
| apple | 1001 | 1 | 1 |
| huawei | 1001 | 4 | 1 |
| huawei | 1002 | 2 | 2 |
| huawei | 1005 | 1 | 3 |
| huawei | 1004 | 1 | 4 |
| huawei | 1003 | 1 | 5 |
| icbc | 1002 | 1 | 1 |
| icbc | 1001 | 1 | 2 |
| jingdong | 1006 | 1 | 1 |
+---------------+-------------+-----------------+-----+--+
select * from (select t1.shop_name, t1.user_id, t1.count_visit, row_number() over(partition by t1.shop_name order by t1.count_visit desc) rk from (select shop_name, user_id, count(*) as count_visit from test02_visit group by shop_name, user_id) t1) t2 where t2.rk <= 3;
+---------------+-------------+-----------------+--------+--+
| t2.shop_name | t2.user_id | t2.count_visit | t2.rk |
+---------------+-------------+-----------------+--------+--+
| apple | 1001 | 1 | 1 |
| huawei | 1001 | 4 | 1 |
| huawei | 1002 | 2 | 2 |
| huawei | 1005 | 1 | 3 |
| icbc | 1002 | 1 | 1 |
| icbc | 1001 | 1 | 2 |
| jingdong | 1006 | 1 | 1 |
+---------------+-------------+-----------------+--------+--+
网友评论