题目:
现有城市网吧访问数据,字段:网吧id,访客id(身份证号),上线时间,下线时间
规则1、如果有两个用户在一家网吧的前后上下线时间在10分钟以内,则两人可能认识
规则2、如果这两个用户在三家以上网吧出现【规则1】的情况,则两人一定认识
需求:
该城市上网用户中两人一定认识的组合数
该题可以选用自己擅长的任何技术来解决,可以是JAVA、Python、C、C++编程语言,也可以是Hadoop,Spark大数据工具
参考答案:
数据库版本:Server version: 8.0.20 MySQL Community Server - GPL
建表语句
create table test_network_bar_info(
bar_id int comment '网吧id',
user_id int comment '访客id-身份证号',
login_time timestamp comment '上线时间',
logout_time timestamp comment '下线时间'
);
数据准备
insert into test_network_bar_info values (1,110001,'2020-01-01 12:10:00','2020-01-01 12:30:00');
insert into test_network_bar_info values (1,110001,'2020-01-01 12:35:00','2020-01-01 12:40:00');
insert into test_network_bar_info values (1,110002,'2020-01-01 12:50:00','2020-01-01 12:55:00');
insert into test_network_bar_info values (1,110001,'2020-01-01 13:00:00','2020-01-01 13:10:00');
insert into test_network_bar_info values (1,110003,'2020-01-01 12:15:00','2020-01-01 13:15:00');
insert into test_network_bar_info values (2,110001,'2020-01-02 12:10:00','2020-01-02 12:30:00');
insert into test_network_bar_info values (2,110001,'2020-01-02 12:35:00','2020-01-02 12:40:00');
insert into test_network_bar_info values (2,110001,'2020-01-02 12:50:00','2020-01-02 12:55:00');
insert into test_network_bar_info values (2,110002,'2020-01-02 13:00:00','2020-01-02 13:10:00');
insert into test_network_bar_info values (3,110001,'2020-01-03 12:10:00','2020-01-03 12:30:00');
insert into test_network_bar_info values (3,110003,'2020-01-03 12:15:00','2020-01-03 12:40:00');
insert into test_network_bar_info values (3,110001,'2020-01-03 12:50:00','2020-01-03 12:55:00');
insert into test_network_bar_info values (3,110002,'2020-01-03 13:00:00','2020-01-03 13:10:00');
查询逻辑
select
C.user_group,
COUNT(DISTINCT C.bar_id)
from
(select
distinct
A.bar_id,
case when A.user_id > B.user_id then CONCAT(B.user_id,A.user_id)
when A.user_id < B.user_id then CONCAT(A.user_id,B.user_id)
else 'NA' end as user_group
from
(select
bar_id,
user_id,
login_time,
logout_time
from test_network_bar_info)A
inner join
(select
bar_id,
user_id,
login_time,
logout_time
from test_network_bar_info)B
on A.bar_id = B.bar_id
and A.user_id <> B.user_id
and (ABS(timestampdiff(second ,A.login_time,B.login_time)) <= 600
or ABS(timestampdiff(second ,A.logout_time,B.logout_time)) <= 600))C
GROUP BY C.user_group;
https://blog.csdn.net/weixin_43619485/java/article/details/107164729
网友评论