先建表
create table user_attendence(
user_id bigint comment '用户名',
date TIMESTAMP comment '日期' ,
is_sign bigint comment '是否登录,0为否,1为是');
-- delete from user_attendence;
接下来用python自己导入随机数据,500个用户、时间2021-1-1~2021-5-5。
import pymysql
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
df = pd.DataFrame()
date = pd.date_range('2021-1-1','2021-5-5').strftime('%Y-%m-%d')
df["user_id"]=[i for i in np.arange(1,500)]*len(date)
df['date'] = [i for i in date]*len(np.arange(1,500))
df["is_sign"] = [np.random.choice([0,1]) for i in range(len(df))]
df = df.sort_values(by=['user_id','date'],na_position='first')
#写入数据库
con = 'mysql+pymysql://root:3333@localhost:3306/supermanzwg?charset=utf8'
#如果出现字符串编码错误记得在Navicat运行alter table 表名 convert to character set utf8mb4;
df.to_sql(name='user_attendence',con=con,if_exists='append',index = False)
截至当前,每个用户已经连续签到的天数
查出最后一次不登录的日期,与当前日期进行比较,二者差值为几就是连续登录几天
select max(date) from user_attendence;
select
user_id ,
max(date),
DATEDIFF('2021-02-19',max(date)) as max_con_days
from user_attendence
where is_sign = 0
group by user_id;
计算有史以来用户最大连续签到天数;
对用户分组,按日期进行排序,排序序号为rank;
如果日期与序号的差值为相等,按这个差值进行分组并计数,取最大值
select
user_id
,max(次数)from
(select user_id
,date,rn
,a.date-a.rn diff
,count(*) 次数
from
(select
user_id
,date,
row_number() over(partition by user_id order by date asc) rn
from user_attendence
where is_sign = 1
) a
group by user_id,diff
) b
group by user_id;
网友评论