# 需求概述:
# no_app_videocall_video表中存在大量重复数据;
# 这里的重复的定义是:
# videocall_id、video_name、video_path、video_size 都相同的情况下,就是重复数据;(下面有表结构)
# 需要删除上述重复数据,并且对于重复的数据每个只保留一条;
# no_app_videocall_video 表结构
CREATE TABLE `no_app_videocall_video` (
`video_id` varchar(40) NOT NULL COMMENT '视频文件ID',
`videocall_id` varchar(40) DEFAULT NULL COMMENT '远程视频ID',
`video_type` varchar(40) DEFAULT '' COMMENT '视频类型:person-当事人端视频; notary-公证员端视频; merge-合成视频',
`video_name` varchar(40) DEFAULT '' COMMENT '视频文件名称',
`video_path` varchar(400) DEFAULT '' COMMENT '视频文件路径',
`video_file_type` varchar(10) DEFAULT '' COMMENT '视频文件类型',
`video_size` int(11) DEFAULT '0' COMMENT '文件大小(KB)',
`video_duration` double(20,3) DEFAULT '0.000' COMMENT '视频时长(秒)',
`width` int(11) DEFAULT '0' COMMENT '分辨率-宽',
`height` int(11) DEFAULT '0' COMMENT '分辨率-高',
`bitrate` int(11) DEFAULT '0' COMMENT '码率(kbps)',
`notary_link_st` datetime DEFAULT NULL COMMENT '公证员连线时间',
`notary_link_ed` datetime DEFAULT NULL COMMENT '公证员挂断时间',
`person_link_st` datetime DEFAULT NULL COMMENT '当事人连线时间',
`person_link_ed` datetime DEFAULT NULL COMMENT '当事人挂断时间',
`create_dt` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
PRIMARY KEY (`video_id`),
KEY `FK_Rno_app_videocall_video_vcid` (`videocall_id`),
CONSTRAINT `FK_Rno_app_videocall_video_vcid` FOREIGN KEY (`videocall_id`) REFERENCES `no_app_videocall` (`videocall_id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='远程视频视频文件'
# 第一步
# videocall_id、video_name、video_path、video_size 都相同的数据数量 > 2,那么就是存在重复了;
# 这里用到了group by 、count 、having and 函数;
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1);
# 第二步
# 重复的数据筛选出来了;
# 这些数据里面就是包含videocall_id、video_name、video_path 和 video_size同时都相同的数据!
SELECT t.*
FROM no_app_videocall_video t, (
# 第一步的SQL START
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
# 第一步的SQL END
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size;
# 第三步
# 这里查询出来的结果是:需要保留的video_id
# 对于重复的数据每个只保留一条;
# 这里的 min(video_id) 只是取 video_id最小,可以根据需求调整:
# 例如:max(video_id),唯一即可(因为我们对于重复数据只想保留一条);
SELECT min(video_id) as video_id FROM (
# 第二步的SQL START
SELECT t.*
FROM no_app_videocall_video t, (
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size
# 第二步的SQL END
) a GROUP BY a.videocall_id ,a.video_name,a.video_path,a.video_size;
# 第四步
# 这里查询出来的结果是:需要删除的video_id
SELECT b.video_id
FROM (
# 第二步的SQL START
SELECT t.*
FROM no_app_videocall_video t, (
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size
# 第二步的SQL END
) b
WHERE b.video_id NOT IN (
# 第三步的SQL START
SELECT min(video_id) AS video_id
FROM (
SELECT t.*
FROM no_app_videocall_video t, (
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size
) a
GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
# 第三步的SQL END
);
# 第五步
# 最终SQL;
# 删除重复数据video_id的数据即可;
DELETE FROM no_app_videocall_video
WHERE video_id IN (
# 第四步的SQL START
SELECT b.video_id
FROM (
SELECT t.*
FROM no_app_videocall_video t, (
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size
) b
WHERE b.video_id NOT IN (
SELECT min(video_id) AS video_id
FROM (
SELECT t.*
FROM no_app_videocall_video t, (
SELECT videocall_id, COUNT(videocall_id), video_name
, COUNT(video_name), video_path
, COUNT(video_path), video_size
, COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1)
) a
WHERE t.videocall_id = a.videocall_id
AND t.video_name = a.video_name
AND t.video_path = a.video_path
AND t.video_size = a.video_size
) a
GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
)
# 第四步的SQL END
);
# 我参考的这个文档中还有关于 <单个字段 数据重复>情况的处理,有需要的可以直接看这个;
# 参考:https://blog.csdn.net/qq_35387940/article/details/108074927?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&utm_relevant_index=2
网友评论