美文网首页
mysql 数据多个字段重复,清除重复数据

mysql 数据多个字段重复,清除重复数据

作者: 一介书生独醉江湖 | 来源:发表于2022-05-05 19:39 被阅读0次
# 需求概述:

# no_app_videocall_video表中存在大量重复数据;
# 这里的重复的定义是:
# videocall_id、video_name、video_path、video_size 都相同的情况下,就是重复数据;(下面有表结构)

# 需要删除上述重复数据,并且对于重复的数据每个只保留一条;

# no_app_videocall_video 表结构

CREATE TABLE `no_app_videocall_video` (
  `video_id` varchar(40) NOT NULL COMMENT '视频文件ID',
  `videocall_id` varchar(40) DEFAULT NULL COMMENT '远程视频ID',
  `video_type` varchar(40) DEFAULT '' COMMENT '视频类型:person-当事人端视频; notary-公证员端视频; merge-合成视频',
  `video_name` varchar(40) DEFAULT '' COMMENT '视频文件名称',
  `video_path` varchar(400) DEFAULT '' COMMENT '视频文件路径',
  `video_file_type` varchar(10) DEFAULT '' COMMENT '视频文件类型',
  `video_size` int(11) DEFAULT '0' COMMENT '文件大小(KB)',
  `video_duration` double(20,3) DEFAULT '0.000' COMMENT '视频时长(秒)',
  `width` int(11) DEFAULT '0' COMMENT '分辨率-宽',
  `height` int(11) DEFAULT '0' COMMENT '分辨率-高',
  `bitrate` int(11) DEFAULT '0' COMMENT '码率(kbps)',
  `notary_link_st` datetime DEFAULT NULL COMMENT '公证员连线时间',
  `notary_link_ed` datetime DEFAULT NULL COMMENT '公证员挂断时间',
  `person_link_st` datetime DEFAULT NULL COMMENT '当事人连线时间',
  `person_link_ed` datetime DEFAULT NULL COMMENT '当事人挂断时间',
  `create_dt` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  PRIMARY KEY (`video_id`),
  KEY `FK_Rno_app_videocall_video_vcid` (`videocall_id`),
  CONSTRAINT `FK_Rno_app_videocall_video_vcid` FOREIGN KEY (`videocall_id`) REFERENCES `no_app_videocall` (`videocall_id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='远程视频视频文件'


# 第一步
# videocall_id、video_name、video_path、video_size 都相同的数据数量 > 2,那么就是存在重复了;
# 这里用到了group by 、count 、having and 函数;

SELECT videocall_id, COUNT(videocall_id), video_name
    , COUNT(video_name), video_path
    , COUNT(video_path), video_size
    , COUNT(video_size)
FROM no_app_videocall_video
GROUP BY videocall_id, video_name, video_path, video_size
HAVING (COUNT(videocall_id) > 1)
AND (COUNT(video_name) > 1)
AND (COUNT(video_path) > 1)
AND (COUNT(video_size) > 1);


# 第二步
# 重复的数据筛选出来了;
# 这些数据里面就是包含videocall_id、video_name、video_path 和 video_size同时都相同的数据! 
SELECT t.*
FROM no_app_videocall_video t, (

        # 第一步的SQL  START
        SELECT videocall_id, COUNT(videocall_id), video_name
            , COUNT(video_name), video_path
            , COUNT(video_path), video_size
            , COUNT(video_size)
        FROM no_app_videocall_video
        GROUP BY videocall_id, video_name, video_path, video_size
        HAVING (COUNT(videocall_id) > 1)
        AND (COUNT(video_name) > 1)
        AND (COUNT(video_path) > 1)
        AND (COUNT(video_size) > 1)
       # 第一步的SQL  END

    ) a
WHERE t.videocall_id = a.videocall_id
    AND t.video_name = a.video_name
    AND t.video_path = a.video_path
    AND t.video_size = a.video_size;   


# 第三步
# 这里查询出来的结果是:需要保留的video_id

# 对于重复的数据每个只保留一条;
# 这里的 min(video_id) 只是取 video_id最小,可以根据需求调整:
# 例如:max(video_id),唯一即可(因为我们对于重复数据只想保留一条);

SELECT min(video_id) as video_id FROM (
# 第二步的SQL START
SELECT t.*
FROM no_app_videocall_video t, (
        SELECT videocall_id, COUNT(videocall_id), video_name
            , COUNT(video_name), video_path
            , COUNT(video_path), video_size
            , COUNT(video_size)
        FROM no_app_videocall_video
        GROUP BY videocall_id, video_name, video_path, video_size
        HAVING (COUNT(videocall_id) > 1)
        AND (COUNT(video_name) > 1)
        AND (COUNT(video_path) > 1)
        AND (COUNT(video_size) > 1)
    ) a
WHERE t.videocall_id = a.videocall_id
    AND t.video_name = a.video_name
    AND t.video_path = a.video_path
    AND t.video_size = a.video_size
# 第二步的SQL END
    ) a GROUP BY a.videocall_id ,a.video_name,a.video_path,a.video_size;
    

# 第四步
# 这里查询出来的结果是:需要删除的video_id

SELECT b.video_id
FROM (
    # 第二步的SQL START
    SELECT t.*
    FROM no_app_videocall_video t, (
            SELECT videocall_id, COUNT(videocall_id), video_name
                , COUNT(video_name), video_path
                , COUNT(video_path), video_size
                , COUNT(video_size)
            FROM no_app_videocall_video
            GROUP BY videocall_id, video_name, video_path, video_size
            HAVING (COUNT(videocall_id) > 1)
            AND (COUNT(video_name) > 1)
            AND (COUNT(video_path) > 1)
            AND (COUNT(video_size) > 1)
        ) a
    WHERE t.videocall_id = a.videocall_id
        AND t.video_name = a.video_name
        AND t.video_path = a.video_path
        AND t.video_size = a.video_size
    # 第二步的SQL END
) b
WHERE b.video_id NOT IN (
    # 第三步的SQL START
    SELECT min(video_id) AS video_id
    FROM (
        SELECT t.*
        FROM no_app_videocall_video t, (
                SELECT videocall_id, COUNT(videocall_id), video_name
                    , COUNT(video_name), video_path
                    , COUNT(video_path), video_size
                    , COUNT(video_size)
                FROM no_app_videocall_video
                GROUP BY videocall_id, video_name, video_path, video_size
                HAVING (COUNT(videocall_id) > 1)
                AND (COUNT(video_name) > 1)
                AND (COUNT(video_path) > 1)
                AND (COUNT(video_size) > 1)
            ) a
        WHERE t.videocall_id = a.videocall_id
            AND t.video_name = a.video_name
            AND t.video_path = a.video_path
            AND t.video_size = a.video_size
    ) a
    GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
    # 第三步的SQL END
);

# 第五步
# 最终SQL;
# 删除重复数据video_id的数据即可;


DELETE FROM no_app_videocall_video
WHERE video_id IN (
        # 第四步的SQL START
        SELECT b.video_id
        FROM (
            SELECT t.*
            FROM no_app_videocall_video t, (
                    SELECT videocall_id, COUNT(videocall_id), video_name
                        , COUNT(video_name), video_path
                        , COUNT(video_path), video_size
                        , COUNT(video_size)
                    FROM no_app_videocall_video
                    GROUP BY videocall_id, video_name, video_path, video_size
                    HAVING (COUNT(videocall_id) > 1)
                    AND (COUNT(video_name) > 1)
                    AND (COUNT(video_path) > 1)
                    AND (COUNT(video_size) > 1)
                ) a
            WHERE t.videocall_id = a.videocall_id
                AND t.video_name = a.video_name
                AND t.video_path = a.video_path
                AND t.video_size = a.video_size
        ) b
        WHERE b.video_id NOT IN (
            SELECT min(video_id) AS video_id
            FROM (
                SELECT t.*
                FROM no_app_videocall_video t, (
                        SELECT videocall_id, COUNT(videocall_id), video_name
                            , COUNT(video_name), video_path
                            , COUNT(video_path), video_size
                            , COUNT(video_size)
                        FROM no_app_videocall_video
                        GROUP BY videocall_id, video_name, video_path, video_size
                        HAVING (COUNT(videocall_id) > 1)
                        AND (COUNT(video_name) > 1)
                        AND (COUNT(video_path) > 1)
                        AND (COUNT(video_size) > 1)
                    ) a
                WHERE t.videocall_id = a.videocall_id
                    AND t.video_name = a.video_name
                    AND t.video_path = a.video_path
                    AND t.video_size = a.video_size
            ) a
            GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
        )
        # 第四步的SQL END
    );


# 我参考的这个文档中还有关于 <单个字段 数据重复>情况的处理,有需要的可以直接看这个;
# 参考:https://blog.csdn.net/qq_35387940/article/details/108074927?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&utm_relevant_index=2

相关文章

网友评论

      本文标题:mysql 数据多个字段重复,清除重复数据

      本文链接:https://www.haomeiwen.com/subject/firoyrtx.html