美文网首页
mysql 数据多个字段重复,清除重复数据

mysql 数据多个字段重复,清除重复数据

作者: 一介书生独醉江湖 | 来源:发表于2022-05-05 19:39 被阅读0次
    # 需求概述:
    
    # no_app_videocall_video表中存在大量重复数据;
    # 这里的重复的定义是:
    # videocall_id、video_name、video_path、video_size 都相同的情况下,就是重复数据;(下面有表结构)
    
    # 需要删除上述重复数据,并且对于重复的数据每个只保留一条;
    
    
    # no_app_videocall_video 表结构
    
    CREATE TABLE `no_app_videocall_video` (
      `video_id` varchar(40) NOT NULL COMMENT '视频文件ID',
      `videocall_id` varchar(40) DEFAULT NULL COMMENT '远程视频ID',
      `video_type` varchar(40) DEFAULT '' COMMENT '视频类型:person-当事人端视频; notary-公证员端视频; merge-合成视频',
      `video_name` varchar(40) DEFAULT '' COMMENT '视频文件名称',
      `video_path` varchar(400) DEFAULT '' COMMENT '视频文件路径',
      `video_file_type` varchar(10) DEFAULT '' COMMENT '视频文件类型',
      `video_size` int(11) DEFAULT '0' COMMENT '文件大小(KB)',
      `video_duration` double(20,3) DEFAULT '0.000' COMMENT '视频时长(秒)',
      `width` int(11) DEFAULT '0' COMMENT '分辨率-宽',
      `height` int(11) DEFAULT '0' COMMENT '分辨率-高',
      `bitrate` int(11) DEFAULT '0' COMMENT '码率(kbps)',
      `notary_link_st` datetime DEFAULT NULL COMMENT '公证员连线时间',
      `notary_link_ed` datetime DEFAULT NULL COMMENT '公证员挂断时间',
      `person_link_st` datetime DEFAULT NULL COMMENT '当事人连线时间',
      `person_link_ed` datetime DEFAULT NULL COMMENT '当事人挂断时间',
      `create_dt` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
      PRIMARY KEY (`video_id`),
      KEY `FK_Rno_app_videocall_video_vcid` (`videocall_id`),
      CONSTRAINT `FK_Rno_app_videocall_video_vcid` FOREIGN KEY (`videocall_id`) REFERENCES `no_app_videocall` (`videocall_id`) ON DELETE CASCADE ON UPDATE CASCADE
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='远程视频视频文件'
    
    
    
    # 第一步
    # videocall_id、video_name、video_path、video_size 都相同的数据数量 > 2,那么就是存在重复了;
    # 这里用到了group by 、count 、having and 函数;
    
    SELECT videocall_id, COUNT(videocall_id), video_name
        , COUNT(video_name), video_path
        , COUNT(video_path), video_size
        , COUNT(video_size)
    FROM no_app_videocall_video
    GROUP BY videocall_id, video_name, video_path, video_size
    HAVING (COUNT(videocall_id) > 1)
    AND (COUNT(video_name) > 1)
    AND (COUNT(video_path) > 1)
    AND (COUNT(video_size) > 1);
    
    
    
    # 第二步
    # 重复的数据筛选出来了;
    # 这些数据里面就是包含videocall_id、video_name、video_path 和 video_size同时都相同的数据! 
    SELECT t.*
    FROM no_app_videocall_video t, (
    
            # 第一步的SQL  START
            SELECT videocall_id, COUNT(videocall_id), video_name
                , COUNT(video_name), video_path
                , COUNT(video_path), video_size
                , COUNT(video_size)
            FROM no_app_videocall_video
            GROUP BY videocall_id, video_name, video_path, video_size
            HAVING (COUNT(videocall_id) > 1)
            AND (COUNT(video_name) > 1)
            AND (COUNT(video_path) > 1)
            AND (COUNT(video_size) > 1)
           # 第一步的SQL  END
    
        ) a
    WHERE t.videocall_id = a.videocall_id
        AND t.video_name = a.video_name
        AND t.video_path = a.video_path
        AND t.video_size = a.video_size;   
    
    
    
    # 第三步
    # 这里查询出来的结果是:需要保留的video_id
    
    # 对于重复的数据每个只保留一条;
    # 这里的 min(video_id) 只是取 video_id最小,可以根据需求调整:
    # 例如:max(video_id),唯一即可(因为我们对于重复数据只想保留一条);
    
    SELECT min(video_id) as video_id FROM (
    # 第二步的SQL START
    SELECT t.*
    FROM no_app_videocall_video t, (
            SELECT videocall_id, COUNT(videocall_id), video_name
                , COUNT(video_name), video_path
                , COUNT(video_path), video_size
                , COUNT(video_size)
            FROM no_app_videocall_video
            GROUP BY videocall_id, video_name, video_path, video_size
            HAVING (COUNT(videocall_id) > 1)
            AND (COUNT(video_name) > 1)
            AND (COUNT(video_path) > 1)
            AND (COUNT(video_size) > 1)
        ) a
    WHERE t.videocall_id = a.videocall_id
        AND t.video_name = a.video_name
        AND t.video_path = a.video_path
        AND t.video_size = a.video_size
    # 第二步的SQL END
        ) a GROUP BY a.videocall_id ,a.video_name,a.video_path,a.video_size;
        
    
    
    # 第四步
    # 这里查询出来的结果是:需要删除的video_id
    
    SELECT b.video_id
    FROM (
        # 第二步的SQL START
        SELECT t.*
        FROM no_app_videocall_video t, (
                SELECT videocall_id, COUNT(videocall_id), video_name
                    , COUNT(video_name), video_path
                    , COUNT(video_path), video_size
                    , COUNT(video_size)
                FROM no_app_videocall_video
                GROUP BY videocall_id, video_name, video_path, video_size
                HAVING (COUNT(videocall_id) > 1)
                AND (COUNT(video_name) > 1)
                AND (COUNT(video_path) > 1)
                AND (COUNT(video_size) > 1)
            ) a
        WHERE t.videocall_id = a.videocall_id
            AND t.video_name = a.video_name
            AND t.video_path = a.video_path
            AND t.video_size = a.video_size
        # 第二步的SQL END
    ) b
    WHERE b.video_id NOT IN (
        # 第三步的SQL START
        SELECT min(video_id) AS video_id
        FROM (
            SELECT t.*
            FROM no_app_videocall_video t, (
                    SELECT videocall_id, COUNT(videocall_id), video_name
                        , COUNT(video_name), video_path
                        , COUNT(video_path), video_size
                        , COUNT(video_size)
                    FROM no_app_videocall_video
                    GROUP BY videocall_id, video_name, video_path, video_size
                    HAVING (COUNT(videocall_id) > 1)
                    AND (COUNT(video_name) > 1)
                    AND (COUNT(video_path) > 1)
                    AND (COUNT(video_size) > 1)
                ) a
            WHERE t.videocall_id = a.videocall_id
                AND t.video_name = a.video_name
                AND t.video_path = a.video_path
                AND t.video_size = a.video_size
        ) a
        GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
        # 第三步的SQL END
    );
    
    
    # 第五步
    # 最终SQL;
    # 删除重复数据video_id的数据即可;
    
    
    DELETE FROM no_app_videocall_video
    WHERE video_id IN (
            # 第四步的SQL START
            SELECT b.video_id
            FROM (
                SELECT t.*
                FROM no_app_videocall_video t, (
                        SELECT videocall_id, COUNT(videocall_id), video_name
                            , COUNT(video_name), video_path
                            , COUNT(video_path), video_size
                            , COUNT(video_size)
                        FROM no_app_videocall_video
                        GROUP BY videocall_id, video_name, video_path, video_size
                        HAVING (COUNT(videocall_id) > 1)
                        AND (COUNT(video_name) > 1)
                        AND (COUNT(video_path) > 1)
                        AND (COUNT(video_size) > 1)
                    ) a
                WHERE t.videocall_id = a.videocall_id
                    AND t.video_name = a.video_name
                    AND t.video_path = a.video_path
                    AND t.video_size = a.video_size
            ) b
            WHERE b.video_id NOT IN (
                SELECT min(video_id) AS video_id
                FROM (
                    SELECT t.*
                    FROM no_app_videocall_video t, (
                            SELECT videocall_id, COUNT(videocall_id), video_name
                                , COUNT(video_name), video_path
                                , COUNT(video_path), video_size
                                , COUNT(video_size)
                            FROM no_app_videocall_video
                            GROUP BY videocall_id, video_name, video_path, video_size
                            HAVING (COUNT(videocall_id) > 1)
                            AND (COUNT(video_name) > 1)
                            AND (COUNT(video_path) > 1)
                            AND (COUNT(video_size) > 1)
                        ) a
                    WHERE t.videocall_id = a.videocall_id
                        AND t.video_name = a.video_name
                        AND t.video_path = a.video_path
                        AND t.video_size = a.video_size
                ) a
                GROUP BY a.videocall_id, a.video_name, a.video_path, a.video_size
            )
            # 第四步的SQL END
        );
    
    
    
    # 我参考的这个文档中还有关于 <单个字段 数据重复>情况的处理,有需要的可以直接看这个;
    # 参考:https://blog.csdn.net/qq_35387940/article/details/108074927?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_antiscanv2&utm_relevant_index=2
    

    相关文章

      网友评论

          本文标题:mysql 数据多个字段重复,清除重复数据

          本文链接:https://www.haomeiwen.com/subject/firoyrtx.html