此篇主要介绍去重和去重过程中空值的处理
源表部分内容
表内容.png
一、去重的四种方式
-
group by
-- 去重时 空和null均参与计算
select name from demo.product_name group by name ;
-
distinct
select distinct name from demo.product_name ;
-
窗口函数
-- 伪列实现去重
select * from
(
select *,ROW_NUMBER() over(partition by name order by id ) as rn from demo.product_name
) a where rn =1
-
union
-- 使用union实现去重
select name from demo.product_name
union
select name from demo.product_name
-
查找重复值
-- 查找重复值
select name , count(1) as cnt from demo.product_name group by name having cnt > 1 ;
-- 窗口函数查重
select * from
(
select *,ROW_NUMBER() over(partition by name order by id ) as rn from demo.product_name
) a where rn > 1
二、空值处理
-
where 语句空值
-- where 条件会过滤null , 不过滤空; name is null 可同时识别null和空
select name from demo.product_name where name not regexp '[一-龢]'
select name from demo.product_name where name not regexp '[一-龢]' or name is null
-
avg 语句空值
-- avg 空参与计算,null不参与计算 name is null 可同时识别null和空
select avg(name) from (select name from demo.product_name where name not regexp '[一-龥]' or name is null group by name ) a ;
-
count 语句空值
-- 注意count(1),connt(字段名)的区别:
-- 1.无null值时没有区别,有null时,count(1) 会计算null值;
-- 2.count(字段名) 计算空,不计算null
select count(name) from (select name from demo.product_name group by name ) a ;
select count(distinct name) from demo.product_name ;
select count(1) from (select name from demo.product_name group by name ) a ;
-
min/max
-- max \min 空参与计算为最小值, null 不计算
select max(name) as l_name ,min(name) as m_name from demo.product_name
-
IFNULL 归一化空值
-- 替换null为空
select IFNULL(name,'') as name from demo.product_name
网友评论