主要针对 21.x 版本新增函数的使用进行补充说明
v21.10
-
UDF
用户可通过添加lambda表达式,创建自定义Function
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
SELECT number, linear_equation(number, 2, 1) FROM numbers(3);
CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');
SELECT number, parity_str(number) FROM numbers(3);
-
intersect
求多个数据集在某一维度上的交集,适合在用户分群等类似业务场景使用
select count( 1) from (
select id as create_user from app.user_model where 1=1 and product_count>=10 and product_count<=300
intersect
select create_user from app.work_basic_model where total_uv>=100 and total_uv<=350
intersect
select create_user from app.work_basic_model where total_uv>=200 and total_uv<=350
)
类似:
with
(select groupUniqArray(u_i) from (select id as u_i from (select *from app.user_model where 1=1 and product_count>=10 and product_count<=300 ) )) as u0,
(select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=100 and total_uv<=350 ) )) as u1 ,
(select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=200 and total_uv<=350 ) )) as u2
select length(arrayIntersect(u0,u1,u2)) as u
-
except
用第一个查询子集与后面所有子集求差集
select arrayJoin([1,2,3,4]) except select arrayJoin([1,2]) except select arrayJoin([4,5])
-
leftPad/rightPad
可用于对某些敏感信息进行脱敏处理
SELECT leftPad(substring(phone,-3,3), length( phone ), '*') from (select '13126966152' phone)
-
splitByRegexp
按照正则表达式对文本进行分割,分割后返回一个数组
--提取html中的所有去标签后的文本信息
select splitByRegexp('<[^<>]*>', x) from (select arrayJoin(['<h1>hello<h2>world</h2></h1>', 'gbye<split>bug']) x)
-
mapContains/mapKeys/mapValues
新增map数据类型相关处理函数
select map( 'aa', 4, 'bb' , 5) as m, mapContains(m, 'aa'),mapContains(m, 'cc'), mapKeys(m), mapValues(m)
-
countMatches
基于正则表达式统计匹配数
select countMatches('foo.com bar.com baz.com bam.com', '([^. ]+)\.([^. ]+)')
-
accurateCastOrNull
对字段值进行类型转换校验,转换成功返回转换后的类型数据,否则Null
SELECT accurateCastOrNull(2, 'Int8'), accurateCastOrNull('ss', 'Int8')
-
countSubstrings/countSubstringsCaseInsensitive
计算某个字符串中包含特定字符的数量
select countSubstrings('com.foo.com.bar.com', 'com') ,countSubstringsCaseInsensitive('BaBaB', 'A')
网友评论