美文网首页
hive正则表达式

hive正则表达式

作者: cyc_twentyfive | 来源:发表于2022-05-05 18:22 被阅读0次

    1. Hive支持如下三个正则表达式:

    1. regexp
    2. regexp_extract
    3. regexp_replace

    1.1 字符集合:

    image.png

    1.2 重复次数

    image.png

    1.3 组合操作符

    image.png

    2. regexp

    select rn
    from(
        select '11145678abc' as rn
        union all
        select '111456789abc' as rn
        union all
        select 'd111456789abc' as rn
        )t
    where rn regexp '\\d{8}'   #有8个及以上数字的
    where rn regexp '^\\d{8}'  #以8个及以上数字开头
    where rn regexp '^\\d{8}\\D'  #以8个数字开头
    

    3. regexp_replace

    #语法
    regexp_replace(string INITIAL_STRING, string PATTERN, string REPLACEMENT)
    #返回用替换实例替换INITIAL_STRING中与模式中定义的java正则表达式语法匹配的所有子字符串所产生的字符串
    

    测试数据

    select id1,
             regexp_replace(str,'([^\\u4E00-\\u9FA5]+)','') as new_str1,  --截取汉字部分
             regexp_replace(str,'([^0-9]+)','') as new_str2,  --截取数字部分
             regexp_replace(str,'([^a-zA-Z]+)','') as new_str3,  --截取字母部分
             regexp_replace(str,'([^a-zA-Z0-9]+)','') as new_str4  --截取字母和数字
    from(
       select 1 as id1,'我在学习Hive,大数据。' as str
       union all
       select 2 as id1,'Hive,我来了,Coming666。' as str
       union all
       select 3 as id1,'666,Hive居然拥有关系型数据库的诸多特性。' as str
       union all
       select 4 as id1,'wuwuwu,Hive学习起来还是存在一定难度' as str
       union all
       select 5 as id1,'Hive数据仓库,6666。' as str
       )t
    

    4. regexp_extract

    #语法
    regexp_extract(string subject, string pattern, int index)
    #index=0返回匹配上的所有字符串,index=1表示匹配第一个括号的表达式,index=2匹配第二个括号的表达式,以此类推
    

    测试数据

    select id1,
              regexp_extract(str,'(filtertype"\\:")(\\d+)(",)',2) as new_str1,  --截取filter数字
              regexp_extract(str,'(filtername"\\:")((\\W*\\w*)|(\\w*))(",)',2) as new_str2,  --截取filtername
              regexp_extract(str,'(filtertitle"\\:")((\\W*\\w*)|(\\w*))(",)',2) as new_str3,  --截取filtertitle
              regexp_extract(str,'(filterid"\\:")(\\d+\\|\\d+)(",)',2) as new_str4, --截取filter_id
              regexp_extract(str,'(filtertype"\\:")(\\d+)(",)',0) as new_str5
    from(
        select 1 as id1,'{"filtertype":"29","filtername":"成人Node","filtertitle":"成人Group","filtersubtype":"","filterid":"29|1","filterValue":"1|4"}' as str
        union all
        select 2 as id1,'{"filtertitle":"钻级","filtertype":"16","filtersubtype":"","filtername":"四钻/高档","filterid":"16|4",}' as str
        )t
    

    相关文章

      网友评论

          本文标题:hive正则表达式

          本文链接:https://www.haomeiwen.com/subject/abroyrtx.html