-- Standard table-creation statement: three string columns,
-- partitioned by dt and scene.
create table tv_corpus (
    device_id  string,
    eposide_id string,
    channel_id string
)
partitioned by (
    dt    string,
    scene string
);
-- Show column names in query output.
-- Print a header row with the column names in the Hive CLI.
set hive.cli.print.header=true;
-- Do not prefix the column names in the result header with the table name.
set hive.resultset.use.unique.column.names=false;
-- Display the DDL statement that created the table.
show create table tv_corpus;
-- Create a temporary table that stores a query result (CTAS).
-- The `as` keyword is required between the table name and the query.
-- Replace each `...` with the real column list / source table.
-- NOTE: a temporary table that reuses the name of a permanent table hides
-- that permanent table for the rest of the session.
create temporary table tv_corpus as
select ... from ...;
-- Write a query result into a table partition.
-- `insert overwrite` replaces the existing data of the target partition;
-- use `insert into table ...` instead to append.
-- tv_corpus is partitioned by (dt, scene), so both partition columns must be
-- listed: dt is static here, scene is dynamic and is taken from the last
-- column of the select list.
-- Replace each `...` with the real column list / source table.
insert overwrite table tv_corpus partition (dt = '${bizdate}', scene)
select ... from ...;
-- with ... as (common table expressions): name each subquery once,
-- then combine them in the final select.
-- Replace each `...` with the real column list / join condition.
with t1 as (
    select ... from tv_corpus
),
t2 as (
    select ... from falls_corpus
)
select
    t1.*,
    t2.*
from t1
inner join t2
    on ...;
-- "not in" written as an anti-join: keep the t1 rows whose uid has no match in t2.
-- Hive 0.13+ also accepts `where uid not in (select uid from t2)`, but that form
-- returns no rows at all when the subquery yields a NULL uid; the left join
-- plus `is null` filter below is not affected by that.
select a.*
from t1 a
left join t2 b
    on a.uid = b.uid
where b.uid is null;
#concat concat_ws...
concat(a,b,c) #连接abc字段
concat_ws('-',a,b) #以-分隔符连接字段
group_concat(a).. group by b #返回以b聚合后的a值(注意:group_concat 是 MySQL 函数,Hive 中没有;Hive 中可用 concat_ws(',', collect_list(a)) .. group by b 实现)
-- split: break device_id on '#' and keep the first piece.
select
    split(device_id, '#')[0] as device_id
from tv_corpus;
-- explode: emit each element produced by split as its own output row.
select
    explode(split(device_id, '#')) as device_id
from tv_corpus;
-- lateral view: explode r_source on '@' and count the rows per exploded value
-- for a single dt partition.
-- The lateral view clause is part of the from clause, so it must appear
-- before where; the aggregate function is count(), not cnt().
select
    exploded.r_source_item as r_source,
    count(1)
from tv_corpus
lateral view explode(split(r_source, '@')) exploded as r_source_item
where dt = '${bizdate}'
group by exploded.r_source_item;
#get_json_object :json的解析函数
对于[{"name":"王二狗","sex":"男","age":"25"},{"name":"李狗嗨","sex":"男","age":"47"}]
-- Extract the first JSON object from the array held in column json of table person.
-- NOTE(review): the Hive manual describes array subscripts in the `$[0]` form;
-- confirm that "$.[0]" is accepted on the target Hive version.
select
    get_json_object(json, "$.[0]")
from person;
#返回
{"name":"王二狗","sex":"男","age":"25"}
#行转列(把多行的值聚合成一个数组;反向的列转行用 explode / lateral view)
collect_set : 去重
collect_list: 不去重
Coalesce 取指定内容(列)中第一个不为空的值 可以理解为 ifnull() 是其简化版
https://www.cnblogs.com/yanglang/p/10081181.html
union 并集
intersect 交集
except 差集 https://www.cnblogs.com/kissdodog/archive/2013/06/24/3152743.html
regexp_replace 正则替换 https://blog.csdn.net/qq_20989105/article/details/77894949
array_contains ARRAY_CONTAINS(pages_sns, "a") AS .. 判断是否包含‘a’
!array_contains是不包含的
substr substr(string,1,3) 得到str 从左往右取3字长 从右往左取的话 substr(string,-3,3) 得到ing(负数起始位置表示从末尾倒数;substr(string,-1,3) 只会得到 g)
round()函数遵循四舍五入原则,用于把数值字段舍入为指定的小数位数
floor(value)函数返回小于或等于指定值(value)的最大整数
ceiling(value)函数返回大于或等于指定值(value)的最小整数
trim(device_id) 移除字符首尾空格。 还有 ltrim rtrim https://www.1keydata.com/cn/sql/sql-trim.php
网友评论