美文网首页
2022-03-02 表数据加载

2022-03-02 表数据加载

作者: Denholm | 来源:发表于2022-03-02 18:42 被阅读0次
-- 1、insert into 添加分区表数据(测试用,实际开发不用)
-- 复制表结构
create table score3 like score;

-- table可省略
insert into table score3 partition (month = '202001')
values ('001', '002', 100);

insert into score3 partition (month = '202002')
values ('001', '003', 100);

select *
from score3;

-- 2、通过查询方式添加(常用)!!!!!!!!!!!!!!
create table score4 like score;

-- 重点,remember
insert overwrite table score4 partition (month = '201912')
-- 字段个数、顺序要一一对应
select sid, cid, sscore
from score;

select *
from score4;

-- 3、通过load方式加载(常用)!!!!!!!!!!!!!
create table score5 like score;

-- 重点,remember
-- 有local:从linux本地加载,这种加载做的是复制
load data local inpath '/export/data/score.txt' -- linux目录
    overwrite into table score5 partition (month = '194702');

-- 无local:从hdfs加载,这种加载做的是剪切
load data inpath '/myhive/data/score.txt' -- hdfs目录
    overwrite into table score5 partition (month = '190001');

select *
from score5;

select count(*)
from score5;

-- 4、如果先有数据后创建表,则可以通过location方法加载数据
create external table covid3
(
    date_value date,
    county     string,
    state      string,
    fips       string,
    cases      int,
    deaths     int
)
    row format delimited fields terminated by ','
    location '/hive/data/covid';

select count(*)
from covid3;

select sum(cases)
from covid3;

select *
from covid3;

-- 5、通过hadoop fs -put/-mv 将数据文件(数据文件格式要和表结构保持一致)上传到表目录文件夹即可自动生成表数据


-- 6、多插入模式加载(熟悉)!!!!!!!!!!!!!!
create table score6 like score;

load data local inpath '/export/data/score.txt'
    overwrite into table score6 partition (month = '171819');

select *
from score6;

-- 创建表字段对应score6表前两个字段
create table score_first
(
    sid string,
    cid string
)
    partitioned by (month string)
    row format delimited fields terminated by '\t';

-- 创建表字段对应score6表后两个字段
create table score_second
(
    cid    string,
    sscore int
)
    partitioned by (month string)
    row format delimited fields terminated by '\t';

from score6
-- 将score6表的前两个字段查询出来插入到score_first
insert
overwrite
table
score_first
partition
(
month = '199106'
)
select sid,
       cid
-- 将score6表的后两个字段查询出来插入到score_second
insert
overwrite
table
score_second
partition
(
month = '199106'
)
select cid, sscore;

select *
from score_first;

select *
from score_second;

相关文章

网友评论

      本文标题:2022-03-02 表数据加载

      本文链接:https://www.haomeiwen.com/subject/vsrerrtx.html