gp-test
select count(*) from test.ssd_sales_member_order--50352035,8255ms
select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order'))--2494 MB
固态盘,ext3文件格式
表大小,2.5G
导出txt
21G,gzip压缩后2.9G
scp root@114.55.92.30:/home/install/data-integration/etl/test-gp.dat /kettle7 #speed:0.6m/s
gzip -c test-gp.dat > test-gp.dat.gz
600+kb/s(5M带宽)
gzip –d /opt/etc.zip.gz (gzip -d <=> gunzip)
gpfdist加载
gp5.8版本
-- 启动gpfdist
locate gpfdist
(需要GPHOME环境变量:source /usr/local/greenplum-db/greenplum_path.sh)
/usr/local/greenplum-db-5.8.0/bin/gpfdist -d /kettle7/data-integration/gpload -p 9876
-l /kettle7/data-integration/gpload &
-- 外部表:
LOCATION ('gpfdist://192.168.200.206:9876/test-gp.dat')
FORMAT 'TEXT' (DELIMITER '|' NULL '' ESCAPE 'OFF')
LOG ERRORS SEGMENT REJECT LIMIT 2 rows
压缩前
select count(*) from gpfdist.ssd_sales_member_order --229125ms
-- 加载到内部表:1452s,半小时
insert into test.ssd_sales_member_order
select * from gpfdist.ssd_sales_member_order
50352035/1452=34677 rows/s
20152418304/1024/1024/1452=13m/s
select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order'))--23GB
压缩后
ALTER TABLE 表名 SET SCHEMA 新schema
455983ms
select 20152418304/1024/1024/455=42m/s
select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order_zip'))--2534 MB
select count() from test.ssd_sales_member_order --146591ms
select count() from test.ssd_sales_member_order_zip --2400ms
--分布情况
select gp_segment_id,count(*) from test.ssd_sales_member_order_zip group by 1
库 | cpu | 内存 | 条数 | count | 数据量 | txt加载速度 | 备注 |
---|---|---|---|---|---|---|---|
单机8seg | 8核 | 64G | 50352035 | 8255ms | 2494M | - | ssd,列存,5倍压缩 |
集群2×4p4m | 8核 | 16G | 50352035 | 146591ms | 23G | 1452480ms | 行存 |
集群2×4p4m | 8核 | 16G | 50352035 | 2400ms | 2534M | 455983ms | 列存,5倍压缩 |
-- 外部表加载错误
SELECT gp_read_error_log('ods.inventory');
-- DROP external TABLE gpfdist.ssd_sales_member_order
select count(*) from test.ssd_sales_member_order;--50352035
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order;--
select count(*) from test.ssd_sales_member_order;--100704070,4104ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order00;
select count(*) from test.ssd_sales_member_order;--151056105,5567ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order00;
select count(*) from test.ssd_sales_member_order;--201408140,7085ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order;
select count(*) from test.ssd_sales_member_order;--402816280,13142ms
select filday,count(*) from test.ssd_sales_member_order group by filday;--17.210s
select filmonth,count(*) from test.ssd_sales_member_order group by filmonth;--21.902s
explain
select filyear,count(*) from test.ssd_sales_member_order group by filyear;--21.260s
truncate table test.ssd_sales_member_order
SELECT SUM
( anon_1.amt ) AS amt
FROM
(
SELECT
round( SUM ( anon_2.amt ) / COUNT ( DISTINCT ( anon_2.order_id )), 2 ) AS per,
round(( SUM ( anon_2.purchase_rate ) * 1.0 ) / COUNT ( DISTINCT ( anon_2.order_id )), 2 ) AS purchase_rate,
count (
DISTINCT ( anon_2.order_id )) AS COUNT,
SUM ( anon_2.amt ) AS amt
FROM
(
SELECT
test.ssd_sales_member_order.order_id AS order_id,
test.ssd_sales_member_order.memberid AS member_id,
test.ssd_sales_member_order.amt AS amt,
test.ssd_sales_member_order.purchase_rate AS purchase_rate,
test.ssd_sales_member_order.cnt AS cnt
FROM
test.ssd_sales_member_order
WHERE
test.ssd_sales_member_order.filday >= '2018-01-01'
AND test.ssd_sales_member_order.filday <= '2018-05-15'
GROUP BY
test.ssd_sales_member_order.order_id,
test.ssd_sales_member_order.memberid,
test.ssd_sales_member_order.amt,
test.ssd_sales_member_order.purchase_rate,
test.ssd_sales_member_order.cnt
) AS anon_2
GROUP BY
anon_2.member_id
) AS anon_1
WHERE
anon_1.purchase_rate >= 1
AND anon_1.purchase_rate < 1.5
网友评论