美文网首页
1+2集群gp测试

1+2集群gp测试

作者: 长振 | 来源:发表于2018-06-26 11:12 被阅读0次

gp-test

select count(*) from test.ssd_sales_member_order--50352035,8255ms
select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order'))--2494 MB

固态盘,ext3文件格式
表大小,2.5G

导出txt

21G,gzip压缩后2.9G
scp root@114.55.92.30:/home/install/data-integration/etl/test-gp.dat /kettle7 #speed:0.6m/s
gzip -c test-gp.dat > test-gp.dat.gz
600+kb/s(5M带宽)
gzip –d /opt/etc.zip.gz (gzip -d <=> gunzip)

gpfdist加载

gp5.8版本
-- 启动gpfdist
locate gpfdist
(需要GPHOME环境变量:source /usr/local/greenplum-db/greenplum_path.sh)
/usr/local/greenplum-db-5.8.0/bin/gpfdist -d /kettle7/data-integration/gpload -p 9876
-l /kettle7/data-integration/gpload &
-- 外部表:
LOCATION ('gpfdist://192.168.200.206:9876/test-gp.dat')
FORMAT 'TEXT' (DELIMITER '|' NULL '' ESCAPE 'OFF')
LOG ERRORS SEGMENT REJECT LIMIT 2 rows

压缩前

select count(*) from gpfdist.ssd_sales_member_order --229125ms
-- 加载到内部表:1452s,半小时
insert into test.ssd_sales_member_order
select * from gpfdist.ssd_sales_member_order
50352035/1452=34677 rows/s
20152418304/1024/1024/1452=13m/s

select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order'))--23GB

压缩后

ALTER TABLE 表名 SET SCHEMA 新schema
455983ms
select 20152418304/1024/1024/455=42m/s
select pg_size_pretty(pg_relation_size('test.ssd_sales_member_order_zip'))--2534 MB

select count() from test.ssd_sales_member_order --146591ms
select count(
) from test.ssd_sales_member_order_zip --2400ms
--分布情况
select gp_segment_id,count(*) from test.ssd_sales_member_order_zip group by 1

cpu 内存 条数 count 数据量 txt加载速度 备注
单机8seg 8核 64G 50352035 8255ms 2494M - ssd,列存,5倍压缩
集群2×4p4m 8核 16G 50352035 146591ms 23G 1452480ms 行存
集群2×4p4m 8核 16G 50352035 2400ms 2534M 455983ms 列存,5倍压缩

-- 外部表加载错误
SELECT gp_read_error_log('ods.inventory');
-- DROP external TABLE gpfdist.ssd_sales_member_order

select count(*) from test.ssd_sales_member_order;--50352035
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order;--
select count(*) from test.ssd_sales_member_order;--100704070,4104ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order00;
select count(*) from test.ssd_sales_member_order;--151056105,5567ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order00;
select count(*) from test.ssd_sales_member_order;--201408140,7085ms
insert into test.ssd_sales_member_order
select * from test.ssd_sales_member_order;
select count(*) from test.ssd_sales_member_order;--402816280,13142ms

select filday,count(*) from test.ssd_sales_member_order group by filday;--17.210s
select filmonth,count(*) from test.ssd_sales_member_order group by filmonth;--21.902s
explain
select filyear,count(*) from test.ssd_sales_member_order group by filyear;--21.260s
truncate table test.ssd_sales_member_order
SELECT SUM
    ( anon_1.amt ) AS amt 
FROM
    (
SELECT
    round( SUM ( anon_2.amt ) / COUNT ( DISTINCT ( anon_2.order_id )), 2 ) AS per,
    round(( SUM ( anon_2.purchase_rate ) * 1.0 ) / COUNT ( DISTINCT ( anon_2.order_id )), 2 ) AS purchase_rate,
    count (
    DISTINCT ( anon_2.order_id )) AS COUNT,
    SUM ( anon_2.amt ) AS amt 
FROM
    (
SELECT
    test.ssd_sales_member_order.order_id AS order_id,
    test.ssd_sales_member_order.memberid AS member_id,
    test.ssd_sales_member_order.amt AS amt,
    test.ssd_sales_member_order.purchase_rate AS purchase_rate,
    test.ssd_sales_member_order.cnt AS cnt 
FROM
    test.ssd_sales_member_order 
WHERE
    test.ssd_sales_member_order.filday >= '2018-01-01' 
    AND test.ssd_sales_member_order.filday <= '2018-05-15' 
GROUP BY
    test.ssd_sales_member_order.order_id,
    test.ssd_sales_member_order.memberid,
    test.ssd_sales_member_order.amt,
    test.ssd_sales_member_order.purchase_rate,
    test.ssd_sales_member_order.cnt 
    ) AS anon_2 
GROUP BY
    anon_2.member_id 
    ) AS anon_1 
WHERE
    anon_1.purchase_rate >= 1 
    AND anon_1.purchase_rate < 1.5

相关文章

网友评论

      本文标题:1+2集群gp测试

      本文链接:https://www.haomeiwen.com/subject/dcmpyftx.html