美文网首页
Superset系列3-测试数据准备

Superset系列3-测试数据准备

作者: 只是甲 | 来源:发表于2021-11-17 09:57 被阅读0次

    环境:
    MySQL 8.0

    一. 准备维表数据

    1.1 时间维度

    create or replace view dim_date as
    with recursive dt(date_in,n) as
    ( select '2020-01-01' as date_in, 1 as n
      union all
      select adddate(date_in,1), n+1 from dt where n< 366
    )
    select * from dt;
    
    
    
    select * from dim_date;
    

    通过上述代码,生成2020年每一天的日期。


    image.png

    1.2 产品维度

    create or replace view dim_prod as
    with recursive dt(n) as
    ( select 1 as n
      union all
      select n+1 from dt where n<=10
    )
    select concat('PROD',n) as prod_name,n from dt;
    
    select * from dim_prod;
    
    image.png

    1.3 地区维度

    drop table if exists dim_area;
    
    create table dim_area
    ( id  int AUTO_INCREMENT primary key ,
      area_code varchar(50) comment  '代码',
      iso_name  varchar(50) comment  'ISO名称',
      area_name varchar(50) comment  '行政区名称',
      area_type varchar(50) comment  '类型',
      area_code_old varchar(50) comment  '代码-老版本'
    );
    
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-AH','Anhui Sheng','安徽省','省','CN-34');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-BJ','Beijing Shi','北京市','直辖市','CN-11');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-CQ','Chongqing Shi','重庆市','直辖市','CN-50');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-FJ','Fujian Sheng','福建省','省','CN-35');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-GD','Guangdong Sheng','广东省','省','CN-44');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-GS','Gansu Sheng','甘肃省','省','CN-62');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-GX','Guangxi Zhuangzu Zizhiqu','广西壮族自治区','自治区','CN-45');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-GZ','Guizhou Sheng','贵州省','省','CN-52');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HA','Henan Sheng','河南省','省','CN-41');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HB','Hubei Sheng','湖北省','省','CN-42');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HE','Hebei Sheng','河北省','省','CN-13');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HI','Hainan Sheng','海南省','省','CN-46');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HK','Xianggang Tebiexingzhengqu','香港特别行政区','特别行政区','CN-91');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HL','Heilongjiang Sheng','黑龙江省','省','CN-23');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-HN','Hunan Sheng','湖南省','省','CN-43');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-JL','Jilin Sheng','吉林省','省','CN-22');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-JS','Jiangsu Sheng','江苏省','省','CN-32');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-JX','Jiangxi Sheng','江西省','省','CN-36');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-LN','Liaoning Sheng','辽宁省','省','CN-21');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-MO','Aomen Tebiexingzhengqu','澳门特别行政区','特别行政区','CN-92');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-NM','Nei Mongol Zizhiqu','内蒙古自治区','自治区','CN-15');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-NX','Ningxia Huizu Zizhiqu','宁夏回族自治区','自治区','CN-64');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-QH','Qinghai Sheng','青海省','省','CN-63');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-SC','Sichuan Sheng','四川省','省','CN-51');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-SD','Shandong Sheng','山东省','省','CN-37');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-SH','Shanghai Shi','上海市','直辖市','CN-31');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-SN','Shaanxi Sheng','陕西省','省','CN-61');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-SX','Shanxi Sheng','山西省','省','CN-14');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-TJ','Tianjin Shi','天津市','直辖市','CN-12');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-TW','Taiwan Sheng','台湾省[注2]','省','CN-71');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-XJ','Xinjiang Uygur Zizhiqu','新疆维吾尔自治区','自治区','CN-65');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-XZ','Xizang Zizhiqu','西藏自治区','自治区','CN-54');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-YN','Yunnan Sheng','云南省','省','CN-53');
    insert into dim_area (area_code, iso_name, area_name, area_type,area_code_old) values ('CN-ZJ','Zhejiang Sheng','浙江省','省','CN-33');
    

    superset中的国家地图反正用的是老版的


    image.png

    二.通过维度表及随机数生成测试数据

    drop table if exists fact_sale;
    
    
    create table fact_sale(date_in date,prod_name varchar(50),area_code varchar(50), sale_num varchar(50), sale_amount varchar(50));
    
    insert into fact_sale
    with recursive dt(n) as
    ( select 1 as n
      union all
      select n+1 from dt where n < 100
    )
    select dd.date_in,
           dp.prod_name,
           da.area_code_old as  area_code,
           ceil(rand()*100) as  sale_num,
           ceil(rand()*10000) as  sale_amount
      from dim_area da
      join dim_date dd
      join dim_prod dp
      join dt
      on dt.n <= ceil(rand()*10)
    order by dd.date_in,
            dp.prod_name,
            da.area_code_old;
    

    通过维度表及随机数去构造一个事实表


    image.png

    参考:

    1. https://blog.csdn.net/weixin_38204423/article/details/113607111
    2. https://www.it1352.com/790136.html
    3. https://dev.maxmind.com/geoip/legacy/codes

    相关文章

      网友评论

          本文标题:Superset系列3-测试数据准备

          本文链接:https://www.haomeiwen.com/subject/jvbxzltx.html