美文网首页非JAVA开发者 - JAVA
CBoard+ES 快速搭建自助交互式数据分析平台(BI)

CBoard+ES 快速搭建自助交互式数据分析平台(BI)

作者: 郭彦超 | 来源:发表于2019-04-29 20:51 被阅读0次

    背景

    之前我们基于ES构建了内容中心的全站搜索,现在作品的制作情况也需要在前端BI层面进行交互分析,BI层的实时聚合部分我们采用ES来实现;为了不影响内容搜索业务的正常运行,我们选择新建一个集群来支撑上述功能,通过reindex操作实现跨集群数据同步;_reindex操作并不是es7.0才有的功能,通过这个操作可以快速实现索引的复制、重建、跨集群迁移

    搭建流程

    整个交互式数据分析平台由两部分构成:CBoard + ES。

    图表操作界面
    • CBoard
      因本篇侧重ES的使用讲解,CBoard的细节可参看相关资料;CBoard是一款开源的自助分析工具,使用流程主要由4步组成:创建数据源(这里我们选用ES)> 创建数据集 > 创建图表 > 创建看板。因创建图表功能支持用户拖拉拽操作,所以这部分操作对用户来说非常简单;部署完该项目后基本不需要二次开发,启动后配置完数据源便可以使用;有难度的是自助分析需要有一个强大的实时计算引擎支撑;经测试,ES可满足在现有数据上任意维度的聚合分析,图表的加载性能经优化后可达到秒级响应。

    • ES
      a、数据同步;这里采用_reindex的方式每天增量从内容库迁移,配置如下:

    # Daily incremental copy from the remote content cluster into work_model.
    # Note: reindex-from-remote does not support slicing, so no `slices` parameter is used here.
    POST _reindex?refresh
    {
      "conflicts": "proceed",   # keep going on version conflicts (documents already copied with the same or a newer version) instead of aborting the whole job
      "source": {
        "remote": {   # address of the source ES cluster to pull from
          "host": "http://source_host:9200"
        },
        "index": ["scene_model","ls_model","print_model"],   # source indices to pull documents from
        "_source": ["code","cover","id", "product", "title", "create_time", "publish_time", "update_time", "total_pv" ,"total_uv" ,"total_form", "login_id", "user_reg_time"],   # fields to copy; they correspond to the mapping of the destination index
        "size": 1000,    # scroll batch size
        "query": {       # incremental pull: documents published since the start of yesterday
          "range": {
            "publish_time": {
              "gte": "now-1d/d"
            }
          }
        }
      },
      "dest": {
        "index": "work_model",  # destination index name
        "version_type": "external"  # keep the source version: create missing documents, overwrite only older ones (upsert-like)
      },
      "script": {
        "lang": "painless",
        "source": "String src = ctx._index.startsWith('as2_') ? ctx._index.substring('as2_'.length()) : ctx._index; ctx._id = src + '_' + ctx._id; ctx._source.index = src"    # documents come from several source indices, so prefix _id with the source index name (minus an optional 'as2_' prefix) to avoid id collisions, and record that name in the `index` field
      }
    }
    

    b、mapping & setting

    {
      "mappings": {
        "dynamic": "false",
        "dynamic_templates": [
          {
            "strings": {
              "match_mapping_type": "string",
              "mapping": {
                "doc_values": false,
                "norms": false,
                "type": "keyword"
              }
            }
          }
        ],
        "date_detection": false,
        "properties": {
          "biz_type": {
            "type": "keyword"
          },
          "check_status": {
            "type": "keyword",
            "doc_values": false
          },
          "code": {
            "type": "keyword",
            "doc_values": false
          },
          "cover": {
            "type": "keyword",
            "index": false,
            "doc_values": false
          },
          "create_time": {
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "create_user": {
            "type": "keyword"
          },
          "enterprise": {
            "type": "keyword"
          },
          "id": {
            "type": "keyword",
            "doc_values": false
          },
          "is_del": {
            "type": "keyword",
            "doc_values": false
          },
          "login_id": {
            "type": "keyword",
            "doc_values": false
          },
          "member_type": {
            "type": "keyword"
          },
          "product": {
            "type": "keyword",
            "doc_values": false
          },
          "publish_time": {
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "share_type": {
            "type": "keyword"
          },
          "template_code": {
            "type": "keyword",
            "doc_values": false
          },
          "template_title": {
            "type": "text",
            "index_options": "freqs",
            "analyzer": "eqs_analyzer",
            "search_analyzer": "ik_smart"
          },
          "title": {
            "type": "text",
            "index_options": "freqs",
            "analyzer": "eqs_analyzer",
            "search_analyzer": "ik_smart"
          },
          "total_form": {
            "type": "integer",
            "ignore_malformed": true
          },
          "total_pv": {
            "type": "integer",
            "ignore_malformed": true
          },
          "total_spv": {
            "type": "integer",
            "ignore_malformed": true
          },
          "total_uv": {
            "type": "integer",
            "ignore_malformed": true
          },
          "update_time": {
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "user_name": {
            "type": "keyword",
            "doc_values": false
          },
          "user_phone": {
            "type": "keyword"
          },
          "user_type": {
            "type": "keyword"
          },
          "yesterday_pv": {
            "type": "integer",
            "ignore_malformed": true
          },
          "yesterday_uv": {
            "type": "integer",
            "ignore_malformed": true
          }
        }
      },
      "settings": {
        "index": {
          "refresh_interval": "120s",
          "translog": {
            "flush_threshold_size": "1024mb",
            "sync_interval": "120s",
            "durability": "async"
          },
          "max_result_window": "20000",
          "store": {
            "type": "niofs"
          },
          "unassigned": {
            "node_left": {
              "delayed_timeout": "1d"
            }
          },
          "analysis": {
            "analyzer": {
              "eqs_highlight_analyzer": {
                "filter": [
                  "unique"
                ],
                "type": "custom",
                "tokenizer": "letter"
              },
              "eqs_analyzer": {
                "filter": [
                  "unique"
                ],
                "char_filter": [
                  "html_strip"
                ],
                "type": "custom",
                "tokenizer": "ik_max_word"
              }
            }
          },
          "number_of_replicas": "0",
          "codec": "best_compression",
          "routing": {
            "allocation": {
              "total_shards_per_node": "10"
            }
          },
          "search": {
            "slowlog": {
              "level": "info",
              "threshold": {
                "fetch": {
                  "info": "500ms"
                },
                "query": {
                  "info": "1s"
                }
              }
            }
          },
          "number_of_shards": "8",
          "merge": {
            "scheduler": {
              "max_thread_count": "2"
            }
          }
        }
      }
    }
    
    

    相关文章

      网友评论

        本文标题:CBoard+ES 快速搭建自助交互式数据分析平台(BI)

        本文链接:https://www.haomeiwen.com/subject/azlynqtx.html