美文网首页大数据
dolphinscheduler部署文档(集群版)

dolphinscheduler部署文档(集群版)

作者: Rudolf_liu | 来源:发表于2020-03-31 17:33 被阅读0次

    集群规划

    ds集群(生产配置)

    hostname CPU Memory Disk size Master Worker Logger Alert API UI
    DS01 8C 16G 50G
    DS02 8C 16G 50G
    DS03 4C 8G 50G

    若是部署了大数据框架,则机器性能需要进一步提升

    zk集群

    1C-2G-50G

    准备(每台机器都要这样操作)

    创建用户

    # 创建用户需使用root登录,设置部署用户名,请自行修改,后面以dolphinscheduler为例
    useradd dolphinscheduler;
    
    # 设置用户密码,请自行修改,后面以dolphinscheduler123为例
    echo "dolphinscheduler123" | passwd --stdin dolphinscheduler
    
    # 配置sudo免密
    echo 'dolphinscheduler  ALL=(ALL)  NOPASSWD: ALL' >> /etc/sudoers
    

    hosts映射

    vi /etc/hosts
    
    10.10.xxx.xxx master
    10.10.xxx.xxx slave1
    10.10.xxx.xxx slave2
    

    配置免密登录

    su dolphinscheduler;
    
    ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys
    
    # 分发
    su dolphinscheduler;
    for ip in slave1 slave2;     #请将此处slave1 slave2替换为自己要部署的机器的hostname
    do
        ssh-copy-id  $ip   #该操作执行过程中需要手动输入dolphinscheduler用户的密码
    done
    
    # 首次在每台机器上通过ssh连接各节点(包括localhost)时,需要输入yes,让本机记录对方的主机公钥(写入~/.ssh/known_hosts)
    

    下载

    后端: wget https://downloads.apache.org/incubator/dolphinscheduler/1.2.0/apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-backend-bin.tar.gz
    前端: wget https://downloads.apache.org/incubator/dolphinscheduler/1.2.0/apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-front-bin.tar.gz
    

    部署后端

    安装

    mkdir -p /opt/app/dolphinscheduler    # ds的安装目录
    mkdir -p /opt/soft/dolphinscheduler    # ds的下载目录
    cd /opt/soft/dolphinscheduler
    tar -zxvf apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-backend-bin.tar.gz -C /opt/soft/dolphinscheduler
    mv apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-backend-bin  dolphinscheduler-backend
    
    tree -L 1    # 查看目录结构
    .
    ├── bin           # 基础服务启动脚本
    ├── conf          # 项目配置文件
    ├── DISCLAIMER-WIP# DISCLAIMER文件
    ├── install.sh    # 一键部署脚本
    ├── lib           # 项目依赖jar包,包括各个模块jar和第三方jar
    ├── LICENSE       # LICENSE文件
    ├── licenses      # 运行时license
    ├── NOTICE        # NOTICE文件
    ├── script        # 集群启动、停止和服务监控启停脚本
    └── sql           # 项目依赖sql文件
    
    # 授权解压目录
    sudo chown -R dolphinscheduler:dolphinscheduler dolphinscheduler-backend
    # 授权安装目录
    sudo chown -R dolphinscheduler:dolphinscheduler /opt/app/dolphinscheduler
    

    jdk软连接

    echo $JAVA_HOME    # 没有软连接后面在启动脚本的时候会报错:nohup /bin/java不存在
    sudo ln -s /usr/java/jdk1.8.0_111/bin/java /usr/bin/java
    # 注意:如果/usr/bin/java原先已是指向openjdk的软连接,需要先删掉它(sudo rm /usr/bin/java),再执行上面的ln命令
    

    数据库初始化

    • 创建数据库
    mysql -uroot -p
    -- 创建数据库
    CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
    -- 创建用户及授权(可能需要修改数据库密码等级)
    GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
    GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'localhost' IDENTIFIED BY 'dolphinscheduler';
    flush privileges;
    
    • 修改配置
    vi conf/application-dao.properties
    # 需要注释掉postgresql的信息
    # postgre
    #spring.datasource.driver-class-name=org.postgresql.Driver
    #spring.datasource.url=jdbc:postgresql://localhost:5432/dolphinscheduler
    # mysql
    spring.datasource.driver-class-name=com.mysql.jdbc.Driver
    # 需要修改为数据库所在机器的ip/主机名,数据库在本机时用localhost即可(注意:properties文件不支持行尾注释,注释必须单独成行)
    spring.datasource.url=jdbc:mysql://master:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8
    spring.datasource.username=dolphinscheduler
    spring.datasource.password=dolphinscheduler
    
    • 执行脚本初始化数据表
    sh script/create-dolphinscheduler.sh
    

    尖叫提示:执行脚本前需要下载mysql-connector-java驱动jar包(需要选择与mysql版本对应的驱动,不是bin),并将其放到dolphinscheduler-backend的lib目录下,下载地址:https://downloads.mysql.com/archives/c-j/
    尖叫提示:如果执行上述脚本报 “/bin/java: No such file or directory” 错误,请在/etc/profile下配置 JAVA_HOME 及 PATH 变量

    修改环境变量

    vi conf/env/.dolphinscheduler_env.sh    # 这是一个隐藏文件
    # JAVA_HOME 和 PATH 是必须要配置的,其他没有用到的可以忽略或者注释掉
    #export HADOOP_HOME=/opt/soft/hadoop
    #export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
    #export SPARK_HOME1=/opt/soft/spark1
    #export SPARK_HOME2=/opt/soft/spark2
    #export PYTHON_HOME=/opt/soft/python
    export JAVA_HOME=/usr/java/jdk1.8.0_111
    #export HIVE_HOME=/opt/soft/hive
    #export FLINK_HOME=/opt/soft/flink
    export PATH=$HADOOP_HOME/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$PATH
    

    安装python的zookeeper工具kazoo(每台都要安装)

    #安装pip
    sudo yum -y install python-pip;  #ubuntu请使用 sudo apt-get install python-pip
    sudo pip install kazoo;
    
    # 注意:如果yum没找到python-pip,也可以通过下面方式安装
    sudo curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
    sudo python get-pip.py  # 如果是python3,使用sudo python3 get-pip.py
    #然后
    sudo pip install kazoo;
    

    修改一键部署脚本

    vi install.sh
    # for example postgresql or mysql ...
    dbtype="mysql"
    
    # db config
    # db address and port
    dbhost="master:3306"
    
    # db name
    dbname="dolphinscheduler"
    
    # db username
    username="dolphinscheduler"
    
    # db password
    # Note: if there are special characters, please use the \ escape character to escape them
    passowrd="dolphinscheduler"
    
    # conf/config/install_config.conf config
    # Note: the installation path is not the same as the current path (pwd)
    # 将应用安装到哪个目录下
    installPath="/opt/app/dolphinscheduler"
    
    # deployment user
    # Note: the deployment user needs to have sudo privileges and permissions to operate hdfs. If hdfs is enabled, the root directory needs to be created by itself
    # 使用哪个用户部署
    deployUser="dolphinscheduler"
    
    # zk cluster,集群方式以逗号分隔
    zkQuorum="master:2181,slave1:2181,slave2:2181"
    
    # install hosts
    # Note: install the scheduled hostname list. If it is pseudo-distributed, just write a pseudo-distributed hostname
    # 在哪些机器上部署
    ips="master,slave1,slave2"
    
    # conf/config/run_config.conf config
    # run master machine
    # Note: list of hosts hostname for deploying master
    masters="master,slave1"
    
    # run worker machine
    # note: list of machine hostnames for deploying workers
    workers="master,slave1,slave2"
    
    # run alert machine
    # note: list of machine hostnames for deploying alert server
    alertServer="master"
    
    # run api machine
    # note: list of machine hostnames for deploying api server
    apiServers="master"
    
    # 邮件服务
    # alert config
    # mail protocol
    mailProtocol="SMTP"
    
    # mail server host
    mailServerHost="****"
    
    # mail server port
    mailServerPort="***"
    
    # sender
    mailSender="*******"
    
    # user
    mailUser="*******"
    
    # sender password
    mailPassword="*******"
    
    # TLS mail protocol support
    starttlsEnable="false"
    
    sslTrust="*******"
    
    # SSL mail protocol support
    # note: The SSL protocol is enabled by default.
    # only one of TLS and SSL can be in the true state.
    sslEnable="true"
    
    # download excel path
    xlsFilePath="/tmp/xls"
    
    # 业务用到的比如sql等资源文件上传到哪里,可以设置:HDFS,S3,NONE,单机如果想使用本地文件系统,请配置为HDFS,因为HDFS支持本地文件系统;如果不需要资源上传功能请选择NONE。强调一点:使用本地文件系统不需要部署hadoop
    resUploadStartupType="HDFS"
    
    # 这里以保存到本地文件系统为例,因此defaultFS使用file://协议
    #注:但是如果你想上传到HDFS的话,NameNode启用了HA,则需要将core-site.xml和hdfs-site.xml放到conf目录下,本例即是放到/opt/app/dolphinscheduler/conf下面,并将defaultFS配置为namenode cluster名称(例如hdfs://hadoopcluster/dolphinscheduler);如果NameNode不是HA,则修改为具体的ip或者主机名即可(例如hdfs://{具体的ip/主机名}:8020)
    defaultFS="file:///data/dolphinscheduler"
    
    # 如果ResourceManager是HA,则配置为ResourceManager节点的主备ip或者hostname,比如"192.168.xx.xx,192.168.xx.xx",否则如果是单ResourceManager或者根本没用到yarn,请配置yarnHaIps=""即可,我这里没用到yarn,配置为""
    yarnHaIps=""
    
    # 如果是单ResourceManager,则配置为ResourceManager节点ip或主机名,否则保持默认值即可。我这里没用到yarn,保持默认
    singleYarnIp=""
    
    # 由于hdfs支持本地文件系统,需要确保本地文件夹存在且有读写权限
    hdfsPath="/data/dolphinscheduler"
    

    执行一键部署脚本

    su dolphinscheduler
    
    sh install.sh
    

    查看进程是否启动

    jps
    
    master
    MasterServer         ----- master服务
    WorkerServer         ----- worker服务
    LoggerServer         ----- logger服务
    ApiApplicationServer ----- api服务
    AlertServer          ----- alert服务
    
    slave1
    MasterServer         ----- master服务
    WorkerServer         ----- worker服务
    LoggerServer         ----- logger服务
    
    slave2
    WorkerServer         ----- worker服务
    LoggerServer         ----- logger服务
    

    解压

    tar -zxvf apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-front-bin.tar.gz -C /opt/soft/dolphinscheduler;
    mv apache-dolphinscheduler-incubating-1.2.0-dolphinscheduler-front-bin dolphinscheduler-ui
    

    部署前端

    部署

    自动化部署

    • 执行自动化部署脚本
    cd dolphinscheduler-ui;
    sh ./install-dolphinscheduler-ui.sh;
    执行后,脚本运行过程中会提示键入前端端口,默认端口是8888;如果使用默认端口,直接键入回车即可,否则键入其他端口
    然后会让键入跟前端ui交互的api-server的ip
    接着是让键入跟前端ui交互的api-server的port
    接着是操作系统选择
    等待部署完成
    
    • 修改nginx配置
    vi /etc/nginx/nginx.conf
    # add param 在http块(http { ... })内添加即可
    client_max_body_size 1024m;
    systemctl restart nginx
    

    手动部署

    vi /etc/nginx/nginx.conf
    server {
      listen       8888;# 访问端口(自行修改)
      server_name  localhost;
      #charset koi8-r;
      #access_log  /var/log/nginx/host.access.log  main;
      location / {
          root   /opt/app/dolphinscheduler-ui/dist;      # 前端解压的dist目录地址(自行修改)
          index  index.html index.htm;
      }
      location /dolphinscheduler {
          proxy_pass http://localhost:12345;    # api服务地址(自行修改)
          proxy_set_header Host $host;
          proxy_set_header X-Real-IP $remote_addr;
          proxy_set_header x_real_ipP $remote_addr;
          proxy_set_header remote_addr $remote_addr;
          proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
          proxy_http_version 1.1;
          proxy_connect_timeout 4s;
          proxy_read_timeout 30s;
          proxy_send_timeout 12s;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection "upgrade";
      }
      #error_page  404              /404.html;
      # redirect server error pages to the static page /50x.html
      #
      error_page   500 502 503 504  /50x.html;
      location = /50x.html {
          root   /usr/share/nginx/html;
      }
    }
    

    初始账号密码

    地址:master:8888 用户名密码:admin/dolphinscheduler123

    指令

    启动:
        启动所有进程:sh bin/start-all.sh
        启动master-server:sh bin/dolphinscheduler-daemon.sh start master-server
        启动worker-server:sh bin/dolphinscheduler-daemon.sh start worker-server
        启动logger-server:sh bin/dolphinscheduler-daemon.sh start logger-server
        启动alert-server:sh bin/dolphinscheduler-daemon.sh start alert-server
        启动api-server:sh bin/dolphinscheduler-daemon.sh start api-server
    
    停止:
        停止所有进程:sh bin/stop-all.sh
        停止master-server:sh bin/dolphinscheduler-daemon.sh stop master-server
        停止worker-server:sh bin/dolphinscheduler-daemon.sh stop worker-server
        停止logger-server:sh bin/dolphinscheduler-daemon.sh stop logger-server
        停止alert-server:sh bin/dolphinscheduler-daemon.sh stop alert-server
        停止api-server:sh bin/dolphinscheduler-daemon.sh stop api-server
    

    相关文章

      网友评论

        本文标题:dolphinscheduler部署文档(集群版)

        本文链接:https://www.haomeiwen.com/subject/wuhguhtx.html