
Hive Source Code Study: A Quick Look at the hive Script

Author: 僕名前 | Published 2021-12-09 10:03

Following a fellow blogger's write-up, I walked through the logic of Hive's startup script; these are my notes.
Location: ${HIVE_HOME}/bin/hive

#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cygwin=false
case "`uname`" in
   CYGWIN*) cygwin=true;;
esac
# First cd into the directory containing this script, then record its absolute path in bin
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
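# (Annotation) This is the standard self-locating idiom. A sketch, assuming the
# script lives at /opt/hive/bin/hive and is invoked via a relative path:
#   $0 = ./bin/hive          ->  dirname "$0" = ./bin
#   bin=`cd "./bin"; pwd`    ->  /opt/hive/bin   (cd + pwd canonicalizes it)
# The result is an absolute path regardless of the caller's working directory.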

. "$bin"/hive-config.sh

TMP_USER_DIR="/tmp/${USER}"
STDERR="${TMP_USER_DIR}/stderr"
SERVICE=""
HELP=""
SKIP_HBASECP=false
SKIP_HADOOPVERSION=false

SERVICE_ARGS=()
# Parse the command-line arguments
while [ $# -gt 0 ]; do
  case "$1" in
    --version)
      shift
      SERVICE=version
      ;;
    --service)
      shift
      SERVICE=$1
      shift
      ;;
    --rcfilecat)
      SERVICE=rcfilecat
      shift
      ;;
    --orcfiledump)
      SERVICE=orcfiledump
      shift
      ;;
    --llapdump)
      SERVICE=llapdump
      shift
      ;;
    --skiphadoopversion)
      SKIP_HADOOPVERSION=true
      shift
      ;;
    --skiphbasecp)
      SKIP_HBASECP=true
      shift
      ;;
    --help)
      HELP=_help
      shift
      ;;
    --debug*)
      DEBUG=$1
      shift
      ;;
    *)
      SERVICE_ARGS=("${SERVICE_ARGS[@]}" "$1")
      shift
      ;;
  esac
done
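# (Annotation) The loop above is the classic shift-based argument parser. A
# standalone sketch of the same pattern (flag names here are illustrative):
#   ARGS=()
#   while [ $# -gt 0 ]; do
#     case "$1" in
#       --service) shift; SERVICE=$1; shift ;;        # flag that consumes a value
#       --help)    HELP=_help; shift ;;               # boolean flag
#       *)         ARGS=("${ARGS[@]}" "$1"); shift ;; # anything else is passed through
#     esac
#   done
# So `hive --service metastore -p 9083` leaves SERVICE=metastore and ARGS=(-p 9083).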

# With a bare `hive` invocation SERVICE is empty, so it defaults to "cli"
if [ "$SERVICE" = "" ] ; then
  if [ "$HELP" = "_help" ] ; then
    SERVICE="help"
  else
    SERVICE="cli"
  fi
fi
# =~ tests whether the string on the left matches the regex on the right
if [[ "$SERVICE" =~ ^(help|version|orcfiledump|rcfilecat|schemaTool|cleardanglingscratchdir|metastore|beeline|llapstatus|llap)$ ]] ; then
  SKIP_HBASECP=true
fi

if [[ "$SERVICE" =~ ^(help|schemaTool)$ ]] ; then
  SKIP_HADOOPVERSION=true
fi
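# (Annotation) A quick, self-contained illustration of the =~ test used above
# (the value is made up):
#   s="schemaTool"
#   [[ "$s" =~ ^(help|schemaTool)$ ]] && echo "match"   # prints "match"
# The ^ and $ anchors force a whole-string match, so "schemaTool2" would not match.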

# -f is true if the path is a regular file
if [ -f "${HIVE_CONF_DIR}/hive-env.sh" ]; then
  . "${HIVE_CONF_DIR}/hive-env.sh"
fi

# -z is true if the string "$SPARK_HOME" has zero length (unset or empty)
if [[ -z "$SPARK_HOME" ]]
then
  bin=`dirname "$0"`
  # many hadoop installs are in dir/{spark,hive,hadoop,..}
  # -e is true if $bin/../../spark exists
  if test -e $bin/../../spark; then 
    sparkHome=$(readlink -f $bin/../../spark)
    # -d is true if $sparkHome is a directory
    if [[ -d $sparkHome ]]
    then
      export SPARK_HOME=$sparkHome
    fi
  fi
fi
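# (Annotation) The file-test operators used above, in one runnable sketch
# (/opt/spark is only an example path):
#   [ -z "$SPARK_HOME" ]    # true if the variable is unset or empty
#   [ -e /opt/spark ]       # true if the path exists (any file type)
#   [ -d /opt/spark ]       # true if the path is a directory
#   readlink -f /opt/spark  # resolves symlinks to a canonical absolute path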

CLASSPATH="${HIVE_CONF_DIR}"

HIVE_LIB=${HIVE_HOME}/lib

# needed for execution
if [ ! -f ${HIVE_LIB}/hive-exec-*.jar ]; then
  echo "Missing Hive Execution Jar: ${HIVE_LIB}/hive-exec-*.jar"
  exit 1;
fi

if [ ! -f ${HIVE_LIB}/hive-metastore-*.jar ]; then
  echo "Missing Hive MetaStore Jar"
  exit 2;
fi

# cli specific code
if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
  echo "Missing Hive CLI Jar"
  exit 3;
fi

for f in ${HIVE_LIB}/*.jar; do
  CLASSPATH=${CLASSPATH}:$f;
done
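# (Annotation) After this loop CLASSPATH is one long colon-joined list, e.g.
# (paths and versions are illustrative):
#   /opt/hive/conf:/opt/hive/lib/hive-cli-3.1.2.jar:/opt/hive/lib/hive-exec-3.1.2.jar:...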

# add the auxiliary jars such as serdes
if [ -d "${HIVE_AUX_JARS_PATH}" ]; then
  hive_aux_jars_abspath=`cd ${HIVE_AUX_JARS_PATH} && pwd`
  for f in $hive_aux_jars_abspath/*.jar; do
    if [[ ! -f $f ]]; then
        continue;
    fi
    if $cygwin; then
    f=`cygpath -w "$f"`
    fi
    AUX_CLASSPATH=${AUX_CLASSPATH}:$f
    if [ "${AUX_PARAM}" == "" ]; then
        AUX_PARAM=file://$f
    else
        AUX_PARAM=${AUX_PARAM},file://$f;
    fi
  done
elif [ "${HIVE_AUX_JARS_PATH}" != "" ]; then 
  HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/,/:/g'`
  if $cygwin; then
      HIVE_AUX_JARS_PATH=`cygpath -p -w "$HIVE_AUX_JARS_PATH"`
      HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/;/,/g'`
  fi
  AUX_CLASSPATH=${AUX_CLASSPATH}:${HIVE_AUX_JARS_PATH}
  AUX_PARAM="file://$(echo ${HIVE_AUX_JARS_PATH} | sed 's/:/,file:\/\//g')"
fi
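# (Annotation) The sed in the elif branch rewrites a colon list into a comma
# list of file:// URIs. With a made-up input:
#   echo "/a/x.jar:/a/y.jar" | sed 's/:/,file:\/\//g'   # -> /a/x.jar,file:///a/y.jar
# and the leading "file://" prefix completes the first entry:
#   file:///a/x.jar,file:///a/y.jar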

# adding jars from auxlib directory
for f in ${HIVE_HOME}/auxlib/*.jar; do
  if [[ ! -f $f ]]; then
      continue;
  fi
  # cygpath converts between Unix and Windows paths;
  # cygpath -w produces the Windows form of a path
  if $cygwin; then
      f=`cygpath -w "$f"`
  fi
  AUX_CLASSPATH=${AUX_CLASSPATH}:$f
  if [ "${AUX_PARAM}" == "" ]; then
    AUX_PARAM=file://$f
  else
    AUX_PARAM=${AUX_PARAM},file://$f;
  fi
done
if $cygwin; then
  # cygpath -p -w converts a whole colon-separated path list to Windows form
    CLASSPATH=`cygpath -p -w "$CLASSPATH"`
    CLASSPATH=${CLASSPATH};${AUX_CLASSPATH}
else
    CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}
fi

# suppress the HADOOP_HOME warnings in 1.x.x
export HADOOP_HOME_WARN_SUPPRESS=true 

# to make sure log4j2.x and jline jars are loaded ahead of the jars pulled by hadoop
export HADOOP_USER_CLASSPATH_FIRST=true

# pass classpath to hadoop
if [ "$HADOOP_CLASSPATH" != "" ]; then
  export HADOOP_CLASSPATH="${CLASSPATH}:${HADOOP_CLASSPATH}"
else
  export HADOOP_CLASSPATH="$CLASSPATH"
fi

# also pass hive classpath to hadoop
if [ "$HIVE_CLASSPATH" != "" ]; then
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HIVE_CLASSPATH}";
fi

# check for hadoop in the path
HADOOP_IN_PATH=`which hadoop 2>/dev/null`
if [ -f ${HADOOP_IN_PATH} ]; then
  HADOOP_DIR=`dirname "$HADOOP_IN_PATH"`/..
fi
# HADOOP_HOME env variable overrides hadoop in the path
HADOOP_HOME=${HADOOP_HOME:-${HADOOP_PREFIX:-$HADOOP_DIR}}
if [ "$HADOOP_HOME" == "" ]; then
  echo "Cannot find hadoop installation: \$HADOOP_HOME or \$HADOOP_PREFIX must be set or hadoop must be in the path";
  exit 4;
fi
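# (Annotation) `${HADOOP_HOME:-${HADOOP_PREFIX:-$HADOOP_DIR}}` is nested
# default expansion: use HADOOP_HOME if set and non-empty, else HADOOP_PREFIX,
# else the directory derived from `which hadoop`. A tiny runnable demo:
#   unset A B; C=/fallback
#   echo "${A:-${B:-$C}}"   # prints /fallback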

# add distcp to classpath, hive depends on it
for f in ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-distcp-*.jar; do
  export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f;
done

HADOOP=$HADOOP_HOME/bin/hadoop
if [ ! -f ${HADOOP} ]; then
  echo "Cannot find hadoop installation: \$HADOOP_HOME or \$HADOOP_PREFIX must be set or hadoop must be in the path";
  exit 4;
fi

# If $TMP_USER_DIR is not a directory yet, create it
if [ ! -d ${TMP_USER_DIR} ]; then
  mkdir -p ${TMP_USER_DIR} 2> /dev/null
  if [ $? -ne 0 ]; then
    STDERR="/dev/tty"
  fi
fi

if [ "${STDERR}" != "/dev/null" ] && [ ! -f ${STDERR} ]; then
  touch ${STDERR} 2> /dev/null
  if [ $? -ne 0 ]; then
    STDERR="/dev/tty"
  fi
fi

if [ "$SKIP_HADOOPVERSION" = false ]; then
  # Make sure we're using a compatible version of Hadoop
  if [ "x$HADOOP_VERSION" == "x" ]; then
      HADOOP_VERSION=$($HADOOP version 2>> ${STDERR} | awk -F"\t" '/Hadoop/ {print $0}' | cut -d' ' -f 2);
  fi
  
  # Save the regex to a var to work around quoting incompatibilities
  # between Bash 3.1 and 3.2
  hadoop_version_re="^([[:digit:]]+)\.([[:digit:]]+)(\.([[:digit:]]+))?.*$"
  

  # =~ tests whether the string matches the regex pattern on its right
  if [[ "$HADOOP_VERSION" =~ $hadoop_version_re ]]; then
      hadoop_major_ver=${BASH_REMATCH[1]}
      hadoop_minor_ver=${BASH_REMATCH[2]}
      hadoop_patch_ver=${BASH_REMATCH[4]}
  else
      echo "Unable to determine Hadoop version information."
      echo "'hadoop version' returned:"
      echo `$HADOOP version`
      exit 5
  fi
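  # (Annotation) On a successful =~ match, BASH_REMATCH[n] holds capture group
  # n. A sketch with a made-up version string:
  #   v="3.3.6"
  #   re="^([[:digit:]]+)\.([[:digit:]]+)(\.([[:digit:]]+))?.*$"
  #   [[ "$v" =~ $re ]] && echo "${BASH_REMATCH[1]} ${BASH_REMATCH[2]} ${BASH_REMATCH[4]}"
  #   # prints: 3 3 6  (group 4, the patch number, sits inside optional group 3)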
  
  # Here -a is the logical AND inside [ ], joining the two numeric tests
  # (not the file-exists test of the same name)
  if [ "$hadoop_major_ver" -lt "1" -a  "$hadoop_minor_ver$hadoop_patch_ver" -lt "201" ]; then
      echo "Hive requires Hadoop 0.20.x (x >= 1)."
      echo "'hadoop version' returned:"
      echo `$HADOOP version`
      exit 6
  fi
fi

if [ "$SKIP_HBASECP" = false ]; then
  # HBase detection. Need bin/hbase and a conf dir for building classpath entries.
  # Start with BigTop defaults for HBASE_HOME and HBASE_CONF_DIR.
  HBASE_HOME=${HBASE_HOME:-"/usr/lib/hbase"}
  HBASE_CONF_DIR=${HBASE_CONF_DIR:-"/etc/hbase/conf"}
  if [[ ! -d $HBASE_CONF_DIR ]] ; then
    # not explicitly set, nor in BigTop location. Try looking in HBASE_HOME.
    HBASE_CONF_DIR="$HBASE_HOME/conf"
  fi
  
  # perhaps we've located the HBase config. if so, include it on classpath.
  if [[ -d $HBASE_CONF_DIR ]] ; then
    export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HBASE_CONF_DIR}"
  fi
  
  # look for the hbase script. First check HBASE_HOME and then ask PATH.
  if [[ -e $HBASE_HOME/bin/hbase ]] ; then
    HBASE_BIN="$HBASE_HOME/bin/hbase"
  fi
  HBASE_BIN=${HBASE_BIN:-"$(which hbase)"}
  
  # perhaps we've located HBase. If so, include its details on the classpath
  if [[ -n $HBASE_BIN ]] ; then
    # exclude ZK, PB, and Guava (See HIVE-2055)
    # depends on HBASE-8438 (hbase-0.94.14+, hbase-0.96.1+) for `hbase mapredcp` command
    for x in $($HBASE_BIN mapredcp 2>> ${STDERR} | tr ':' '\n') ; do
      if [[ $x == *zookeeper* || $x == *protobuf-java* || $x == *guava* ]] ; then
        continue
      fi
      # TODO: should these should be added to AUX_PARAM as well?
      export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${x}"
    done
  fi
fi
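# (Annotation) Two idioms carry that loop: `tr ':' '\n'` splits a colon-separated
# classpath into lines, and `[[ $x == *zookeeper* ]]` is glob (not regex) matching.
# A standalone sketch with a fake classpath:
#   cp="/a/zookeeper-3.4.jar:/a/guava-19.jar:/a/hbase-client-1.2.jar"
#   for x in $(echo "$cp" | tr ':' '\n'); do
#     [[ $x == *zookeeper* || $x == *guava* ]] && continue  # skip excluded jars
#     echo "keep $x"   # prints only: keep /a/hbase-client-1.2.jar
#   done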

if [ "${AUX_PARAM}" != "" ]; then
  if [[ "$SERVICE" != beeline ]]; then
    HIVE_OPTS="$HIVE_OPTS --hiveconf hive.aux.jars.path=${AUX_PARAM}"
  fi
  AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
fi

# Source the service definitions from the scripts under $bin/ext/
# (each script registers its service name in SERVICE_LIST)
SERVICE_LIST=""

for i in "$bin"/ext/*.sh ; do
  . $i
done

for i in "$bin"/ext/util/*.sh ; do
  . $i
done


# Start the selected service.
# The service we picked is just a shell function that was sourced above.
if [ "$DEBUG" ]; then
  if [ "$HELP" ]; then
    debug_help
    exit 0
  else
    get_debug_params "$DEBUG"
    export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS $HIVE_MAIN_CLIENT_DEBUG_OPTS"
  fi
fi

# With a bare `hive` invocation, SERVICE is "cli", so TORUN becomes cli
TORUN=""
for j in $SERVICE_LIST ; do
  if [ "$j" = "$SERVICE" ] ; then
    TORUN=${j}$HELP
  fi
done

# to initialize logging for all services

export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dlog4j.configurationFile=hive-log4j2.properties "

if [ -f "${HIVE_CONF_DIR}/parquet-logging.properties" ]; then
  export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djava.util.logging.config.file=${HIVE_CONF_DIR}/parquet-logging.properties "
else
  export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djava.util.logging.config.file=$bin/../conf/parquet-logging.properties "
fi

if [[ "$SERVICE" =~ ^(hiveserver2|beeline|cli)$ ]] ; then
  # If process is backgrounded, don't change terminal settings
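  # (Annotation) `ps -o stat= -p $$` prints this shell's process state; a "+"
  # flag means it is running in the foreground. `-p /dev/stdin` tests whether
  # stdin is a named pipe.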
  if [[ ( ! $(ps -o stat= -p $$) =~ "+" ) && ! ( -p /dev/stdin ) ]]; then
    export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djline.terminal=jline.UnsupportedTerminal"
  fi
fi

# Decide which service to launch
if [ "$TORUN" = "" ] ; then
  echo "Service $SERVICE not found"
  echo "Available Services: $SERVICE_LIST"
  exit 7
else
  # Run the selected service
  set -- "${SERVICE_ARGS[@]}"
  $TORUN "$@"
fi
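The dispatch at the end deserves a second look: `set --` replaces the shell's positional parameters with the saved SERVICE_ARGS, and `$TORUN "$@"` then calls the matching shell function sourced from ext/. A minimal standalone sketch of the same pattern (the function body and arguments here are made up):

#!/usr/bin/env bash
cli () { echo "cli called with: $*"; }   # stand-in for the real cli() in ext/cli.sh
SERVICE_ARGS=("-e" "select 1")
TORUN=cli
set -- "${SERVICE_ARGS[@]}"              # reset $1, $2, ... to the saved arguments
$TORUN "$@"                              # prints: cli called with: -e select 1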

The cli service is implemented in cli.sh under the ext directory:

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

THISSERVICE=cli
export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "

# Set old CLI as the default client
# if USE_DEPRECATED_CLI is not set or is not equal to false use old CLI
if [ -z "$USE_DEPRECATED_CLI" ] || [ "$USE_DEPRECATED_CLI" != "false" ]; then
  USE_DEPRECATED_CLI="true"
fi

updateCli() {
  if [ "$USE_DEPRECATED_CLI" == "true" ]; then
    export HADOOP_CLIENT_OPTS=" -Dproc_hivecli $HADOOP_CLIENT_OPTS "
    CLASS=org.apache.hadoop.hive.cli.CliDriver
    JAR=hive-cli-*.jar
  else
    export HADOOP_CLIENT_OPTS=" -Dproc_beeline $HADOOP_CLIENT_OPTS -Dlog4j.configurationFile=beeline-log4j2.properties"
    CLASS=org.apache.hive.beeline.cli.HiveCli
    JAR=hive-beeline-*.jar
  fi
}

cli () {
  updateCli
  execHiveCmd $CLASS $JAR "$@"
}

cli_help () {
  updateCli
  execHiveCmd $CLASS $JAR "--help"
}
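So the legacy CliDriver stays the default; exporting USE_DEPRECATED_CLI=false before launching switches to the Beeline-based CLI instead. For example (an illustrative session):

export USE_DEPRECATED_CLI=false
hive    # now runs org.apache.hive.beeline.cli.HiveCli from hive-beeline-*.jar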

The execHiveCmd function is defined in ${HIVE_HOME}/bin/ext/util/execHiveCmd.sh:

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

CLI_JAR="hive-cli-*.jar"
BEELINE_JAR="hive-beeline-*.jar"

execHiveCmd () {
  CLASS=$1;
  shift;

  # if jar is not passed as parameter use corresponding cli jar
  if [ "$1" == "$CLI_JAR" ] || [ "$1" == "$BEELINE_JAR" ]; then
    JAR="$1"
    shift;
  else
    if [ "$USE_DEPRECATED_CLI" == "true" ]; then
      JAR="$CLI_JAR"
    else
      JAR="$BEELINE_JAR"
    fi
  fi

  # cli specific code
  if [ ! -f ${HIVE_LIB}/$JAR ]; then
    echo "Missing $JAR Jar"
    exit 3;
  fi

  if $cygwin; then
    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
  fi
  # exec replaces the shell with the `hadoop jar` process
  # hadoop 20 or newer - skip the aux_jars option. picked up from hiveconf
  exec $HADOOP jar ${HIVE_LIB}/$JAR $CLASS $HIVE_OPTS "$@"
}
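Putting it all together: with default settings, a bare `hive` command ends in an exec of `hadoop jar` on the CLI jar, roughly like the following (paths, version numbers, and the aux jar are illustrative):

exec /opt/hadoop/bin/hadoop jar /opt/hive/lib/hive-cli-3.1.2.jar \
  org.apache.hadoop.hive.cli.CliDriver \
  --hiveconf hive.aux.jars.path=file:///opt/hive/auxlib/my-serde.jar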

Summary:

1. The hive script's job: first parse the command-line arguments, then assemble the classpath and related resources, and finally exec the corresponding jar.

This concludes the quick look at the hive script.

References:
https://blog.csdn.net/king14bhhb/article/details/118424755

https://cloud.tencent.com/developer/article/1476983?from=15425

https://www.cnblogs.com/zsmynl/p/6768551.html
