#!/usr/bin/env bash
# -----------------------------------------------------------------------------
# Export records from Hive tables to CSV files
# Author Guo Yu
# Date Jan 9, 2019
# -----------------------------------------------------------------------------
#######################################
# Export one or all Hive tables as CSV files.
# Options:
#   -u host   Hostname of the Hive thrift server (default: S1MA11)
#   -p port   Port of the Hive thrift server (default: 10000)
#   -d db     Database name (default: rcms)
#   -s        Disable the 'sz' download of each generated CSV
#   -t name   Export only the named table (default: all tables)
#   -h        Print usage and exit
# Outputs: one ${table}.csv per table under ${CSV_DIR}.
# Returns: 0 on success; exits 0 after printing usage for -h/unknown option.
#######################################
function main() {
  local HIVE_PORT=10000
  local ALL_TABLE=1
  local IF_SZ=1
  local HIVE_HOST="S1MA11"
  local DATABASE="rcms"
  local TABLE_NAME=""
  # Working directories: hive dumps raw comma-delimited rows into
  # EXPORT_DIR; the final header+rows CSV is assembled in CSV_DIR.
  local -r EXPORT_DIR="/home/guoyu/HiveExport"
  local -r CSV_DIR="/home/guoyu/HiveExportCsv"

  # Resolve the parameters.
  local opt
  while getopts "u:p:d:s:f:t:h" opt; do
    case "$opt" in
      u) HIVE_HOST=$OPTARG ;;
      p) HIVE_PORT=$OPTARG ;;
      s) IF_SZ=0 ;;
      d) DATABASE=$OPTARG ;;
      t)
        ALL_TABLE=0
        # Hive table names are lower case; normalize the user's input.
        TABLE_NAME=$(echo "$OPTARG" | tr '[:upper:]' '[:lower:]') ;;
      h|\?)
        # BUGFIX: the default host is S1MA11, not localhost (see above).
        echo "-u The hostname of hive thrift server, the default is S1MA11"
        echo "-p The port of hive thrift server, the default is 10000"
        echo "-d The database name, the default is rcms"
        echo "-s The Csv download_enable YES Or No"
        echo "-t <tablename> import the specified table. The default is all tables."
        echo "   And available tables are as following:"
        echo "       trade_phone_record"
        echo "       user"
        echo "       user_monthly"
        echo "       user_monthly_score"
        echo "       vcredit_data_prepare_score"
        echo "       user_comm_arpu"
        echo "       vcredit_score_hbf"
        echo "-h Output usage"
        exit 0 ;;
    esac
  done

  echo "-----------The value of parameters----------------------------"
  echo "ALL_TABLE=${ALL_TABLE}"
  echo "HIVE_PORT=${HIVE_PORT}"
  echo "HIVE_HOST=${HIVE_HOST}"
  echo "DATABASE=${DATABASE}"
  echo "TABLE_NAME=${TABLE_NAME}"
  echo "-----------End of parameters----------------------------------"

  # Cloudera's beeline fails on 'load data local inpath' (known, still
  # unresolved on the official forum) though it supports full HDFS paths,
  # so plain 'hive -e' / 'hive -f' is used instead.  A Kerberos-enabled
  # cluster would need the principal in the JDBC URL, e.g.:
  #   beeline -u jdbc:hive2://${HIVE_HOST}:${HIVE_PORT}/default -n cmcc -p cmcc
  export CLICMD="hive"

  # Ensure both working directories exist before wiping the CSV output.
  # ${CSV_DIR:?} aborts instead of expanding 'rm -rf /*' if the var is empty.
  mkdir -p "${EXPORT_DIR}" "${CSV_DIR}"
  rm -rf "${CSV_DIR:?}"/*

  # 1. sh export_hive_table.sh -t "tablename"  -> export only one table's CSV
  # 2. sh export_hive_table.sh                 -> export all tables' CSVs
  # NOTE: the original also tested $?, which only reflected the preceding
  # 'rm' and carried no useful information, so it was dropped.
  local -a table_arr
  if [[ ${ALL_TABLE} -eq 0 ]]; then
    # Intentionally unquoted: -t may carry several space-separated tables.
    table_arr=($TABLE_NAME)
  else
    #"vcredit_score_hbf" "vcredit_data_prepare_score" "trade_phone_record" "user_comm_arpu" "user" "user_monthly" "user_monthly_score"
    table_arr=("vcredit_score_hbf" "vcredit_data_prepare_score" "trade_phone_record" "user_comm_arpu")
  fi

  local table
  for table in "${table_arr[@]}"; do
    # Dump the table rows, comma-delimited, into EXPORT_DIR.
    # BUGFIX: honor -d instead of hardcoding 'use rcms'; add the missing
    # space after 'directory'.
    ${CLICMD} -e "use ${DATABASE};insert overwrite local directory '${EXPORT_DIR}' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' select * from ${table};"
    # Build the header row: tabs -> commas, and strip the 'table.' column
    # prefix Hive prints.  BUGFIX: the substitution must be double-quoted,
    # otherwise ${table} never expands and the prefix is never removed.
    ${CLICMD} -e "use ${DATABASE};SET hive.cli.print.header=true; select * from ${table} limit 0 ;" \
      | sed -e "s/\t/,/g;s/${table}\.//g" > "${CSV_DIR}/${table}.csv"
    # Append the data rows beneath the header.
    cat "${EXPORT_DIR}"/* >> "${CSV_DIR}/${table}.csv"
    # Hive encodes SQL NULL as \N; normalize it to the literal string NULL.
    sed -i 's/\\N/NULL/g' "${CSV_DIR}/${table}.csv"
    if [[ ${IF_SZ} -eq 1 ]]; then
      # Push the CSV to the local terminal via ZMODEM.
      sz "${CSV_DIR}/${table}.csv"
    fi
    rm -rf "${EXPORT_DIR:?}"/*
  done
}
# Quote "$@" so arguments containing whitespace reach main() intact
# (unquoted $@ word-splits them).
main "$@"
# (removed stray scraped-page footer text "网友评论" — as a bare line it
# would be executed as a command and fail)