1、Transfer the data with distcp
Writes or deletions on the source during the transfer will cause the copy to fail (a snapshot-based workaround is sketched below).
# Incremental copy (-update, overwriting files that differ), skip CRC comparison (-skipcrccheck, needed when the two clusters run different HDFS versions), cap bandwidth at 100 MB/s per map (-bandwidth 100), and submit to a dedicated queue
hadoop distcp -Dmapreduce.job.queuename=ana -update -skipcrccheck -bandwidth 100 hdfs://xxx/user/hive/warehouse hdfs://xxx/apps/warehouse/
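If the source tree cannot be kept quiet for the duration of the copy, one workaround is to distcp from an HDFS snapshot, which is read-only and therefore immune to concurrent writes. A minimal sketch, assuming snapshots may be enabled on the source directory (the snapshot name s_migrate is illustrative):
# Allow and create a snapshot of the source directory
hdfs dfsadmin -allowSnapshot /user/hive/warehouse
hdfs dfs -createSnapshot /user/hive/warehouse s_migrate
# Copy from the immutable snapshot path instead of the live directory
hadoop distcp -Dmapreduce.job.queuename=ana hdfs://xxx/user/hive/warehouse/.snapshot/s_migrate hdfs://xxx/apps/warehouse/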
2、Export the metastore tables
mysqldump -uhive -hxxxx -phive hive > hive-cmal-metastore.sql
# Rewrite the storage locations recorded in the metastore dump; adjust the paths to the actual environment
# Replace the defaultFS:
sed -i 's#hdfs://bigdatacluster/user/hive/warehouse#hdfs://testCluster/apps/hive_test/warehouse#g' hive-cmal-metastore.sql
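A quick sanity check after the rewrite; this counts lines still referencing the old defaultFS and should print 0:
grep -c 'hdfs://bigdatacluster' hive-cmal-metastore.sql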
3、Import into the new metastore database
mysql -h xxx -uhive -pxxx hive_meta < hive-cmal-metastore.sql
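To verify the import, compare object counts against the source metastore; DBS, TBLS and SDS are standard Hive metastore tables and the counts should match on both sides:
# Row counts of the core metastore tables
mysql -h xxx -uhive -pxxx hive_meta -e 'select count(*) from DBS; select count(*) from TBLS; select count(*) from SDS;'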
4、Repair the partitions
Because the partition directories do not follow the key=value naming that msck repair relies on, the partitions can only be re-registered with ALTER TABLE ... ADD PARTITION; the appendix shows how to generate the statements.
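Once the appendix script has produced ./alter_partitions.txt, the generated statements can be replayed against the new cluster in one batch:
# Replay the generated ALTER TABLE ... ADD PARTITION statements
hive -f ./alter_partitions.txt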
Appendix: export the original Hive table partitions
1、Export the database names
export HADOOP_USER_NAME=hive
mkdir -p ./sql
mkdir -p ./tmp
# List the database names
hive -e 'show databases;' > ./tmp/dbname.txt
# Export the CREATE DATABASE statements
while read dbname
do
    hive -e "show create database $dbname;" >> ./sql/create_dbs.sql
    echo ";" >> ./sql/create_dbs.sql
done < ./tmp/dbname.txt
# List the tables in each database
while read dbname
do
    hive -e "use $dbname;show tables;" > ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt
# Export the CREATE TABLE statements
while read dbname
do
    echo "use ${dbname};" > ./sql/create_${dbname}_tables.sql
    while read tbname
    do
        hive -e "use $dbname;show create table $tbname;" >> ./sql/create_${dbname}_tables.sql
        echo ";" >> ./sql/create_${dbname}_tables.sql
    done < ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt
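Step 2 below consumes per-table partition lists from ./partitions/, but the loop that produces them is not shown. A minimal sketch using show partitions, which prints one key=value spec per line in exactly the form step 2 parses (non-partitioned tables yield an empty file, which step 2 skips via the -s test):
# Export the partition list of every table
mkdir -p ./partitions
while read dbname
do
    mkdir -p ./partitions/${dbname}
    while read tbname
    do
        hive -e "use ${dbname};show partitions ${tbname};" > ./partitions/${dbname}/${tbname}_partitions.txt
    done < ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt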
2、Generate the ALTER statements
# Add the partitions back
while read dbname
do
    while read tbname
    do
        echo "dbname: ${dbname}, tbname: ${tbname}"
        partition_file=./partitions/${dbname}/${tbname}_partitions.txt
        if [[ -s ${partition_file} ]]; then
            while read partition
            do
                partition_key=$(echo ${partition} | awk -F'=' '{print $1}')
                partition_value=$(echo ${partition} | awk -F'=' '{print $2}')
                # Note: this assumes a single partition key; multi-level partitions (k1=v1/k2=v2) would need extra parsing
                echo "alter table ${dbname}.${tbname} add partition (${partition_key}='${partition_value}');" >> ./alter_partitions.txt
            done < ${partition_file}
        else
            echo "empty"
        fi
    done < ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt