美文网首页
一段读取 excel 自动生成 hive load shell

一段读取 excel 自动生成 hive load shell

作者: 六层楼那么高 | 来源:发表于2018-11-13 10:00 被阅读0次

    自动化是程序员的天性,这里是一段小代码自动化手工重复的工作。

    package collect;
    
    import com.google.common.base.Charsets;
    import com.google.common.base.Joiner;
    import com.google.common.io.Files;
    import com.google.common.io.LineProcessor;
    import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
    import org.apache.poi.ss.usermodel.Cell;
    import org.apache.poi.ss.usermodel.Sheet;
    import org.apache.poi.ss.usermodel.Workbook;
    import org.apache.poi.ss.usermodel.WorkbookFactory;
    
    import java.io.File;
    import java.io.FilenameFilter;
    import java.io.IOException;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * All rights Reserved, Designed By Migu.cn
     *
     * @Description: 读取表 Excel 信息,自动生成动态分区脚本
     * @Author: Yao
     * @Date: 2018/5/8 9:27
     * @Version: v0.1
     */
    public class AutoGenSh {
    
        public static final String template_file = "dynamicLoadTemplate.txt";
        public static final String interface_name = "%interface_name%";
        public static final String date = "%date%";
        public static final String table_name = "%ods_table_name%";
        public static final String file_name = "%file_name%";
        public static final String tmpTableName = "%tmp_table_name%";
        public static final String start_index = "%startIndex%";
        public static final String column_list = "%column_list%";
    
    
        static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");//可以方便地修改日期格式
    
    
        public static void main(String[] args) {
            // read config from configfile
            String currentDir = System.getProperty("user.dir");
            File[] confFiles = new File(currentDir).listFiles(new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    return name.endsWith(".conf");
                }
            });
    
            for (File conf : confFiles) {
                String confName = conf.getName();
    
                // read config find interfaceName and fileName;
                try {
                    List<String> lines = Files.readLines(conf, Charsets.UTF_8);
                    final String interfaceName = lines.get(0).substring(lines.get(0).indexOf("=") + 1);
                    final String fileName = lines.get(1).substring(lines.get(1).indexOf("=") + 1);
                    final String tableName = confName.substring(0, confName.indexOf("."));
    
                    // find start index from filename
                    final int startDateIndex = findDateIndex(fileName);
    
                    // extract columns from excel
                    String excelPath = currentDir + File.separator + tableName + ".xlsx";
                    // don't miss last ,
                    File excelFile = new File(excelPath);
                    final String columns = extractColumns(excelFile) + ",";
    
    
                    // final text after replace
                    final List<String> replaceResult = new ArrayList<>();
                    final String currentDate = dateFormat.format(new Date());
    
                    File templateFile = new File(currentDir + File.separator + template_file);
                    Files.readLines(templateFile, Charsets.UTF_8, new LineProcessor<List<String>>() {
                        public boolean processLine(String s) throws IOException {
                            s = s.replace(interface_name, interfaceName)
                                    .replace(date, currentDate)
                                    .replace(table_name, tableName)
                                    .replace(tmpTableName, tableName + "tmp")
                                    .replace(column_list, columns)
                                    .replace(file_name, fileName)
                                    .replace(start_index, startDateIndex + "");
                            return replaceResult.add(s);
                        }
    
                        public List<String> getResult() {
                            return replaceResult;
                        }
                    });
    
                    String shText = Joiner.on("\n").join(replaceResult);
    
                    // export sh file
                    // mkdir and move file
                    String newDir = currentDir + File.separator + tableName;
                    new File(newDir).mkdirs();
    
                    // move xlsx and conf
                    conf.renameTo(new File(newDir + "/" + conf.getName()));
    
                    File eFile = new File(excelPath);
                    eFile.renameTo(new File(newDir + "/" + eFile.getName()));
    
                    String destShFile = newDir + "/" + tableName + "_2ods.sh";
                    Files.write(shText.getBytes(), new File(destShFile));
                    System.out.println("success export sh file: " + destShFile);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
    
    
        }
    
        /**
         * extract columns from excel
         *
         * @param templateFile
         * @return
         */
        private static String extractColumns(File templateFile) {
            Workbook excel = null;
            try {
                List<String> list = new ArrayList<String>();
                excel = WorkbookFactory.create(templateFile);
                Sheet sheet = excel.getSheetAt(0);
                for (int i = 0; i <= sheet.getLastRowNum(); i++) {
                    Cell cell = sheet.getRow(i).getCell(0);
                    String value = cell.getStringCellValue();
    
                    // skip filename filedname in_date
                    if (!value.toLowerCase().contains("filename")
                            && !value.toLowerCase().contains("in_date")
                            && !value.toLowerCase().contains("fieldname")) {
                        list.add("  " + value);
                    }
                }
    
                return Joiner.on(",\n").join(list);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InvalidFormatException e) {
                e.printStackTrace();
            } finally {
                if (excel != null) {
                    try {
                        excel.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
            return null;
        }
    
        /**
         * 从文件名中获取 substring start index
         *
         * @param fileName i-client-base_content_20180503_ssms_000000.txt
         * @return
         */
        private static int findDateIndex(String fileName) {
            if (fileName.contains("YYYYMMDD")) {
                return fileName.indexOf("YYYYMMDD") + 1;
            }
    
            String extractDateRegex = "(\\d{8})";
            Matcher matcher = Pattern.compile(extractDateRegex).matcher(fileName);
            if (matcher.find()) {
                String date = matcher.group(1);
                return fileName.indexOf(date) + 1;
            }
    
            return -1;
        }
    }
    
    

    Shell 脚本模板定义如下

    #!/bin/sh
    #
    #***************************************************************************************************
    # **  文件名称:   %interface_name% 入 hive ods
    # **  功能描述:   从临时表加载 动态分区
    # **              
    # **  创建者:     Chen Yao
    # **  创建日期:   %date%
    # **  修改日志:   
    # **  修改日期          修改人          修改内容
    # ** -----------------------------------------------------------------------------------------------
    # **
    # **  China Mobile(Chengdu) Information Technology Co., Ltd.
    # **  All Rights Reserved.
    #***************************************************************************************************
    export LANG=en_US.UTF-8
    export HIVE_HOME=`echo $HIVE_HOME`
    export PATH=$PATH:$HIVE_HOME/bin
    
    #参数配置
    
    #初始化表名
    source_table_tmp=%tmp_table_name%
    
    #结果表 
    target_table=%ods_table_name%
    
    #echo "drop table if exists ;
    # i-client-base_content_20180503_ssms_000000.txt
    echo "
    use mgwh_mgplusmigrate_ods;
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    
    insert into ${target_table} partition(dayid)
    select filename,
      from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),
    %column_list%
      substr(filename,%startIndex%,8) as dayid
      from ${source_table_tmp};" | hive &&
    
    exit 0
    

    相关文章

      网友评论

          本文标题:一段读取 excel 自动生成 hive load shell

          本文链接:https://www.haomeiwen.com/subject/sodkfqtx.html