一段读取 Excel、自动生成 Hive load shell 脚本的代码

自动化是程序员的天性,这里用一小段代码把手工重复的工作自动化。

package collect;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * All rights Reserved, Designed By Migu.cn
 *
 * @Description: 读取表 Excel 信息,自动生成动态分区脚本
 * @Author: Yao
 * @Date: 2018/5/8 9:27
 * @Version: v0.1
 */
public class AutoGenSh {

    public static final String template_file = "dynamicLoadTemplate.txt";
    public static final String interface_name = "%interface_name%";
    public static final String date = "%date%";
    public static final String table_name = "%ods_table_name%";
    public static final String file_name = "%file_name%";
    public static final String tmpTableName = "%tmp_table_name%";
    public static final String start_index = "%startIndex%";
    public static final String column_list = "%column_list%";


    static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");//可以方便地修改日期格式


    public static void main(String[] args) {
        // read config from configfile
        String currentDir = System.getProperty("user.dir");
        File[] confFiles = new File(currentDir).listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.endsWith(".conf");
            }
        });

        for (File conf : confFiles) {
            String confName = conf.getName();

            // read config find interfaceName and fileName;
            try {
                List<String> lines = Files.readLines(conf, Charsets.UTF_8);
                final String interfaceName = lines.get(0).substring(lines.get(0).indexOf("=") + 1);
                final String fileName = lines.get(1).substring(lines.get(1).indexOf("=") + 1);
                final String tableName = confName.substring(0, confName.indexOf("."));

                // find start index from filename
                final int startDateIndex = findDateIndex(fileName);

                // extract columns from excel
                String excelPath = currentDir + File.separator + tableName + ".xlsx";
                // don't miss last ,
                File excelFile = new File(excelPath);
                final String columns = extractColumns(excelFile) + ",";


                // final text after replace
                final List<String> replaceResult = new ArrayList<>();
                final String currentDate = dateFormat.format(new Date());

                File templateFile = new File(currentDir + File.separator + template_file);
                Files.readLines(templateFile, Charsets.UTF_8, new LineProcessor<List<String>>() {
                    public boolean processLine(String s) throws IOException {
                        s = s.replace(interface_name, interfaceName)
                                .replace(date, currentDate)
                                .replace(table_name, tableName)
                                .replace(tmpTableName, tableName + "tmp")
                                .replace(column_list, columns)
                                .replace(file_name, fileName)
                                .replace(start_index, startDateIndex + "");
                        return replaceResult.add(s);
                    }

                    public List<String> getResult() {
                        return replaceResult;
                    }
                });

                String shText = Joiner.on("\n").join(replaceResult);

                // export sh file
                // mkdir and move file
                String newDir = currentDir + File.separator + tableName;
                new File(newDir).mkdirs();

                // move xlsx and conf
                conf.renameTo(new File(newDir + "/" + conf.getName()));

                File eFile = new File(excelPath);
                eFile.renameTo(new File(newDir + "/" + eFile.getName()));

                String destShFile = newDir + "/" + tableName + "_2ods.sh";
                Files.write(shText.getBytes(), new File(destShFile));
                System.out.println("success export sh file: " + destShFile);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }


    }

    /**
     * extract columns from excel
     *
     * @param templateFile
     * @return
     */
    private static String extractColumns(File templateFile) {
        Workbook excel = null;
        try {
            List<String> list = new ArrayList<String>();
            excel = WorkbookFactory.create(templateFile);
            Sheet sheet = excel.getSheetAt(0);
            for (int i = 0; i <= sheet.getLastRowNum(); i++) {
                Cell cell = sheet.getRow(i).getCell(0);
                String value = cell.getStringCellValue();

                // skip filename filedname in_date
                if (!value.toLowerCase().contains("filename")
                        && !value.toLowerCase().contains("in_date")
                        && !value.toLowerCase().contains("fieldname")) {
                    list.add("  " + value);
                }
            }

            return Joiner.on(",\n").join(list);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidFormatException e) {
            e.printStackTrace();
        } finally {
            if (excel != null) {
                try {
                    excel.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /**
     * 从文件名中获取 substring start index
     *
     * @param fileName i-client-base_content_20180503_ssms_000000.txt
     * @return
     */
    private static int findDateIndex(String fileName) {
        if (fileName.contains("YYYYMMDD")) {
            return fileName.indexOf("YYYYMMDD") + 1;
        }

        String extractDateRegex = "(\\d{8})";
        Matcher matcher = Pattern.compile(extractDateRegex).matcher(fileName);
        if (matcher.find()) {
            String date = matcher.group(1);
            return fileName.indexOf(date) + 1;
        }

        return -1;
    }
}

Shell 脚本模板(dynamicLoadTemplate.txt)定义如下:

#!/bin/sh
#
#***************************************************************************************************
# **  File name:     %interface_name% load into hive ods
# **  Description:   load from the temporary table using dynamic partitions
# **
# **  Author:        Chen Yao
# **  Created:       %date%
# **  Change log:
# **  Date              Modified by     Change
# ** -----------------------------------------------------------------------------------------------
# **
# **  China Mobile(Chengdu) Information Technology Co., Ltd.
# **  All Rights Reserved.
#***************************************************************************************************
export LANG=en_US.UTF-8
export HIVE_HOME=`echo $HIVE_HOME`
export PATH=$PATH:$HIVE_HOME/bin

# Parameter configuration

# Temporary source table name
source_table_tmp=%tmp_table_name%

# Target (result) table
target_table=%ods_table_name%

#echo "drop table if exists ;
# i-client-base_content_20180503_ssms_000000.txt
# Pipe the HiveQL below into the hive CLI. The statement enables non-strict dynamic
# partitioning and inserts into the target table, taking the dayid partition value
# from an 8-character substring of the filename column.
# "exit 0" is chained with && and therefore only runs when hive succeeds.
echo "
use mgwh_mgplusmigrate_ods;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

insert into ${target_table} partition(dayid)
select filename,
  from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),
%column_list%
  substr(filename,%startIndex%,8) as dayid
  from ${source_table_tmp};" | hive &&

exit 0
    原文作者:tracyao
    原文地址: https://www.jianshu.com/p/4c967d083fff
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞