自动化是程序员的天性,这里是一段小代码自动化手工重复的工作。
package collect;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* All rights Reserved, Designed By Migu.cn
*
* @Description: 读取表 Excel 信息,自动生成动态分区脚本
* @Author: Yao
* @Date: 2018/5/8 9:27
* @Version: v0.1
*/
public class AutoGenSh {
public static final String template_file = "dynamicLoadTemplate.txt";
public static final String interface_name = "%interface_name%";
public static final String date = "%date%";
public static final String table_name = "%ods_table_name%";
public static final String file_name = "%file_name%";
public static final String tmpTableName = "%tmp_table_name%";
public static final String start_index = "%startIndex%";
public static final String column_list = "%column_list%";
static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");//可以方便地修改日期格式
public static void main(String[] args) {
// read config from configfile
String currentDir = System.getProperty("user.dir");
File[] confFiles = new File(currentDir).listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(".conf");
}
});
for (File conf : confFiles) {
String confName = conf.getName();
// read config find interfaceName and fileName;
try {
List<String> lines = Files.readLines(conf, Charsets.UTF_8);
final String interfaceName = lines.get(0).substring(lines.get(0).indexOf("=") + 1);
final String fileName = lines.get(1).substring(lines.get(1).indexOf("=") + 1);
final String tableName = confName.substring(0, confName.indexOf("."));
// find start index from filename
final int startDateIndex = findDateIndex(fileName);
// extract columns from excel
String excelPath = currentDir + File.separator + tableName + ".xlsx";
// don't miss last ,
File excelFile = new File(excelPath);
final String columns = extractColumns(excelFile) + ",";
// final text after replace
final List<String> replaceResult = new ArrayList<>();
final String currentDate = dateFormat.format(new Date());
File templateFile = new File(currentDir + File.separator + template_file);
Files.readLines(templateFile, Charsets.UTF_8, new LineProcessor<List<String>>() {
public boolean processLine(String s) throws IOException {
s = s.replace(interface_name, interfaceName)
.replace(date, currentDate)
.replace(table_name, tableName)
.replace(tmpTableName, tableName + "tmp")
.replace(column_list, columns)
.replace(file_name, fileName)
.replace(start_index, startDateIndex + "");
return replaceResult.add(s);
}
public List<String> getResult() {
return replaceResult;
}
});
String shText = Joiner.on("\n").join(replaceResult);
// export sh file
// mkdir and move file
String newDir = currentDir + File.separator + tableName;
new File(newDir).mkdirs();
// move xlsx and conf
conf.renameTo(new File(newDir + "/" + conf.getName()));
File eFile = new File(excelPath);
eFile.renameTo(new File(newDir + "/" + eFile.getName()));
String destShFile = newDir + "/" + tableName + "_2ods.sh";
Files.write(shText.getBytes(), new File(destShFile));
System.out.println("success export sh file: " + destShFile);
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* extract columns from excel
*
* @param templateFile
* @return
*/
private static String extractColumns(File templateFile) {
Workbook excel = null;
try {
List<String> list = new ArrayList<String>();
excel = WorkbookFactory.create(templateFile);
Sheet sheet = excel.getSheetAt(0);
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
Cell cell = sheet.getRow(i).getCell(0);
String value = cell.getStringCellValue();
// skip filename filedname in_date
if (!value.toLowerCase().contains("filename")
&& !value.toLowerCase().contains("in_date")
&& !value.toLowerCase().contains("fieldname")) {
list.add(" " + value);
}
}
return Joiner.on(",\n").join(list);
} catch (IOException e) {
e.printStackTrace();
} catch (InvalidFormatException e) {
e.printStackTrace();
} finally {
if (excel != null) {
try {
excel.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return null;
}
/**
* 从文件名中获取 substring start index
*
* @param fileName i-client-base_content_20180503_ssms_000000.txt
* @return
*/
private static int findDateIndex(String fileName) {
if (fileName.contains("YYYYMMDD")) {
return fileName.indexOf("YYYYMMDD") + 1;
}
String extractDateRegex = "(\\d{8})";
Matcher matcher = Pattern.compile(extractDateRegex).matcher(fileName);
if (matcher.find()) {
String date = matcher.group(1);
return fileName.indexOf(date) + 1;
}
return -1;
}
}
Shell 脚本模板定义如下
#!/bin/sh
#
#***************************************************************************************************
# ** 文件名称: %interface_name% 入 hive ods
# ** 功能描述: 从临时表加载 动态分区
# **
# ** 创建者: Chen Yao
# ** 创建日期: %date%
# ** 修改日志:
# ** 修改日期 修改人 修改内容
# ** -----------------------------------------------------------------------------------------------
# **
# ** China Mobile(Chengdu) Information Technology Co., Ltd.
# ** All Rights Reserved.
#***************************************************************************************************
export LANG=en_US.UTF-8
export HIVE_HOME=`echo $HIVE_HOME`
export PATH=$PATH:$HIVE_HOME/bin
#参数配置
#初始化表名
source_table_tmp=%tmp_table_name%
#结果表
target_table=%ods_table_name%
#echo "drop table if exists ;
# i-client-base_content_20180503_ssms_000000.txt
echo "
use mgwh_mgplusmigrate_ods;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into ${target_table} partition(dayid)
select filename,
from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),
%column_list%
substr(filename,%startIndex%,8) as dayid
from ${source_table_tmp};" | hive &&
exit 0