Hive API 2
Hive UDF that reads an ORC file from HDFS
package hive_udf_province;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Created by Administrator on 2017/11/7.
 * API 2
 * The argument is the column to be translated; it is matched against the first
 * column of the code (lookup) table, and the second column of that table is returned.
 */
public class UDF_province_name_orc_2 extends UDF {

    public static String evaluate(String pro_id) throws IOException {
        // Path of the ORC file that backs the province code table.
        String INPUT = "/user/hive/warehouse/yl.db/dim_province_orc/000000_0";
        Configuration conf = new Configuration();
        Path file_in = new Path(INPUT);

        // Open the ORC file and obtain an inspector for its row structure.
        Reader reader = OrcFile.createReader(FileSystem.get(URI.create(INPUT), conf), file_in);
        StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
        RecordReader records = reader.rows();

        // Build a map from the first column (province id) to the second column (province name).
        Object row = null;
        Map<String, String> datamap = new HashMap<String, String>();
        while (records.hasNext()) {
            row = records.next(row);
            List<Object> value_lst = inspector.getStructFieldsDataAsList(row);
            datamap.put(value_lst.get(0).toString(), value_lst.get(1).toString());
        }
        records.close();

        return datamap.get(pro_id);
    }
}
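Note that evaluate() re-reads the entire ORC file from HDFS for every input row, which gets expensive on large result sets. A common workaround is to load the code table into a static map the first time the UDF is called and reuse it afterwards. The sketch below illustrates that idea; the class name UDF_province_name_orc_cached and the lazy-initialization layout are illustrative, not part of the original code:

package hive_udf_province;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class UDF_province_name_orc_cached extends UDF {

    // Cache shared by all evaluate() calls in the same JVM/task, so the
    // ORC file is read only once instead of once per row.
    private static Map<String, String> datamap = null;

    public String evaluate(String pro_id) throws IOException {
        if (datamap == null) {
            datamap = loadCodeTable("/user/hive/warehouse/yl.db/dim_province_orc/000000_0");
        }
        return datamap.get(pro_id);
    }

    // Reads the two-column ORC code table once and returns it as an id -> name map.
    private static Map<String, String> loadCodeTable(String input) throws IOException {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(FileSystem.get(URI.create(input), conf), new Path(input));
        StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
        RecordReader records = reader.rows();

        Map<String, String> map = new HashMap<String, String>();
        Object row = null;
        while (records.hasNext()) {
            row = records.next(row);
            List<Object> fields = inspector.getStructFieldsDataAsList(row);
            map.put(fields.get(0).toString(), fields.get(1).toString());
        }
        records.close();
        return map;
    }
}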
Register and call the UDF in the Hive CLI:
hive
add jar /root/yl/orc11.jar;
create temporary function split_province_orc as 'hive_udf_province.UDF_province_name_orc_2';
select split_province_orc(province_id) from yl.province_test;
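A function created with create temporary function is visible only in the current Hive session, so the add jar and create temporary function steps must be repeated in every new session; if the UDF should persist across sessions, it can instead be registered as a permanent function (CREATE FUNCTION ... USING JAR ...) with the jar placed on HDFS.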