Reading Hive ORC files from Java

Hive API 2
A Hive UDF that reads an ORC file from HDFS

package hive_udf_province;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * Created by Administrator on 2017/11/7.
 * API 2
 * The argument is the column to be translated; it is matched against the first
 * column of the code (lookup) table, and the matching second column is returned.
 */
public class UDF_province_name_orc_2 extends UDF {
    public static String evaluate(String pro_id) throws IOException {
        // Path of the ORC code (lookup) table data file on HDFS.
        String INPUT = "/user/hive/warehouse/yl.db/dim_province_orc/000000_0";
        Configuration conf = new Configuration();
        Path file_in = new Path(INPUT);
        // Open the ORC file with the Hive ORC reader.
        Reader reader = OrcFile.createReader(FileSystem.get(URI.create(INPUT), conf), file_in);
        StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
        RecordReader records = reader.rows();
        Object row = null;
        Map<String, String> datamap = new HashMap<String, String>();
        // Read every row: column 0 is the key (province id), column 1 is the value (province name).
        while (records.hasNext()) {
            row = records.next(row);
            List<Object> value_lst = inspector.getStructFieldsDataAsList(row);
            datamap.put(value_lst.get(0).toString(), value_lst.get(1).toString());
        }
        records.close();
        return datamap.get(pro_id);
    }
}
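
Note that evaluate() above re-opens and scans the entire ORC file for every input row, which gets expensive on large tables. Below is a minimal sketch of one common workaround: cache the lookup map in a field so the file is read only once per UDF instance. The class name UDF_province_name_orc_cached and the loadMap helper are illustrative (not from the original article); the imports are the same as in the class above.

public class UDF_province_name_orc_cached extends UDF {
    // Lazily built lookup map; filled on the first call and reused afterwards.
    private Map<String, String> datamap = null;

    public String evaluate(String pro_id) throws IOException {
        if (datamap == null) {
            datamap = loadMap();
        }
        return datamap.get(pro_id);
    }

    private Map<String, String> loadMap() throws IOException {
        String INPUT = "/user/hive/warehouse/yl.db/dim_province_orc/000000_0";
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(FileSystem.get(URI.create(INPUT), conf), new Path(INPUT));
        StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
        RecordReader records = reader.rows();
        Object row = null;
        Map<String, String> map = new HashMap<String, String>();
        while (records.hasNext()) {
            row = records.next(row);
            List<Object> value_lst = inspector.getStructFieldsDataAsList(row);
            map.put(value_lst.get(0).toString(), value_lst.get(1).toString());
        }
        records.close();
        return map;
    }
}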

In the Hive CLI, add the jar, register the function, and call it:
add jar /root/yl/orc11.jar;
create temporary function split_province_orc as 'hive_udf_province.UDF_province_name_orc_2';
select split_province_orc(province_id) from yl.province_test;
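
For reference, the UDF assumes the code table keeps the province id in its first column and the province name in its second, stored as ORC so that a data file such as 000000_0 appears under the warehouse path used above. A hedged sketch of what such a table definition might look like (the column names are illustrative, not from the original article):

create table yl.dim_province_orc (
    province_id   string,
    province_name string
)
stored as orc;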
Original author: AI_leef
Original article: https://www.jianshu.com/p/0c601cea335a