Introduction
Implementing a custom UDF means writing a Java class, packaging it as a jar, then loading that jar in the Hive client and registering the function; after that it can be called like any built-in function.
Example: converting an IPv4 address to its binary string representation
package example;  // must match the class name used when registering the function

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

// Converts a dotted-quad IPv4 address into a 32-character binary string.
public final class IPUnMasker extends UDF {

    public Text evaluate(final Text s) {
        if (s == null) {
            return null;
        }
        StringBuilder ret = new StringBuilder();
        String[] items = s.toString().split("\\.");
        if (items.length != 4) {
            return null;
        }
        for (String item : items) {
            // Render each octet as 8 binary digits, most significant bit first.
            StringBuilder sb = new StringBuilder();
            int a = Integer.parseInt(item);
            for (int i = 0; i < 8; i++) {
                sb.insert(0, a % 2);
                a = a / 2;
            }
            ret.append(sb);
        }
        return new Text(ret.toString());
    }

    // Quick local test outside of Hive.
    public static void main(String[] args) {
        String ip = "112.117.138.216";
        IPUnMasker unmasker = new IPUnMasker();
        System.out.println(unmasker.evaluate(new Text(ip)));
    }
}
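Note that org.apache.hadoop.hive.ql.exec.UDF is Hive's simple UDF base class: Hive locates the evaluate method by reflection, so the method must be named evaluate, and it should accept and return Hadoop writable types such as Text.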
Packaging
mvn package
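Assuming the artifactId and version used in the HDFS registration example below (hive_udf, 1.0.0), the built jar should appear at target/hive_udf-1.0.0.jar.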
Loading
Load from the local filesystem
hive> add jar xxx.jar;
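A jar added this way only goes onto the session's classpath; the function still has to be registered before use. A minimal sketch, assuming the class lives in the example package as above:
hive> create temporary function unmask_ip as 'example.IPUnMasker';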
Load from HDFS and register
create [temporary] function unmask_ip as 'example.IPUnMasker' using jar 'hdfs:///user/zzz/jars/hive_udf-1.0.0.jar';
**Drop the function**
drop function unmask_ip;
Using the UDF
select unmask_ip('112.117.138.216');
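For the sample address this should return 01110000011101011000101011011000, i.e. the octets 112, 117, 138 and 216 each rendered as eight binary digits.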
Appendix
**POM dependencies**
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
        <version>1.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>1.2.1</version>
    </dependency>
</dependencies>
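On a real cluster, hive-exec and the Hadoop jars are already on Hive's classpath, so it is common to mark them as provided so they are never bundled into the UDF jar. A sketch for one dependency, keeping the versions above:
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
    <scope>provided</scope>
</dependency>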
**Related links**
https://cwiki.apache.org/confluence/display/Hive/HivePlugins