测试数据（各字段之间以制表符 \t 分隔，须与下文建表语句中 fields terminated by '\t' 保持一致）
[root@master hive]# cat ./tmp_data/weblog.data
1c13c719fbfd87f49522f189413c6ba /gybfokxyojgtwrq.html 2012-05-10 21:17:32 169.65.197.63
e42fe5714cb4402f81e5dce8da1755 /hpipbodlxgt.html 2012-05-10 21:12:04 166.22.84.135
[root@master hive]# cat ./tmp_data/ip_to_country.txt
169.65.197.63 United States
166.22.84.135 United Kingdom
建表语句
[root@master hive]# cat /opt/weblog.hive
-- External table over the web log files kept under /data/hive/weblog.
-- Every record is a single newline-terminated line whose five fields
-- (md5, url, request_date, request_time, ip) are separated by tabs.
-- All columns are declared as plain strings.
CREATE EXTERNAL TABLE weblog_entries (
    md5          STRING,
    url          STRING,
    request_date STRING,
    request_time STRING,
    ip           STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
LOCATION '/data/hive/weblog';
[root@master hive]# cat /opt/ip_to_country.hive
-- Lookup table that maps an ip string to a country name.
-- Declared without EXTERNAL, with its data placed at /data/hive/ip_to_country.
-- Records are newline-terminated lines with two tab-separated fields.
CREATE TABLE ip_to_country (
    ip      STRING,
    country STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
LOCATION '/data/hive/ip_to_country';
加载数据
-- Load the two sample files from the local filesystem (paths are relative
-- to the directory the Hive CLI was started from) into their tables.
-- No OVERWRITE is given, so the files are added to whatever each table already holds.
hive> LOAD DATA LOCAL INPATH './tmp_data/weblog.data'
    > INTO TABLE weblog_entries;
hive> LOAD DATA LOCAL INPATH './tmp_data/ip_to_country.txt'
    > INTO TABLE ip_to_country;
连接查询
-- Return every weblog_entries column plus the matching country.
-- Plain JOIN is an inner join: log rows whose ip has no entry in
-- ip_to_country are left out of the result.
hive> SELECT wle.*,
    >        itc.country
    >   FROM weblog_entries wle
    >   JOIN ip_to_country itc
    >     ON wle.ip = itc.ip;
使用查询结果创建新表
-- Create a new table from a query result (CREATE TABLE ... AS SELECT):
-- url, request_date and request_time are copied from weblog_entries and
-- url_length holds length(url) for the same row.
hive> CREATE TABLE weblog_entries_with_url_length AS
    > SELECT url,
    >        request_date,
    >        request_time,
    >        LENGTH(url) AS url_length
    >   FROM weblog_entries;