配置hive时出现不能加载自己修改的hive-site.xml等配置文件的问题。发现它总是加载默认的配置文件。
解决:
hadoop的配置文件hadoop-env.sh中加上export HADOOP_CLASSPATH=$HIVE_HOME/conf:$HADOOP_CLASSPATH
还有一个问题:运行其他hadoop子项目时总会出现找不到类文件,必须把相关jar包拷贝到hadoop的lib目录下,导致其lib目录会越来越大。
至今不知道怎样将其他jar包加入到classpath中,网上说用export HADOOP_CLASSPATH="",但是好像不行
hive --config /root/etc/hive(注:/root/etc/hive是指hive-site.xml的存放目录)
HiveQL以分号结束。可以跨行。
在hive的shell上可以使用dfs命令执行HDFS文件操作。
dfs -ls /user/hive/warehouse;
hive语句必须以分号“;”结束。
不支持更新,索引和事务。
表名,列名不区分大小写。
在hive的shell上可以使用dfs命令执行HDFS文件的操作。>>dfs -ls /user/hive/warehouse/;
查看和设置临时变量:>>set fs.default.name[=hdfs://zhaoxiang:9000];
导入jar包: >>add jar hivejar.jar;
创建函数: >>create temporary function udfTest as 'com.cstore.udfExample';
【在pig中使用UDF,先用register语句注册jar文件,之后可以通过完整的java类名调用,或者用define语句为UDF指定一个名称:
register pigjar.jar;
define UPPER org.pigjar.string.UPPER();
B = foreach a generate UPPER($0); 】
可以在本地命令行运行hive的shell:
$ hive -e 'select * from userinfo' (执行hiveQL语句)
$ hive --config /hive-0.9.0/conf (重新载入新的配置文件)
$ hive --service hiveserver 50000(启动服务)
create table cite(citing int, cited int) row format delimited fields terminated by ',' stored as textfile; //或sequencefile
load data (local) inpath 'cite75_99.txt' overwrite into table cite;//若不加local则默认为HDFS路径
select * from cite limit 10;
show tables;
describe cite;
select count(1) from cite; //count(1)相当于SQL中的count(*),也可以写成 select count(*) from cite;
create table cite_count (cited int, count int);
insert overwrite table cite_count select cited , count(citing) from cite group by cited;
select * from cite_count where count > 10 limit 10;
drop table cite_count;
create table page_view(viewTime int, userid bigint,
page_url string, referrer_url string,
ip string comment 'ip address of user')
comment 'this is the page view table'
partitioned by (dt string, country string)//注意table中的列不能和partition中的列重合
clustered by (userid) into 32 buckets //桶
row format delimited
fields terminated by ','
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile;
//取样
select avg(viewTime) from page_view tablesample(bucket 1 out of 3 [on userid]);
//创建外部表,指定目录位置,删除外部表时会删除元数据,表中的数据需手动删除
create external table page_view(viewTime int, userid bigint,
page_url string, referrer_url string,
ip string comment 'ip address of user')
location 'path/to/existing/table/in/HDFS';
//修改表
alter table page_view rename to pv;
alter table pv add columns (newcol string);
alter table pv drop partition (dt='2009-09-01');
show tables 'page_.*';
load data local inpath 'page_view.txt'
overwrite into table page_view
partition (dt='2009-09-01',country='US');
在hive的shell上执行unix命令:命令前加感叹号(!),命令尾加分号(;).
hive> ! ls ;
hive> ! head hive_result;
//执行查询:
insert overwrite table query_result query; //eg: insert overwrite table query_result select * from page_view where country='US';
insert overwrite (local) directory '/hdfs-dir(local-dir)/query_result' query;
select country , count(distinct userid) from page_view group by country;
//子查询,只能在from子句中出现子查询
select teacher, max(class_num) from
(select teacher, count(classname) as class_num from classinfo group by teacher)subquery
group by teacher;
//连接
select pv.*, u.*, f.friends from page_view pv
join user u on (pv.userid=u.id)
join friends_list f on (u.id=f.uid);
//多表插入
create table mutil1 as select id, name from userinfo;
create table mutil2 like mutil1;
from userinfo insert overwrite table mutil1 select id, name
insert overwrite table mutil2 select count(distinct id),name group by name;
//创建视图
create view teacher_classnum as select teacher, count(classname) from classinfo group by teacher;