- 使用beeline连接到hive
beeline -u "jdbc:hive2://localhost:10000"
- 创建users表和train表。为了方便,创建表的hql语句单独写在一个脚本文件中,我的是 ctable.hql:
-- users: external table describing the comma-delimited text files under
-- /temp/data/usersdata. The first line of each file is skipped as a header.
CREATE EXTERNAL TABLE IF NOT EXISTS users (
    user_id     BIGINT,
    locale      STRING,
    birthdyear  INT,
    gender      STRING,
    joinedAt    STRING,
    location    STRING,
    timezone    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/temp/data/usersdata'
TBLPROPERTIES ("skip.header.line.count"="1");
-- train: external table describing the comma-delimited text files under
-- /temp/data/traindata. The first line of each file is skipped as a header.
-- The timestamp column is back-quoted because the name clashes with a keyword.
CREATE EXTERNAL TABLE IF NOT EXISTS train (
    user_id         BIGINT,
    event_id        BIGINT,
    invited         INT,
    `timestamp`     STRING,
    interested      INT,
    not_interested  INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/temp/data/traindata'
TBLPROPERTIES ("skip.header.line.count"="1");
说明:
一张hive表在物理上对应一个文件夹。拿第一张表users来说,它指向HDFS上的 /temp/data/usersdata 文件夹,数据以文件的形式存放在 usersdata 文件夹中,hive表的定义就是对这些文件格式的一种描述。下面对users表定义的后面几行做一些说明。
语句 | 意思 |
---|---|
row format delimited | 文件以行的格式分隔 |
fields terminated by ',' | 每个字段之间以逗号分隔 |
stored as textfile | 文件的储存格式为textfile |
location '/temp/data/usersdata' | 这张表在HDFS中所指向的文件夹(usersdata) |
tblproperties("skip.header.line.count"="1") | 跳过文件中的第一行数据 |
创建比较复杂的表:
-- emp: external table with complex column types (array, struct, map) over
-- the text files under /temp/data/emp. Top-level fields are separated by a
-- pipe, collection items by a comma, and map keys from values by a colon.
CREATE EXTERNAL TABLE IF NOT EXISTS emp (
    name          STRING,
    work_place    ARRAY<STRING>,
    sex_age       STRUCT<sex:STRING,age:INT>,
    skills_score  MAP<STRING,INT>,
    depart_title  MAP<STRING,ARRAY<STRING>>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
    COLLECTION ITEMS TERMINATED BY ','
    MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION '/temp/data/emp';
注:执行上面的ctable.hql文件时,如果出现类似 FAILED: ParseException line 7:1 Failed to recognize predicate 'timestamp'. Failed rule: 'identifier' in column specification 的错误,说明表字段名与关键字产生了冲突,解决方法是把字段名改一下,或者给字段名加上反引号,例如 `timestamp`。
- 在hive shell 环境中执行刚才的脚本 ctable.hql,也可以在本地执行:
// hive shell
source /root/ctable.hql;
// 本地
hive -f "ctable.hql"
注: 在beeline中执行脚本会报错
- 创建完成后向里面导入准备好的数据
-- Load the prepared CSV files from the local filesystem into the two tables.
-- No OVERWRITE is used, so rows are added to whatever each table already holds.
LOAD DATA LOCAL INPATH '/root/data/users.csv' INTO TABLE users;
LOAD DATA LOCAL INPATH '/root/data/train.csv' INTO TABLE train;
注:我是在hive命令行中导入的,在beeline中执行上面的语句会报错(常见原因是:通过beeline连接HiveServer2时,local路径指的是HiveServer2所在机器的文件系统,而不是运行beeline的客户端机器)
- 看一下结果
-- Sample five rows from users to check the load.
SELECT * FROM users LIMIT 5;
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
| users.user_id | users.locale | users.birthdyear | users.gender | users.joinedat | users.location | users.timezone |
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
| 3197468391 | id_ID | 1993 | male | 2012-10-02T06:40:55.524Z | Medan Indonesia | 480 |
| 3537982273 | id_ID | 1992 | male | 2012-09-29T18:03:12.111Z | Medan Indonesia | 420 |
| 823183725 | en_US | 1975 | male | 2012-10-06T03:14:07.149Z | Stratford Ontario | -240 |
| 1872223848 | en_US | 1991 | female | 2012-11-04T08:59:43.783Z | Tehran Iran | 210 |
| 3429017717 | id_ID | 1995 | female | 2012-09-10T16:06:53.132Z | | 420 |
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
-- Sample five rows from train to check the load.
SELECT * FROM train LIMIT 5;
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
| train.user_id | train.event_id | train.invited | train.timestamp | train.interested | train.not_interested |
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
| 3044012 | 1918771225 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 1502284248 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 2529072432 | 0 | 2012-10-02 15:53:05.754000+00:00 | 1 | 0 |
| 3044012 | 3072478280 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 1390707377 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
.