Hive中Parquet格式的使用

#Hive 创建外部表(external table):

-- External Hive table stored as Parquet and partitioned by proc_date.
-- String literals use plain ASCII single quotes; the typographic quotes in
-- the original are not valid HiveQL string delimiters.
CREATE EXTERNAL TABLE `table_name`(
  `column1` string,
  `column2` string,
  `column3` string)
PARTITIONED BY (
  `proc_date` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  -- Placeholder path: replace with the table's real HDFS directory.
  'hdfs://hdfscluster/…'
-- 'orc.compress' only applies to ORC tables; Parquet reads 'parquet.compression'.
TBLPROPERTIES ('parquet.compression'='SNAPPY');

#

#

#添加分区并加载分区数据:

-- Register a partition that points at an existing directory; the source data
-- is not moved. '…' is a placeholder for the partition's data path.
ALTER TABLE table_name ADD PARTITION (proc_date='${hivevar:pdate}') LOCATION '…';

-- Same as a plain ADD PARTITION, but IF NOT EXISTS makes the statement safe to
-- re-run when the partition is already registered.
ALTER TABLE table_name ADD IF NOT EXISTS PARTITION (proc_date='${hivevar:pdate}') LOCATION 'hdfs://hdfscluster/';

-- LOAD DATA moves (not copies) the source files into the path that the Hive
-- table uses for this partition. '…' is a placeholder for the source path.
LOAD DATA INPATH '…' INTO TABLE table_name PARTITION (proc_date='${hivevar:pdate}');

#删除分区:

-- Drop the partition; IF EXISTS avoids an error when it is not present.
ALTER TABLE table_name DROP IF EXISTS PARTITION (proc_date='${hivevar:pdate}');

#

    原文作者:liuzx32
    原文地址: https://www.jianshu.com/p/e469e445b81c
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞