hive的檔案的記錄格式serde

1、csv serde

-- CSV-backed table using Hive's built-in OpenCSVSerde with its default
-- separator (,), quote (") and escape (\) characters.
-- The SerDe name is a Java class name and is case-sensitive.
-- NOTE: OpenCSVSerde exposes every column as string, whatever type is declared here.
create table if not exists csv (
    id   int,
    name string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
;

-- Load the local CSV file into the csv table.
load data local
    inpath "localpath/mycsv.csv"
    into table csv;

-- Same CSV table, with the OpenCSVSerde delimiter characters spelled out:
--   separatorChar : field separator
--   quoteChar     : character used to quote field values
--   escapeChar    : escape character (a single backslash, written as '\\')
-- The property keys are case-sensitive.
-- NOTE: because of "if not exists", this statement is skipped when a table
-- named csv already exists.
create table if not exists csv (
    id   int,
    name string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties (
    'separatorChar' = ',',
    'quoteChar'     = '"',
    'escapeChar'    = '\\'
)
stored as textfile;

2、json serde：解析複雜的json

-- Register the JSON SerDe jar for the current Hive session.
-- (The "hive>" shell prompt is not part of the command and has been removed.)
add jar /localpath/json-serde-1.3-jar-with-dependencies.jar;

}}

格式是：

string，array<string>，map<string,array<int>>

-- Table over JSON records, parsed by the OpenX JSON SerDe (the json-serde jar
-- must already be added to the session).
-- The collection type parameters were missing; they are reconstructed from how
-- the columns are queried (size(city), person["woman"][0] > 80).
-- NOTE(review): element types array<string> / array<int> are presumed — confirm
-- against the data file.
-- NOTE(review): "provice" looks like a misspelling of "province"; it is left
-- unchanged because the column name presumably has to match the JSON key.
create table if not exists json (
    provice string,
    city    array<string>,
    person  map<string, array<int>>
)
row format serde 'org.openx.data.jsonserde.JsonSerDe'
;

-- Load the local JSON data file into the json table.
load data local
    inpath "localpath/file"
    into table json;

-- Rows with at least three cities whose first "woman" figure exceeds 80.
-- Reads from json, the table defined in this file (the query previously named json3).
-- NOTE(review): the original note said "more than three cities" and "more than
-- 800,000 young women"; the predicates are >= 3 and > 80, so the figure is
-- presumably in units of 10,000 — confirm the intended threshold and unit.
select *
from json
where size(city) >= 3
  and person["woman"][0] > 80;

3、regex serde：正則匹配

220.196.111.12 [10/jan/2018:00:02:00 + 0800]
220.197.111.12 [10/jan/2018:00:02:00 + 0800]
220.19c.111.12 [10/jan/2018:00:02:00 + 0800]
220.ax1.111.12 [10/jan/2018:00:02:00 + 0800]
220.198.111.12 [10/jan/2018:00:02:00 + 0800]

-- Access-log table parsed with RegexSerDe: each capture group of input.regex
-- feeds one column in order (group 1 -> host, group 2 -> data).
-- Group 1 is a dotted numeric address; group 2 is the rest of the line after
-- the separating space. [.] matches a literal dot without backslash escaping.
-- Lines that do not match the pattern (e.g. an address containing letters)
-- are returned with every column NULL.
create table if not exists regex (
    host string,
    data string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties (
    "input.regex" = "^([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) (.*)$"
)
stored as textfile
;

-- Load the local sample log file into the regex table.
load data local
    inpath "localpath/file"
    into table regex;

-- Inspect the rows as parsed by the SerDe.
select *
from regex;

hive的幾種檔案格式

hive檔案儲存格式：1. textfile：textfile為預設格式；儲存方式：行儲存；磁碟開銷大，資料解析開銷大；壓縮的text檔案，hive無法進行合併和拆分。2. sequencefile：二進位制檔案，以<key,value>的形式序列化到檔案中；儲存方式：行儲存；可分割、壓縮，一般選擇block壓縮；優勢是檔案...

hive的檔案的記錄格式serde

hive的幾種檔案格式

hive的幾種檔案格式

hive的幾種檔案格式

相關推薦