大資料專案流程

1.資料的預處理階段

2.資料的入庫操作階段

3.資料的分析階段

4.資料儲存到資料庫階段

5.資料的查詢顯示階段

reduce:

driver:

-- Template for a text-format Hive table; 表名 is a placeholder for the real table name.
-- Fields are separated by ":" and collection items by ",".
-- The column list must be closed with ")" before the row format clause.
create table 表名 (
    videoid  string,
    uploader string,
    age      int
)
row format delimited
    fields terminated by ":"
    collection items terminated by ","
stored as textfile;

（orc）格式

-- ORC-format table with one row per uploader: uploader, videos, friends.
-- NOTE(review): the delimiter clause is kept as in the original; it presumably
-- has no effect on ORC storage -- confirm against the Hive version in use.
create table video_user_orc (
    uploader string,
    videos   int,
    friends  int
)
row format delimited
    fields terminated by ","
stored as orc;

-- Load a file from the local filesystem into the table (placeholders: 資料路徑, 表名).
-- The path literal must not carry a trailing space inside the quotes.
load data local inpath '資料路徑'
into table 表名;

-- Append every row of 元資料表 to orc表 (both names are placeholders).
insert into table orc表
select *
from 元資料表;

# Run the query non-interactively and redirect its output to a file.
# The redirection must be on the same command line as the hive invocation.
hive -e 'select * from 庫名.表名 where 條件' > 要儲存的路徑

替換引號為空 :%s/"//g

替換[ 為空 :%s/[//g

替換] 為空 :%s/]//g

-- External text-format table for the video records.
-- Fields are tab-separated; elements of the array columns are comma-separated.
-- Hive requires an element type on array columns; string is assumed here
-- (the original type parameter was lost) -- TODO confirm against the data.
create external table rate (
    videoid   string,
    uploader  string,
    age       int,
    category  array<string>,
    length    int,
    views     int,
    rate      float,
    ratings   int,
    comments  int,
    relatedid array<string>
)
row format delimited
    fields terminated by "\t"
    collection items terminated by ","
stored as textfile;

-- Replace the table's current contents with the local file
-- (placeholders: 資料路徑, 外部表名).
load data local inpath '資料路徑'
overwrite into table 外部表名;

-- Hive table stored in HBase through the Hive HBase storage handler.
-- The column list must match "hbase.columns.mapping" entry for entry:
-- videoid maps to the HBase row key (:key), every other column to a
-- qualifier in the "data" column family.
-- NOTE(review): columns after age were missing in the original and are
-- reconstructed from the mapping; array element type string is assumed -- confirm.
create table hbase_rate (
    videoid   string,
    uploader  string,
    age       int,
    category  array<string>,
    length    int,
    views     int,
    rate      float,
    ratings   int,
    comments  int,
    relatedid array<string>
)
-- The handler class name is a Java class name and is case-sensitive.
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties (
    "hbase.columns.mapping" = ":key,data:uploader,data:age,data:category,data:length,data:views,data:rate,data:ratings,data:comments,data:relatedid"
)
tblproperties (
    -- 表名 is a placeholder for the HBase table name.
    "hbase.table.name" = "表名"
);

-- Append every row of 外部表名 to 表名 (both names are placeholders).
insert into table 表名
select *
from 外部表名;

public static void main(string[
] args) throws exception 
}}

public static void main(string[
] args) throws exception 
}}

大資料專案流程

離線大資料專案流程

大資料介紹及大資料專案流程

大資料專案3

大資料專案流程

離線大資料專案流程

大資料介紹及大資料專案流程

大資料專案3

相關推薦

大資料專案流程