hive基本操作與應用

1.啟動hadoop

start-all.sh

2.hdfs上建立資料夾

hdfs dfs -mkdir wcinput

hdfs dfs -ls /user/hadoop

3.上傳檔案至hdfs

hdfs dfs -put ./509.txt wcinput

hdfs dfs -ls /user/hadoop/wcinput

4.啟動hive

hive

5.建立原始文件表

-- Single-column table that receives the raw text of the uploaded file.
-- The trailing semicolon is required: the Hive CLI only runs a statement once it is terminated.
create table docs(line string);

6.匯入檔案內容到表docs並檢視

-- Load 509.txt from its HDFS path into docs; `overwrite` replaces the table's existing contents.
load data
    inpath '/user/hadoop/wcinput/509.txt'
    overwrite
    into table docs;

-- View the table contents. (HiveQL comments start with `--`; `//` is not comment syntax.)
select * from docs;

7.用hql進行詞頻統計，結果放在表wordcount裡

用一張表，記錄檔案資料，檔案的一行就是表裡一個欄位的資料，所以使用換行符作為分隔符，並以檔名為分割槽

-- file_data stores the text of an input file in `context`, partitioned by `file_name`.
-- `if exists` keeps the drop from failing when the table has not been created yet,
-- so this pair of statements can be re-run safely.
drop table if exists file_data;

create table file_data (
    context string
)
partitioned by (file_name string)
row format delimited
    fields terminated by '\n'
stored as textfile;

從本地檔案系統把檔案資料匯入file_data（load data local inpath 讀取的是本地路徑，而不是hdfs上的路徑）

cat /home/hadoop/demo.txt

-- Load the local file into the partition whose key is the file's own path;
-- `overwrite` replaces whatever that partition held before.
load data
    local inpath '/home/hadoop/demo.txt'
    overwrite
    into table file_data
    partition (file_name = '/home/hadoop/demo.txt');

查詢file_data

-- Show everything currently stored in file_data.
select * from file_data;

將切分出來的每個單詞作為一行記錄到結果表裡面

-- Split each stored line on single spaces and emit one row per resulting token.
select
    explode(split(context, ' '))
from file_data
where file_name = '/home/hadoop/demo.txt';

-- wordcount stores one token per row in `context`, partitioned by `file_name`.
-- `if exists` keeps the drop from failing when the table has not been created yet,
-- so this pair of statements can be re-run safely.
drop table if exists wordcount;

create table wordcount (
    context string
)
partitioned by (file_name string)
row format delimited
    fields terminated by ' '
stored as textfile;

-- Replace the demo.txt partition of wordcount with the tokens obtained by
-- splitting every demo.txt line of file_data on single spaces.
insert overwrite table wordcount
    partition (file_name = '/home/hadoop/demo.txt')
select
    explode(split(context, ' '))
from file_data
where file_name = '/home/hadoop/demo.txt';

使用hql查詢

-- Word frequency: one output row per distinct token with its number of occurrences.
select
    context,
    count(context)
from wordcount
where file_name = '/home/hadoop/demo.txt'
group by context;

hive基本操作與應用

hive基本操作與應用

hive基本操作與應用

hive基本操作與應用

相關推薦