hive匯入資料到hbase

2021-08-26 09:21:16 字數 4492 閱讀 4807

hive有一張表user_tag_detail,表中資料約1.3億,需要將該表資料匯入到hbase

嘗試了兩種方式

建立關聯表

create table hbase_user_tag_detail(id string, name string, ...)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping" = ":key,cf:name,...")
tblproperties ("hbase.table.name" = "user_tag_detail");
插入資料

insert overwrite table hbase_user_tag_detail select * from user_tag_detail;
指令碼如下

#!/bin/bash
#
# Bulk-load a Hive table into HBase by generating HFiles and handing them to
# HBase's completebulkload tool. Pipeline:
#   step 1: sample the rowkey column and write region split points
#   step 2: create an HFileOutputFormat-backed external table matching the
#           source table's (string/int) columns
#   step 3: generate the HFiles with a total-order partitioner, one reduce
#           task per region
#   steps 4-6: create the HBase table, bulk-load the HFiles, smoke-test
#
# NOTE(review): this script was recovered from a garbled copy in which every
# shell variable expansion had been collapsed to a bare "$". All variable
# names below are reconstructed from the surrounding comments and SQL —
# verify before running against a real cluster.

# source Hive table to export
hive_table=user_info

# primary-key column used as the HBase rowkey
rowkey=id

# HDFS directory where the generated HFiles are staged
hfile_path=/user/test/

# every split_step-th sampled row becomes a region split point
# NOTE(review): the original modulus was lost ("row % 1" is always 0 and
# would make every row a split point) — tune this for your data volume
split_step=10000

echo "##################################[step 1 generate splites]#####################################"

# Sample the rowkey, number the rows with the row_sequence UDF, and keep every
# split_step-th value as a region split point. The split points are rewritten
# through HiveNullValueSequenceFileOutputFormat so TotalOrderPartitioner can
# consume them in step 3.
/etc/alternatives/beeline -u 'jdbc:hive2://***' -n '***' -p '***' -e "
use test;
create external table if not exists hbase_splits(partition string, count int)
partitioned by (table string);
create temporary function row_sequence as 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';
insert overwrite table hbase_splits
partition (table='${hive_table}')
select ${rowkey}, row_sequence() from (
  select ${rowkey}, row_sequence() as row
  from (
    select ${rowkey} from ${hive_table} tablesample(bucket 1 out of 1 on ${rowkey}) s order by ${rowkey}
  ) t order by ${rowkey}
) x where (row % ${split_step}) = 0 order by ${rowkey};
create external table if not exists hbase_splits_file(partition string)
partitioned by (table string)
row format
  serde 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
stored as
  inputformat 'org.apache.hadoop.mapred.TextInputFormat'
  outputformat 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
location '/user/test/hbase_splits_file';
insert overwrite table hbase_splits_file
partition (table='${hive_table}')
select partition from hbase_splits where table='${hive_table}';"

echo "##################################[step 2 create hfile table ]#####################################"

echo "debug: table name is: ${hive_table}"

# Build "create external table hbase_<src>(col type, ...)" from the output of
# `desc <src>`, keeping only string/int columns.
sql_select_col="create external table if not exists hbase_${hive_table}("

desc_table_cols=$(/etc/alternatives/beeline -u 'jdbc:hive2://***' -n '***' -p '***' -e "
use test;
desc ${hive_table};
")
temp_file=$(mktemp -u temp.user.XXXXXX.$$)
echo "$desc_table_cols" > $temp_file

# beeline prints bordered rows: "| col_name | col_type | comment |", so with
# whitespace splitting field 2 is the column name and field 4 its type
while read line
do
  if [[ $line =~ "string" ]] || [[ $line =~ "int" ]]
  then
    col_name=$(echo "$line" | awk -F ' ' '{print $2}')
    col_type=$(echo "$line" | awk -F ' ' '{print $4}')
    echo "debug:col_name:"$col_name
    echo "debug:col_type:"$col_type
    sql_select_col="$sql_select_col$col_name $col_type,"
  fi
done < $temp_file
rm -rf $temp_file

# strip the trailing comma left by the loop above
len=$(expr length "$sql_select_col")
let "len = len - 1"
sql_select_col=$(echo $sql_select_col | cut -c1-$len)

# hfile.family.path must end in the column family name ("cf") — it is where
# HiveHFileOutputFormat writes the HFiles that step 5 bulk-loads
sql_select_col=$sql_select_col") stored as inputformat 'org.apache.hadoop.mapred.TextInputFormat' outputformat 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat' tblproperties('hfile.family.path' = '/user/test/hbsort/cf');"

echo "debug: cols:"$sql_select_col

/etc/alternatives/beeline -u 'jdbc:hive2://***' -n '***' -p '***' -e "use test;
$sql_select_col;"

echo "##################################[step 3 create hfile ]#####################################"

# one reduce task per region: max split index + 1
task_num=$(/etc/alternatives/beeline -u 'jdbc:hive2://***' -n '***' -p '***' -e "
use test;
select max(count) + 1 from hbase_splits where table='${hive_table}';
")
task_num_str=$(echo $task_num)
# NOTE(review): the original awk program was lost in extraction; pull the last
# number out of beeline's bordered output — verify against your beeline version
num=$(echo "$task_num_str" | grep -o '[0-9][0-9]*' | tail -1)
echo $num

# Generate the HFiles: TotalOrderPartitioner routes each row to the reducer
# owning its key range, so the output is globally sorted as HBase requires.
/etc/alternatives/beeline -u 'jdbc:hive2://***' -n '***' -p '***' -e "
use test;
set mapred.reduce.tasks=$num;
set total.order.partitioner.path=/user/test/hbase_splits_file;
set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;
insert overwrite table hbase_${hive_table}
select * from ${hive_table} cluster by ${rowkey};
"
status=$?
echo status=$status

if [ $status -eq 0 ];
then

echo "##################################[step 4 create hbase table ]#####################################"

# create 'testtable', 'cf'
echo "create '${hive_table}', 'cf'" | hbase shell

echo "##################################[step 5 move hfile to hbase ]#####################################"

# completebulkload moves the staged HFiles into the regions of the target
# table; -D (capital) passes JVM system properties to the tool
hadoop jar /opt/cloudera/parcels/CDH-5.15.0-1.cdh5.15.0.p0.21/jars/hbase-server-1.2.0-cdh5.15.0.jar completebulkload -Dhbase.zookeeper.quorum=10.1.39.99 -Dhbase.zookeeper.property.clientPort=2181 /user/test/hbsort ${hive_table}

echo "##################################[step 6 test ]#####################################"

echo "scan '${hive_table}', {LIMIT => 10}" | hbase shell

else

echo "error:@@@@@@ generate hfile error @@@@@@"

exit 1

fi

將資料從hbase匯入到hive

選定test0721表 hbase main 001 0 list table system.catalog system.function system.log system.mutex system.sequence system.stats test0721 7row s in 0.1500 ...

乾貨總結 Hive 資料匯入 HBase

思路是用 hbase 自帶的 importtsv 工具。建表語句,要用文字格式,即 tsv 格式,分割符可以自己定義 create table if not exists tablea rowkey string,partitioned by dt string format 2017 06 01 ...

將HBase中的表資料匯入到hive中

user friend 建立hb user friend外部表 create external table events.hb user friend row key string user id string friend id string stored by org.apache.hadoop...