创建命名空间
# HBase shell: create a namespace (logical grouping of tables)
create_namespace '名空间'
创建表
# HBase shell: create a table inside the namespace — create '<namespace>:<table>', '<column family>'
create '名空间:表名','列族'
hive指向hdfs数据
-- Hive external table over existing HDFS data.
-- EXTERNAL keeps the files in place; dropping the table removes only metadata.
-- Original notes had two half-written statements mashed together (no table
-- name, empty column list, duplicated ROW FORMAT/LOCATION, '//' comments,
-- and COMMENT placed after ROW FORMAT); reconstructed as one valid DDL.
-- Columns taken from the query at the bottom of these notes that reads
-- ex_exam_record; types are assumed — TODO confirm against the source data.
CREATE EXTERNAL TABLE IF NOT EXISTS ex_exam_record (
    id             int,
    confirmedCount int,
    recordDate     string,
    province       string
)
COMMENT 'This is an external table'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','                        -- field delimiter in the raw files
STORED AS TEXTFILE
LOCATION '/app/data/exam'                       -- HDFS directory holding the data
TBLPROPERTIES ('skip.header.line.count'='1');   -- skip one header row per file
hive 数据映射到hbase
-- Hive external table mapped onto an existing HBase table.
-- Fix: the storage handler class is HBaseStorageHandler (capital B);
-- 'HbaseStorageHandler' does not exist and the CREATE fails with
-- ClassNotFoundException.
CREATE EXTERNAL TABLE IF NOT EXISTS ex_exam_covid19_record (
    key string,    -- maps to the HBase rowkey via ":key" below
    列名 int,
    列名 int
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    -- ":key" binds to the rowkey; the rest bind to column-family:qualifier
    "hbase.columns.mapping" = ":key,record:列名,record:列名"
)
TBLPROPERTIES ("hbase.table.name" = "exam:hbase_表名");
-- Concatenate columns xxx and yyy into the rowkey, compute the daily new
-- confirmed count (cumulative minus previous day's cumulative), and save
-- into the HBase-backed Hive table.
-- Fix: "confirmedCount" was garbled to "/confirm/iedCount" throughout the
-- original notes.
-- NOTE(review): xxx and yyy must also appear in the subquery's select list
-- (likely province and recordDate) — confirm against the actual schema.
INSERT INTO TABLE hbase_表名
SELECT
    concat(xxx, yyy),                        -- rowkey
    confirmedCount,                          -- cumulative confirmed count
    confirmedCount - if(pre_confirmedCount IS NULL, 0, pre_confirmedCount)  -- daily increment
FROM (
    SELECT
        id,
        confirmedCount,
        recordDate,
        province,
        -- previous row's cumulative count within each province
        lag(confirmedCount) OVER (PARTITION BY province ORDER BY id) AS pre_confirmedCount
    FROM ex_exam_record
) tb1;
2
-- Count the occurrences of each concatenated key and store the result.
-- (count(key) is kept deliberately: unlike count(*) it ignores NULL keys.)
with keyed as (
    select concat(列名, 列名) as key
    from hbase_表
)
insert into table 新表
select
    key,
    count(key)
from keyed
group by key;
# HBase shell: scan the first 10 rows of the table.
# Fix: the Chinese note was fused onto the command, which makes the line
# invalid in the shell — moved it into a comment.
scan '库名:表名',{LIMIT=>10}
-- Date-partitioned table for user-behavior data.
-- Fixes: the Chinese note was fused onto the DROP line; the CREATE was
-- missing its terminating semicolon before the following SET statements;
-- `time` is backticked because TIME is a keyword in newer Hive versions.
DROP TABLE IF EXISTS userbehavior_partitioned;
CREATE TABLE userbehavior_partitioned (
    user_id       int,
    item_id       int,
    category_id   int,
    behavior_type string,
    `time`        string       -- formatted event time
)
PARTITIONED BY (dt string)     -- calendar-date partition column
STORED AS ORC;
-- Enable dynamic partitioning for the INSERT below.
-- Fix: the mode value is "nonstrict", not "nostrict" — Hive rejects the
-- misspelled value and dynamic partitioning stays in strict mode.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
-- Load the partitioned table, deriving the dt partition from the epoch
-- timestamp (dynamic partition column must come last in the SELECT).
-- Fix: 'YYYY-MM-dd' changed to 'yyyy-MM-dd' — Hive uses Java
-- SimpleDateFormat patterns, where 'YYYY' is the WEEK-based year and
-- produces wrong dates around the new year; 'yyyy' is the calendar year.
-- Also added the missing terminating semicolon.
INSERT OVERWRITE TABLE userbehavior_partitioned PARTITION (dt)
SELECT
    user_id,
    item_id,
    category_id,
    behavior_type,
    from_unixtime(time, 'yyyy-MM-dd HH:mm:ss') AS `time`,
    substring(from_unixtime(time, 'yyyy-MM-dd HH:mm:ss'), 1, 10) AS dt
FROM userbehavior;
窗口函数
// val win = Window.partitionBy("continent").orderBy($"confirmedIncr".cast("int").desc)
// df.where("recordDate=20200408")
// .select($"recordDate",$"continent",$"cName",$"confirmedIncr",row_number().over(win).as("rank"))
// .where("rank=1").drop("rank")
// .show()
maven
org.apache.spark : spark-core_2.11 : 2.3.4
org.apache.spark : spark-sql_2.11 : 2.3.4



