创建命名空间
# HBase shell: create a namespace (logical grouping of tables)
create_namespace '名空间'
创建表
# HBase shell: create a table inside the namespace — create '<namespace>:<table>', '<column family>'
create '名空间:表名','列族'
hive指向hdfs数据
-- Hive external table over existing HDFS data.
-- EXTERNAL keeps the files in place; dropping the table removes only metadata.
-- Original notes had two half-written statements mashed together (no table
-- name, empty column list, duplicated ROW FORMAT/LOCATION, '//' comments,
-- and COMMENT placed after ROW FORMAT); reconstructed as one valid DDL.
-- Columns taken from the query at the bottom of these notes that reads
-- ex_exam_record; types are assumed — TODO confirm against the source data.
CREATE EXTERNAL TABLE IF NOT EXISTS ex_exam_record (
    id             int,
    confirmedCount int,
    recordDate     string,
    province       string
)
COMMENT 'This is an external table'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','                        -- field delimiter in the raw files
STORED AS TEXTFILE
LOCATION '/app/data/exam'                       -- HDFS directory holding the data
TBLPROPERTIES ('skip.header.line.count'='1');   -- skip one header row per file
hive 数据映射到hbase
-- Hive external table mapped onto an existing HBase table.
-- Fix: the storage handler class is HBaseStorageHandler (capital B);
-- 'HbaseStorageHandler' does not exist and the CREATE fails with
-- ClassNotFoundException.
CREATE EXTERNAL TABLE IF NOT EXISTS ex_exam_covid19_record (
    key string,    -- maps to the HBase rowkey via ":key" below
    列名 int,
    列名 int
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    -- ":key" binds to the rowkey; the rest bind to column-family:qualifier
    "hbase.columns.mapping" = ":key,record:列名,record:列名"
)
TBLPROPERTIES ("hbase.table.name" = "exam:hbase_表名");
-- Concatenate columns xxx and yyy into the rowkey, compute the daily new
-- confirmed count (cumulative minus previous day's cumulative), and save
-- into the HBase-backed Hive table.
-- Fix: "confirmedCount" was garbled to "/confirm/iedCount" throughout the
-- original notes.
-- NOTE(review): xxx and yyy must also appear in the subquery's select list
-- (likely province and recordDate) — confirm against the actual schema.
INSERT INTO TABLE hbase_表名
SELECT
    concat(xxx, yyy),                        -- rowkey
    confirmedCount,                          -- cumulative confirmed count
    confirmedCount - if(pre_confirmedCount IS NULL, 0, pre_confirmedCount)  -- daily increment
FROM (
    SELECT
        id,
        confirmedCount,
        recordDate,
        province,
        -- previous row's cumulative count within each province
        lag(confirmedCount) OVER (PARTITION BY province ORDER BY id) AS pre_confirmedCount
    FROM ex_exam_record
) tb1;
2
-- Count the occurrences of each concatenated key and store the result.
-- (count(key) is kept deliberately: unlike count(*) it ignores NULL keys.)
with keyed as (
    select concat(列名, 列名) as key
    from hbase_表
)
insert into table 新表
select
    key,
    count(key)
from keyed
group by key;
# HBase shell: scan the first 10 rows of the table.
# Fix: the Chinese note was fused onto the command, which makes the line
# invalid in the shell — moved it into a comment.
scan '库名:表名',{LIMIT=>10}
-- Date-partitioned table for user-behavior data.
-- Fixes: the Chinese note was fused onto the DROP line; the CREATE was
-- missing its terminating semicolon before the following SET statements;
-- `time` is backticked because TIME is a keyword in newer Hive versions.
DROP TABLE IF EXISTS userbehavior_partitioned;
CREATE TABLE userbehavior_partitioned (
    user_id       int,
    item_id       int,
    category_id   int,
    behavior_type string,
    `time`        string       -- formatted event time
)
PARTITIONED BY (dt string)     -- calendar-date partition column
STORED AS ORC;
-- Enable dynamic partitioning for the INSERT below.
-- Fix: the mode value is "nonstrict", not "nostrict" — Hive rejects the
-- misspelled value and dynamic partitioning stays in strict mode.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
-- Load the partitioned table, deriving the dt partition from the epoch
-- timestamp (dynamic partition column must come last in the SELECT).
-- Fix: 'YYYY-MM-dd' changed to 'yyyy-MM-dd' — Hive uses Java
-- SimpleDateFormat patterns, where 'YYYY' is the WEEK-based year and
-- produces wrong dates around the new year; 'yyyy' is the calendar year.
-- Also added the missing terminating semicolon.
INSERT OVERWRITE TABLE userbehavior_partitioned PARTITION (dt)
SELECT
    user_id,
    item_id,
    category_id,
    behavior_type,
    from_unixtime(time, 'yyyy-MM-dd HH:mm:ss') AS `time`,
    substring(from_unixtime(time, 'yyyy-MM-dd HH:mm:ss'), 1, 10) AS dt
FROM userbehavior;
窗口函数
// val win = Window.partitionBy("continent").orderBy($"confirmedIncr".cast("int").desc)
// df.where("recordDate=20200408")
// .select($"recordDate",$"continent",$"cName",$"confirmedIncr",row_number().over(win).as("rank"))
// .where("rank=1").drop("rank")
// .show()
maven
org.apache.spark : spark-core_2.11 : 2.3.4
org.apache.spark : spark-sql_2.11 : 2.3.4



