mongodb源表--->group3_mongo.appbehaviorpoint(hive) --->ods.appbehaviorpoint(hive)
group3_mongo.appbehaviorpoint 只是个mongodb到hive的映射
ods.appbehaviorpoint 才是真正的同步目标表
-----------------------------------------
1.如果mongodb源表增加了字段,那么group3_mongo.appbehaviorpoint 需要先drop table 才行,然后再执行建表语句。
一.
drop table group3_mongo.appBehaviorPoint;
二.
create external table group3_mongo.appBehaviorPoint
(
`_id` string,
track_id string,
anonymous_id string,
distinct_id string,
event string,
type string,
time string,
app_id string,
app_name string,
app_version string,
device_id string,
is_first_day string,
lib string,
element_type string,
lib_method string,
lib_version string,
manufacturer string,
model string,
network_type string,
os string,
longitude string,
latitude string,
os_version string,
element_content string,
screen_height string,
screen_width string,
timezone_offset int,
wifi string,
title string,
login_id string,
screen_name string,
ip string,
province string,
city string,
`_class` string,
is_first_time string,
resume_from_background string,
url string,
event_duration string,
referrer string,
carrier string,
element_id string,
download string,
is_h5 string,
vcProductName string,
vcProductCode string,
duserAmount bigint,
lower_channel_code string,
iinterfaceMode string, --2022/01/07 add
channel string, --2022/01/07 add
rydSpreadUV string --2022/01/07 add
)
STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
WITH SERDEPROPERTIES('mongo.columns.mapping'='{"_id":"_id","track_id":"track_id","anonymous_id":"anonymous_id",
"distinct_id":"distinct_id","event":"event","type":"type","time":"time","app_id":"app_id","app_name":"app_name",
"app_version":"app_version","device_id":"device_id","is_first_day":"is_first_day","lib":"lib","element_type":"element_type",
"lib_method":"lib_method","lib_version":"lib_version","manufacturer":"manufacturer","model":"model","network_type":"network_type",
"os":"os","longitude":"longitude","latitude":"latitude","os_version":"os_version","element_content":"element_content",
"screen_height":"screen_height","screen_width":"screen_width","timezone_offset":"timezone_offset","wifi":"wifi","title":"title",
"login_id":"login_id","screen_name":"screen_name","ip":"ip","province":"province", "city":"city","_class":"_class",
"is_first_time":"is_first_time","resume_from_background":"resume_from_background","url":"url","event_duration":"event_duration",
"referrer":"referrer","carrier":"carrier","element_id":"element_id","download":"download","is_h5":"is_h5",
"vcProductName":"vcProductName","vcProductCode":"vcProductCode","duserAmount":"duserAmount",
"lower_channel_code":"lower_channel_code","iinterfaceMode":"iinterfaceMode","channel":"channel","rydSpreadUV":"rydSpreadUV"}')
----- 这里需要用MonGODB 数据库的地址和 数据库名称 和集合名称
TBLPROPERTIES('mongo.uri'='mongodb://risk_hive:5mmTrKKwo@dds-bpxxxxxxxxxxxxx-pub.mongodb.rds.aliyuncs.com:3717/db_name.tablename');
2. 增加字段和其他
ods表增加字段 ----- ALTER TABLE ods.appbehaviorpoint_minutes ADD COLUMNS(`iinterfaceMode` string); ALTER TABLE ods.appbehaviorpoint_minutes ADD COLUMNS(`channel` string); ALTER TABLE ods.appbehaviorpoint_minutes ADD COLUMNS(`rydSpreadUV` string); -----如果类型写错,用这条语句修改, 字段名要出现2次。 ALTER TABLE ods.appbehaviorpoint CHANGE channel channel string; ----查看表有那些字段 describe group3_mongo.appbehaviorpoint_minutes;
3.删除ods的要插入的分区【一定要执行此步骤,否则执行第4步插入数据后新加入的字段里面全是NULL】
如果要往20220107分区里面插入数据,需要先删除此分区 然后再执行第4步 alter table ods.appbehaviorpoint_minutes drop partition (dt = '20220107')
4.执行插入语句 group3_mongodb--->ods
set mongo.input.split.create_input_splits=false; insert overwrite table ods.appbehaviorpoint PARTITION ( dt = '20220107') select * from group3_mongo.appbehaviorpoint -- where to_date(time) < '2022-01-07';



