目录
1.arrays
2.map
3.struct
1.arrays
创建一个表,指定一个location字段的数据类型为array
-- person: one row per person; work_locations holds a list of city names.
-- Hive requires an element type on ARRAY, so it is declared as array<string>.
-- Fields are separated by a tab character ('\t', not the letter 't'), and
-- the elements inside the array are separated by ','.
hive> create table person (name string, work_locations array<string>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ',';
注意我在文本中录入数据的时候使用制表符分隔字段,然后将数据加载到表中
hive> select * from person;
OK
a ["beijing","shanghai","tianjin","hangzhou"]
b ["beijing","shanghai","tianjin","hangzhou"]
上面建表的时候多了一个 collection items terminated by,用来指定集合里面的元素用什么字符分隔;前面还有一个字段分隔符(fields terminated by)。这两个分隔符不能相同,相同的话整行内容都会被认定为前面的字段,后一个字段的内容就都是null
既然是数组,就可以读取数组里面的元素
hive> select name,work_locations[0] from person;
OK
a beijing
b beijing
如果是越界的下标不会报错,只是返回null
hive> select work_locations[4] from person where name='a' ;
OK
NULL
2.map
建表语句
-- scoremap: one row per student; score maps a subject name to its mark.
-- Hive requires key and value types on MAP. They are declared here as
-- map<string,int>, matching the sample output in this document
-- (string subject names, integer marks).
-- Three delimiters are involved:
--   '\t' separates the columns (a tab character, not the letter 't'),
--   ','  separates the map entries,
--   ':'  separates each key from its value.
create table scoremap (
    name  string,
    score map<string,int>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ','
    MAP KEYS TERMINATED BY ':';
将数据导入表中
查询所有数据
hive> select * from scoremap;
OK
小明 {"数学":80,"语文":89,"英语":95}
小红 {"语文":60,"数学":80,"英语":99}
查询map中的value值;
hive> select score['数学'] from scoremap;
OK
80
80
3.struct
-- scoreStruct: one row per student; score is a struct describing one course result.
-- Hive requires an explicit field list on STRUCT. The field names and types
-- below are reconstructed from the sample query output in this document:
-- course and tearcher are strings, while score and course_id are integers
-- (the input "000001" is displayed as 1). The spelling "tearcher" is kept
-- so that the DDL matches that output.
-- Columns are separated by a tab character ('\t', not the letter 't');
-- the struct members are separated by ','.
create table scoreStruct(
    name  string,
    score struct<course:string, score:int, course_id:int, tearcher:string>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ',';
需要导入的数据
小明 语文,91,000001,余老师
小红 数学,100,000002,体育老师
hive> select * from scoreStruct;
OK
小明 {"course":"语文","score":91,"course_id":1,"tearcher":"余老师"}
小红 {"course":"数学","score":100,"course_id":2,"tearcher":"体育老师"}



