create table t1(c1 string, c2 string) row format delimited fields terminated by ',' stored as textfile;
vim t1.txt
N,1 ,2
load data local inpath 't1.txt' overwrite into table t1;
hive> select * from t1; OK NULL 1 2 Time taken: 0.924 seconds, Fetched: 2 row(s) hive> select * from t1 where c1 is null; OK NULL 1 Time taken: 0.293 seconds, Fetched: 1 row(s) hive> select * from t1 where c1 = ''; OK 2 Time taken: 0.108 seconds, Fetched: 1 row(s)把长度为 0 的字符串认为 NULL
如果已经有数据,需要把长度为 0 的字符串认为 NULL。
t2.txt 文件 。
1,,2 3,4,5
t2.txt 文件第 1 行, 之间没有任何字符,默认 hive 不会把此列认为是 NULL
create table t2(c1 string, c2 string,c3 string) row format delimited fields terminated by ',' stored as textfile; load data local inpath 't2.txt' overwrite into table t2; hive> select * from t2 where c2 is null; OK Time taken: 1.184 seconds hive> select * from t2 where c2 = ''; OK 1 2 Time taken: 0.106 seconds, Fetched: 1 row(s) hive>
如果需要默认为 NULL,设置 serdeproperties 属性。
alter table t2 set serdeproperties('serialization.null.format' = '');
hive> select * from t2 where c2 is null;
OK
1 NULL 2
Time taken: 0.149 seconds, Fetched: 1 row(s)
hive> select * from t2 where c2 = '';
OK
Time taken: 0.148 seconds



