一种处理Hive元数据与文件类型不同时SQL查询失败的方法（二）

文章目录

一、异常触发SQL
二、异常处理
三、Hive on Spark依赖的Hive jar包部署
继上一篇之后，又发现了一种新的报错位置。本篇对这种情况进行处理，并验证这种处理方式是否适用于Hive on Spark环境。

一、异常触发SQL

构造测试数据
(1) 建表，插入数据

create table t1(id float,content string) stored as parquet;
insert into t1 values(1.1,'content1'),(2.2,'content2');
create table error_type(id int,content string) stored as parquet;

(2) 拷贝文件到类型不兼容的表

hdfs dfs -cp /user/hive/warehouse/testdb.db/t1/000000_0 /user/hive/warehouse/testdb.db/error_type/

在前面两步之后，执行如下SQL：

select * from error_type where content='content1';

报错并有如下错误日志：

Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: org.apache.hadoop.io.FloatWritable cannot be cast to org.apache.hadoop.io.IntWritable
	at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector.get(WritableIntObjectInspector.java:36)
	at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSonString(SerDeUtils.java:227)
	at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSonString(SerDeUtils.java:364)
	at org.apache.hadoop.hive.serde2.SerDeUtils.getJSonString(SerDeUtils.java:200)
	at org.apache.hadoop.hive.serde2.SerDeUtils.getJSonString(SerDeUtils.java:186)
	at org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:520)
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489)
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:133)
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
	at org.apache.hadoop.hive.ql.exec.spark.HivebaseFunctionResultList.hasNext(HivebaseFunctionResultList.java:85)
	at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
	at scala.collection.Iterator$class.foreach(Iterator.scala:891)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232)
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:121)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
 ]
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:133) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HivebaseFunctionResultList.hasNext(HivebaseFunctionResultList.java:85) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) ~[scala-library-2.11.12.jar:?]
	at scala.collection.Iterator$class.foreach(Iterator.scala:891) ~[scala-library-2.11.12.jar:?]
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1334) ~[scala-library-2.11.12.jar:?]
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.scheduler.Task.run(Task.scala:121) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	... 3 more
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.FloatWritable cannot be cast to org.apache.hadoop.io.IntWritable
	at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector.get(WritableIntObjectInspector.java:36) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:251) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:292) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:732) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:126) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:146) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:484) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:133) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at org.apache.hadoop.hive.ql.exec.spark.HivebaseFunctionResultList.hasNext(HivebaseFunctionResultList.java:85) ~[hive-exec-2.1.1-cdh6.3.0.jar:2.1.1-cdh6.3.0]
	at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) ~[scala-library-2.11.12.jar:?]
	at scala.collection.Iterator$class.foreach(Iterator.scala:891) ~[scala-library-2.11.12.jar:?]
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1334) ~[scala-library-2.11.12.jar:?]
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.SparkContext$$anonfun$38.apply(SparkContext.scala:2232) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.scheduler.Task.run(Task.scala:121) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413) ~[spark-core_2.11-2.4.0-cdh6.3.0.jar:2.4.0-cdh6.3.0]
	... 3 more

二、异常处理

其中org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)函数中有序列化每个字段的逻辑：

  
  @Override
  public Writable doSerialize(Object obj, ObjectInspector objInspector)
      throws SerDeException {

    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(getClass().toString()
          + " can only serialize struct types, but we got: "
          + objInspector.getTypeName());
    }

    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List fields = soi.getAllStructFieldRefs();
    List

一种处理Hive元数据与文件类型不同时SQL查询失败的方法（二）

Java相关栏目本月热门文章