栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

记一次spark写parquet格式文件失败(spark读取parquet文件)

记一次spark写parquet格式文件失败(spark读取parquet文件)

我这是因为一个Set类型的字段,add了一个null导致写入失败。/(ㄒoㄒ)/~~

Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 4, master, executor 7): org.apache.spark.SparkException: Task failed while writing rows
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$.org$apache$spark$internal$io$SparkHadoopMapReduceWriter$$executeTask(SparkHadoopMapReduceWriter.scala:197)
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$$anonfun$3.apply(SparkHadoopMapReduceWriter.scala:105)
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$$anonfun$3.apply(SparkHadoopMapReduceWriter.scala:100)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:100)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:340)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
	at org.apache.parquet.io.api.Binary$FromStringBinary.encodeUTF8(Binary.java:216)
	at org.apache.parquet.io.api.Binary$FromStringBinary.&lt;init&gt;(Binary.java:206)
	at org.apache.parquet.io.api.Binary.fromString(Binary.java:519)
	at org.apache.parquet.thrift.ParquetWriteProtocol.writeStringToRecordConsumer(ParquetWriteProtocol.java:652)
	at org.apache.parquet.thrift.ParquetWriteProtocol.access$900(ParquetWriteProtocol.java:52)
	at org.apache.parquet.thrift.ParquetWriteProtocol$PrimitiveWriteProtocol.writeString(ParquetWriteProtocol.java:289)
	at org.apache.parquet.thrift.ParquetWriteProtocol.writeString(ParquetWriteProtocol.java:634)
	at com.test.data.InfoDf.write(InfoDf.java:1751)
	at org.apache.parquet.hadoop.thrift.TbaseWriteSupport.write(TbaseWriteSupport.java:57)
	at org.apache.parquet.hadoop.thrift.ThriftWriteSupport.write(ThriftWriteSupport.java:73)
	at org.apache.parquet.hadoop.thrift.ThriftWriteSupport.write(ThriftWriteSupport.java:31)
	at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:123)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:180)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:46)
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$$anonfun$4.apply(SparkHadoopMapReduceWriter.scala:164)
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$$anonfun$4.apply(SparkHadoopMapReduceWriter.scala:161)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1361)
	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$.org$apache$spark$internal$io$SparkHadoopMapReduceWriter$$executeTask(SparkHadoopMapReduceWriter.scala:173)
	... 8 more
	Suppressed: java.io.FileNotFoundException: File does not exist: /user/hive_table/dwm_info_df/date=20211201/_temporary/0/_temporary/attempt_*_0001_r_000000_3/part-r-00000.snappy.parquet (inode 31511531) Holder DFSClient_attempt_*_0000_m_000000_0_1847753967_93_application_*_* does not have any open files.
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3034)
	at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.analyzeFileState(FSDirWriteFileOp.java:581)
	at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.validateAddBlock(FSDirWriteFileOp.java:168)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2913)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:982)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:612)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:532)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1099)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1027)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1805)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:3026)

		at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
		at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
		at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
		at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
		at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
		at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
		at org.apache.hadoop.hdfs.DFSOutputStream.addBlock(DFSOutputStream.java:1125)
		at org.apache.hadoop.hdfs.DataStreamer.locateFollowingBlock(DataStreamer.java:1885)
		at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1684)
		at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:723)
	Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): File does not exist: /user/hive_table/dwm_info_df/date=20211201/_temporary/0/_temporary/attempt_*_0001_r_000000_3/part-r-00000.snappy.parquet (inode 31511531) Holder DFSClient_attempt_*_0000_m_000000_0_1847753967_93_application_*_* does not have any open files.
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3034)
	at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.analyzeFileState(FSDirWriteFileOp.java:581)
	at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.validateAddBlock(FSDirWriteFileOp.java:168)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2913)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:982)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:612)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:532)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1099)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1027)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1805)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:3026)

		at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1555)
		at org.apache.hadoop.ipc.Client.call(Client.java:1501)
		at org.apache.hadoop.ipc.Client.call(Client.java:1411)
		at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:228)
		at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116)
		at com.sun.proxy.$Proxy14.addBlock(Unknown Source)
		at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:544)
		at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
		at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
		at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
		at java.lang.reflect.Method.invoke(Method.java:498)
		at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
		at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
		at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
		at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
		at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
		at com.sun.proxy.$Proxy15.addBlock(Unknown Source)
		at org.apache.hadoop.hdfs.DFSOutputStream.addBlock(DFSOutputStream.java:1122)
		... 3 more

Driver stacktrace:
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/772234.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号