-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Closed
Closed
Copy link
Labels
type:bug — Bug reports and fixes
Description
Bug Description
What happened:
Running the integration test for org.apache.hudi.integ.ITTestHoodieDemo#testParquetDemo throws the error below:
What you expected:
Steps to reproduce:
1.
2.
3.
Environment
Hudi version: 1.2.0-SNAPSHOT
Query engine: (Spark/Flink/Trino etc) Spark
Relevant configs:
/bin/bash -c spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-utilities.jar \
--table-type COPY_ON_WRITE \
--base-file-format PARQUET \
--source-class org.apache.hudi.utilities.sources.JsonDFSSource \
--source-ordering-field ts \
--target-base-path /user/hive/warehouse/stock_ticks_cow \
--target-table stock_ticks_cow \
--props /var/demo/config/dfs-source.properties \
--schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
--enable-hive-sync \
--hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ \
--hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \
--hoodie-conf hoodie.datasource.hive_sync.username=hive \
--hoodie-conf hoodie.datasource.hive_sync.password=hive \
--hoodie-conf hoodie.datasource.hive_sync.partition_fields=dt \
--hoodie-conf hoodie.datasource.hive_sync.database=default \
--hoodie-conf hoodie.datasource.hive_sync.table=stock_ticks_cow
Logs and Stack Trace
java.lang.ClassCastException: class org.apache.avro.util.Utf8 cannot be cast to class java.lang.String (org.apache.avro.util.Utf8 is in unnamed module of loader 'app'; java.lang.String is in module java.base of loader 'bootstrap')
at java.lang.String.compareTo(String.java:125) ~[?:?]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory.shouldKeepNewerRecord(BufferedRecordMergerFactory.java:492) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory.access$100(BufferedRecordMergerFactory.java:41) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory$EventTimeRecordMerger.finalMerge(BufferedRecordMergerFactory.java:193) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.FileGroupRecordBuffer.hasNextBaseRecord(FileGroupRecordBuffer.java:241) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.KeyBasedFileGroupRecordBuffer.hasNextBaseRecord(KeyBasedFileGroupRecordBuffer.java:138) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.KeyBasedFileGroupRecordBuffer.doHasNext(KeyBasedFileGroupRecordBuffer.java:147) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.FileGroupRecordBuffer.hasNext(FileGroupRecordBuffer.java:152) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.HoodieFileGroupReader.hasNext(HoodieFileGroupReader.java:247) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.HoodieFileGroupReader$HoodieFileGroupReaderIterator.hasNext(HoodieFileGroupReader.java:334) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.doMerge(FileGroupReaderBasedMergeHandle.java:270) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.io.IOUtils.runMerge(IOUtils.java:120) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:392) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:358) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$e664f7e$1(BaseSparkCommitActionExecutor.java:298) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:381) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.$anonfun$getOrElseUpdate$1(BlockManager.scala:1372) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1614) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1524) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1588) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1389) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1343) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:379) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) [spark-common-utils_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) [spark-common-utils_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) [spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623) [spark-core_2.12-3.5.3.jar:3.5.3]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
26/01/18 13:39:49 WARN BlockManager: Putting block rdd_36_0 failed due to exception org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0.
26/01/18 13:39:49 WARN BlockManager: Block rdd_36_0 could not be removed as it was not found on disk or in memory
26/01/18 13:39:49 ERROR Executor: Exception in task 0.0 in stage 14.0 (TID 17)
org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:365) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$e664f7e$1(BaseSparkCommitActionExecutor.java:298) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:381) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.$anonfun$getOrElseUpdate$1(BlockManager.scala:1372) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1614) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1524) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1588) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1389) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1343) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:379) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) ~[spark-common-utils_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) ~[spark-common-utils_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) ~[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623) [spark-core_2.12-3.5.3.jar:3.5.3]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.ClassCastException: class org.apache.avro.util.Utf8 cannot be cast to class java.lang.String (org.apache.avro.util.Utf8 is in unnamed module of loader 'app'; java.lang.String is in module java.base of loader 'bootstrap')
at java.lang.String.compareTo(String.java:125) ~[?:?]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory.shouldKeepNewerRecord(BufferedRecordMergerFactory.java:492) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory.access$100(BufferedRecordMergerFactory.java:41) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.BufferedRecordMergerFactory$EventTimeRecordMerger.finalMerge(BufferedRecordMergerFactory.java:193) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.FileGroupRecordBuffer.hasNextBaseRecord(FileGroupRecordBuffer.java:241) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.KeyBasedFileGroupRecordBuffer.hasNextBaseRecord(KeyBasedFileGroupRecordBuffer.java:138) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.KeyBasedFileGroupRecordBuffer.doHasNext(KeyBasedFileGroupRecordBuffer.java:147) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.buffer.FileGroupRecordBuffer.hasNext(FileGroupRecordBuffer.java:152) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.HoodieFileGroupReader.hasNext(HoodieFileGroupReader.java:247) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.table.read.HoodieFileGroupReader$HoodieFileGroupReaderIterator.hasNext(HoodieFileGroupReader.java:334) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.io.FileGroupReaderBasedMergeHandle.doMerge(FileGroupReaderBasedMergeHandle.java:270) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.io.IOUtils.runMerge(IOUtils.java:120) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:392) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:358) ~[hoodie-utilities.jar:1.2.0-SNAPSHOT]
... 35 more
Metadata
Metadata
Assignees
Labels
type:bug — Bug reports and fixes