typed_value array: ListArray
[
StructArray
-- validity:
[
valid,
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
[0],
null,
]
-- child 1: "typed_value" (Utf8)
StringArray
[
"comedy",
null,
"drama",
]
],
]
typed_value array: ListArray
[
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
null,
]
-- child 1: "typed_value" (Struct([Field { name: "a", data_type: Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Int32, nullable: true }]) }, Field { name: "b", data_type: Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Utf8, nullable: true }]) }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "a" (Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Int32, nullable: true }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
null,
]
-- child 1: "typed_value" (Int32)
PrimitiveArray<Int32>
[
1,
2,
]
]
-- child 1: "b" (Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Utf8, nullable: true }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
null,
]
-- child 1: "typed_value" (Utf8)
StringArray
[
"comedy",
"drama",
]
]
]
],
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
[2, 1, 2, 0, 4, 13, 115, 116, 114],
[2, 1, 3, 0, 5, 44, 40, 77, 0, 0],
]
-- child 1: "typed_value" (Struct([Field { name: "a", data_type: Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Int32, nullable: true }]) }, Field { name: "b", data_type: Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Utf8, nullable: true }]) }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "a" (Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Int32, nullable: true }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
null,
]
-- child 1: "typed_value" (Int32)
PrimitiveArray<Int32>
[
3,
4,
]
]
-- child 1: "b" (Struct([Field { name: "value", data_type: BinaryView, nullable: true }, Field { name: "typed_value", data_type: Utf8, nullable: true }]))
StructArray
-- validity:
[
valid,
valid,
]
[
-- child 0: "value" (BinaryView)
BinaryViewArray
[
null,
null,
]
-- child 1: "typed_value" (Utf8)
StringArray
[
"action",
"horror",
]
]
]
],
]
While adding support for variant array unshredding to arrow-rs, I discovered that parquet-cli cannot correctly read the Parquet files for cases 86 and 126; both fail with the same index-out-of-bounds error:
The backtrace for case-126.parquet is identical.
Looking at the arrow-rs debug printouts of the arrays, though, I don't see anything obviously wrong.
arrow-rs debug printout
For case-086.parquet, the input data is:
And for case-126.parquet, we have: