Skip to content

Commit b7839f2

Browse files
committed
GH-3464 Improve DeltaByteArrayWriter.writeBytes to avoid unnecessary allocation and scalar prefix comparison
1 parent 4c8f4d4 commit b7839f2

1 file changed

Lines changed: 13 additions & 7 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.parquet.column.values.deltastrings;
2020

21+
import java.util.Arrays;
2122
import org.apache.parquet.bytes.ByteBufferAllocator;
2223
import org.apache.parquet.bytes.BytesInput;
2324
import org.apache.parquet.column.Encoding;
@@ -88,14 +89,19 @@ public String memUsageString(String prefix) {
8889

8990
@Override
9091
public void writeBytes(Binary v) {
91-
int i = 0;
92-
byte[] vb = v.getBytes();
93-
int length = previous.length < vb.length ? previous.length : vb.length;
94-
// find the number of matching prefix bytes between this value and the previous one
95-
for (i = 0; (i < length) && (previous[i] == vb[i]); i++)
96-
;
92+
byte[] vb = v.getBytesUnsafe();
93+
int length = Math.min(previous.length, vb.length);
94+
// Find the number of matching prefix bytes between this value and the previous one.
95+
// Arrays.mismatch (Java 9+) is a JVM intrinsic candidate and is typically vectorized (SIMD) on supported platforms.
96+
int i = Arrays.mismatch(previous, 0, length, vb, 0, length);
97+
if (i < 0) {
98+
i = length; // all bytes in the common range matched
99+
}
97100
prefixLengthWriter.writeInteger(i);
98101
suffixWriter.writeBytes(v.slice(i, vb.length - i));
99-
previous = vb;
102+
// Retain an owned copy for prefix comparison with the next value.
103+
// getBytesUnsafe() may return the backing array directly, so we must copy
104+
// if the Binary's backing bytes may be reused by the caller.
105+
previous = v.isBackingBytesReused() ? v.getBytes() : vb;
100106
}
101107
}

0 commit comments

Comments
 (0)