|
18 | 18 | */ |
19 | 19 | package org.apache.parquet.column.values.deltastrings; |
20 | 20 |
|
| 21 | +import java.util.Arrays; |
21 | 22 | import org.apache.parquet.bytes.ByteBufferAllocator; |
22 | 23 | import org.apache.parquet.bytes.BytesInput; |
23 | 24 | import org.apache.parquet.column.Encoding; |
@@ -88,14 +89,19 @@ public String memUsageString(String prefix) { |
88 | 89 |
|
89 | 90 | @Override |
90 | 91 | public void writeBytes(Binary v) { |
91 | | - int i = 0; |
92 | | - byte[] vb = v.getBytes(); |
93 | | - int length = previous.length < vb.length ? previous.length : vb.length; |
94 | | - // find the number of matching prefix bytes between this value and the previous one |
95 | | - for (i = 0; (i < length) && (previous[i] == vb[i]); i++) |
96 | | - ; |
| 92 | + byte[] vb = v.getBytesUnsafe(); |
| 93 | + int length = Math.min(previous.length, vb.length); |
| 94 | + // Find the number of matching prefix bytes between this value and the previous one. |
| 95 | + // Arrays.mismatch (Java 9+) is a HotSpot intrinsic that can use SIMD instructions where supported. |
| 96 | + int i = Arrays.mismatch(previous, 0, length, vb, 0, length); |
| 97 | + if (i < 0) { |
| 98 | + i = length; // all bytes in the common range matched |
| 99 | + } |
97 | 100 | prefixLengthWriter.writeInteger(i); |
98 | 101 | suffixWriter.writeBytes(v.slice(i, vb.length - i)); |
99 | | - previous = vb; |
| 102 | + // Retain the bytes for prefix comparison with the next value. |
| 103 | + // getBytesUnsafe() may return the backing array directly, so take a fresh copy via |
| 104 | + // getBytes() only when the Binary's backing bytes may be reused by the caller. |
| 105 | + previous = v.isBackingBytesReused() ? v.getBytes() : vb; |
100 | 106 | } |
101 | 107 | } |
0 commit comments