Skip to content

Commit 0fb012b

Browse files
committed
Move from a recursive heuristic to a simple linear pass. This avoids potential exponential blowup in computational complexity.
1 parent 318fbf2 commit 0fb012b

1 file changed

Lines changed: 73 additions & 15 deletions

File tree

src/main/java/com/muukong/protobuf/PBDisassembler.java

Lines changed: 73 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ private PBPackedRepeatedField attemptDisassembleAsPackedRepeatedFields(int lengt
147147
if ( isRemainingMessageParseable() ) {
148148
return prf;
149149
} else {
150-
cursor = cursorBackup; // Restore state (TODO: is this necessary?)
150+
cursor = cursorBackup;
151151
return null;
152152
}
153153
} catch ( Exception ex ) {
@@ -181,7 +181,7 @@ private PBSubMessage attemptDisassembleAsSubMessage(int length) {
181181
if ( isRemainingMessageParseable() ) {
182182
return new PBSubMessage(message);
183183
} else {
184-
cursor = cursorBackup; // Restore state (TODO: is this necessary?)
184+
cursor = cursorBackup;
185185
return null;
186186
}
187187
} catch ( Exception ex ) {
@@ -283,28 +283,86 @@ private ISerializable disassembleLen(int fieldNumber) {
283283
* but render the remainder of the message invalid (i.e. it cannot be disassembled). This method checks if
284284
* the remaining message can be parsed without errors.
285285
*
286+
* Rather than recursively re-parsing (which causes exponential blowup on deeply nested messages), this
287+
* performs a single linear pass that verifies tag/length consistency without descending into sub-messages.
288+
*
286289
* @return true if remaining message can be disassembled, false otherwise
287290
*/
288291
private boolean isRemainingMessageParseable() {
289292

290-
final int cursorBackup = cursor;
293+
int offset = cursor;
294+
295+
try {
296+
// Walk every field in the remaining bytes
297+
while ( offset < input.length ) {
298+
299+
long tag = 0;
300+
int shift = 0;
301+
while ( true ) {
302+
if ( offset >= input.length ) return false;
303+
byte b = input[offset++];
304+
tag |= (long)(b & 0x7F) << shift;
305+
shift += 7;
306+
if ( (b & 0x80) == 0 ) break; // MSB clear → last byte of varint
307+
if ( shift >= 64 ) return false; // varint too long to be valid
308+
}
291309

292-
// Copy remaining message to `bytesLeft`
293-
byte[] bytesLeft = new byte[input.length - cursor];
294-
System.arraycopy(input, cursor, bytesLeft, 0, input.length - cursor);
310+
int wireType = (int)(tag & 0x7);
295311

296-
// Initialize new disassembler instance
297-
PBDisassembler p = new PBDisassembler(bytesLeft);
312+
switch ( wireType ) {
298313

299-
// Attempt to disassemble remaining message
300-
try {
301-
PBMessage m = p.disassemble();
302-
} catch ( Exception ex ) { // Fail
303-
cursor = cursorBackup;
314+
case PBWireTypes.VARINT:
315+
// Value is a varint — skip it by consuming bytes until the MSB is clear.
316+
while ( true ) {
317+
if ( offset >= input.length ) return false;
318+
byte b = input[offset++];
319+
if ( (b & 0x80) == 0 ) break;
320+
}
321+
break;
322+
323+
case PBWireTypes.I64:
324+
// Value is always exactly 8 bytes (fixed64, sfixed64, double).
325+
offset += 8;
326+
break;
327+
328+
case PBWireTypes.LEN:
329+
// Value is a length-prefixed byte sequence (string, bytes, sub-message, …).
330+
// Read the length varint, then skip that many bytes.
331+
long length = 0;
332+
shift = 0;
333+
while ( true ) {
334+
if ( offset >= input.length ) return false;
335+
byte b = input[offset++];
336+
length |= (long)(b & 0x7F) << shift;
337+
shift += 7;
338+
if ( (b & 0x80) == 0 ) break;
339+
if ( shift >= 64 ) return false;
340+
}
341+
// Skip the payload — we deliberately do NOT recurse into it.
342+
offset += (int) length;
343+
break;
344+
345+
case PBWireTypes.I32:
346+
// Value is always exactly 4 bytes (fixed32, sfixed32, float).
347+
offset += 4;
348+
break;
349+
350+
default:
351+
// Wire types 3 (SGROUP) and 4 (EGROUP) are deprecated and unsupported;
352+
// any other value is simply invalid.
353+
return false;
354+
}
355+
356+
// After consuming a field the offset must never overshoot the end of the input.
357+
if ( offset > input.length ) return false;
358+
}
359+
360+
// We consumed exactly all remaining bytes — the layout is consistent.
361+
return true;
362+
363+
} catch ( Exception ex ) {
304364
return false;
305365
}
306-
307-
return true; // Success
308366
}
309367

310368
/**

0 commit comments

Comments
 (0)