Skip to content

Commit 318fbf2

Browse files
committed
Bugfix: Handle arbitrary UTF-8 characters in strings (before only ASCII characters were recognized).
1 parent d9da0d0 commit 318fbf2

3 files changed

Lines changed: 39 additions & 29 deletions

File tree

src/main/java/com/muukong/burp/BurpWebRpcRequestHandler.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ public void setRequestResponse(HttpRequestResponse httpRequestResponse) {
8484
grpcWebRequestResponse = GrpcWebRequestResponse.fromGrpcWebProto(body.getBytes());
8585
}
8686

87-
String output = grpcWebRequestResponse.prettyPrint();
88-
requestEditor.setContents(ByteArray.byteArray(output.getBytes()));
87+
requestEditor.setContents(ByteArray.byteArray(grpcWebRequestResponse.prettyPrint()));
8988
}
9089

9190
@Override

src/main/java/com/muukong/protobuf/PBDisassembler.java

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
import com.muukong.util.Util;
44

55
import java.math.BigInteger;
6+
import java.nio.ByteBuffer;
7+
import java.nio.charset.CharacterCodingException;
8+
import java.nio.charset.CharsetDecoder;
9+
import java.nio.charset.CodingErrorAction;
10+
import java.nio.charset.StandardCharsets;
611

712
/**
813
* A disassembler for protobuf messages that on input a protobuf message attempts to disassemble it
@@ -215,35 +220,46 @@ private ISerializable disassembleLen(int fieldNumber) {
215220

216221
final int length = disassembleVarInt().toInt(); // We can (rather) safely assume that the length fits into an int
217222

218-
int printableTokens = 0;
219-
for ( int i = 0; i < length; ++i ) {
220-
byte token = input[cursor + i];
221-
if ( Util.isPrintable(token) )
222-
++printableTokens;
223+
// An empty LEN field is unambiguously an empty string — skip all heuristics.
224+
if ( length == 0 ) {
225+
return new PBString("");
223226
}
224227

225228
/*
226229
The following heuristic is employed:
227-
(1) If all characters are printable, disassemble it as a string and exit.
230+
(1) If all characters are printable UTF-8 characters, disassemble it as a string and exit.
228231
(2) Otherwise, attempt to parse bytes as sub-message. Exit on success.
229232
(3) Otherwise, attempt to parse message as packed repeated fields. Exit on success.
230233
(4) Otherwise, consume `length` bytes (this should always succeed) and continue
231234
*/
232235

233-
final boolean isString = printableTokens == length; // TODO: can this heuristic be improved?
234-
235-
// Case (1)
236-
if ( isString ) {
237-
238-
StringBuilder sb = new StringBuilder();
239-
for ( int i = 0; i < length; ++i ) {
240-
byte b = input[cursor + i];
241-
sb.append((char) b);
236+
// Case (1): try to decode as UTF-8 and verify all characters are printable
237+
boolean isString = false;
238+
String stringValue = null;
239+
{
240+
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
241+
.onMalformedInput(CodingErrorAction.REPORT)
242+
.onUnmappableCharacter(CodingErrorAction.REPORT);
243+
try {
244+
stringValue = decoder.decode(ByteBuffer.wrap(input, cursor, length)).toString();
245+
// Reject strings that contain non-printable control characters
246+
// (allow common whitespace: tab=0x09, LF=0x0A, CR=0x0D)
247+
isString = true;
248+
for ( int i = 0; i < stringValue.length(); ++i ) {
249+
char c = stringValue.charAt(i);
250+
if ( c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F ) {
251+
isString = false;
252+
break;
253+
}
254+
}
255+
} catch ( CharacterCodingException e ) {
256+
isString = false;
242257
}
258+
}
243259

260+
if ( isString ) {
244261
cursor += length;
245-
246-
return new PBString( sb.toString() );
262+
return new PBString( stringValue );
247263
}
248264

249265
// Case (2)

src/main/java/com/muukong/protobuf/PBString.java

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.muukong.parsing.IParseable;
44
import com.muukong.util.Util;
55

6+
import java.nio.charset.StandardCharsets;
67
import java.util.ArrayList;
78
import java.util.List;
89

@@ -34,13 +35,7 @@ public String prettyPrint(String indent) {
3435

3536
@Override
3637
public byte[] serializeValue() {
37-
38-
byte[] valueBytes = new byte[value.length()];
39-
40-
for ( int i = 0; i < value.length(); ++i )
41-
valueBytes[i] = (byte) value.charAt(i);
42-
43-
return valueBytes;
38+
return value.getBytes(StandardCharsets.UTF_8);
4439
}
4540

4641
@Override
@@ -53,12 +48,12 @@ public byte[] serializeValueWithFieldNumber(int fieldNumber) {
5348
byte[] tagBytes = new PBVarInt(tag).serializeValue();
5449
result.addAll( Util.convertToList(tagBytes) );
5550

56-
// Append string length encoded in VARINT format (without prefix)
57-
byte[] lengthBytes = new PBVarInt(value.length()).serializeValue();
51+
// Append string byte length encoded in VARINT format (use byte count, not char count)
52+
byte[] stringBytes = serializeValue();
53+
byte[] lengthBytes = new PBVarInt(stringBytes.length).serializeValue();
5854
result.addAll(Util.convertToList(lengthBytes));
5955

6056
// Append string value itself
61-
byte[] stringBytes = serializeValue();
6257
result.addAll(Util.convertToList(stringBytes));
6358

6459
return Util.convertToArray(result);

0 commit comments

Comments
 (0)