Skip to content

Commit afded43

Browse files
committed
Extract charset encoding from HTTP headers or HTML headers.
Workaround for #451 and #50.
1 parent 1b4a0f0 commit afded43

2 files changed

Lines changed: 64 additions & 6 deletions

File tree

cSploit/src/main/java/org/csploit/android/net/http/RequestParser.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import java.util.ArrayList;
2828
import java.util.Arrays;
2929
import java.util.Iterator;
30+
import java.util.regex.Matcher;
31+
import java.util.regex.Pattern;
3032

3133
public class RequestParser
3234
{
@@ -478,4 +480,46 @@ public static ArrayList<HttpCookie> getCookiesFromHeaders(ArrayList<String> head
478480

479481
return null;
480482
}
483+
484+
/**
485+
* extract the charset encoding from the HTTP response headers.
486+
*
487+
* @param contentType content-type header to be parsed
488+
* @return returns the charset encoding if we've found it, or null.
489+
*/
490+
public static String getCharsetFromHeaders(String contentType){
491+
if (contentType != null && contentType.toLowerCase().trim().contains("charset=")){
492+
String[] parts = contentType.toLowerCase().trim().split("=");
493+
if (parts.length > 0)
494+
return parts[1];
495+
}
496+
497+
return null;
498+
}
499+
500+
/**
501+
* extract the charset encoding of a web site from the <meta> headers.
502+
*
503+
* @param body html body of the site to be parsed
504+
* @return returns the charset encoding if we've found it, or null.
505+
*/
506+
public static String getCharsetFromBody(String body) {
507+
if (body != null) {
508+
// match <body>, <body onLoad="">, etc...
509+
int headEnd = body.toLowerCase().indexOf("</head>");
510+
511+
// return null if there's no head tags
512+
if (headEnd == -1)
513+
return null;
514+
515+
String body_head = body.toLowerCase().substring(0, headEnd);
516+
517+
Pattern p = Pattern.compile("charset=([\"a-z0-9A-Z-]+)");
518+
Matcher m = p.matcher(body_head);
519+
if (m.find())
520+
return m.toMatchResult().group(1).replaceAll("\"", "");
521+
}
522+
523+
return null;
524+
}
481525
}

cSploit/src/main/java/org/csploit/android/net/http/proxy/StreamThread.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818
*/
1919
package org.csploit.android.net.http.proxy;
2020

21-
import java.io.IOException;
22-
import java.io.InputStream;
23-
import java.io.OutputStream;
24-
21+
import org.csploit.android.core.Logger;
2522
import org.csploit.android.core.Profiler;
2623
import org.csploit.android.core.System;
27-
import org.csploit.android.core.Logger;
2824
import org.csploit.android.net.ByteBuffer;
2925
import org.csploit.android.net.http.RequestParser;
3026

27+
import java.io.IOException;
28+
import java.io.InputStream;
29+
import java.io.OutputStream;
30+
3131
public class StreamThread implements Runnable
3232
{
3333
private final static String[] FILTERED_CONTENT_TYPES = new String[]
@@ -159,7 +159,21 @@ public void run(){
159159

160160
headers = patched;
161161

162-
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
162+
// try to get the charset encoding from the HTTP headers.
163+
String charset = RequestParser.getCharsetFromHeaders(contentType);
164+
165+
// if we haven't found the charset encoding on the HTTP headers, try it out on the body.
166+
if (charset == null) {
167+
charset = RequestParser.getCharsetFromBody(body);
168+
}
169+
170+
if (charset != null) {
171+
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes(charset));
172+
}
173+
else {
174+
// if we haven't found the charset encoding, just handle it on ByteBuffer()
175+
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
176+
}
163177

164178
mWriter.write(mBuffer.getData());
165179
mWriter.flush();

0 commit comments

Comments
 (0)