Skip to content

Commit fc3b11e

Browse files
HTTPCLIENT-2360 - Enhance filename encoding in multipart/form-data per RFC 6266/5987 (#618)
- Modified FormBodyPartBuilder to support HttpMultipartMode, adding filename* with UTF-8 encoding for non-ISO-8859-1 filenames in STRICT/EXTENDED modes and skipping it in LEGACY mode.
- Updated HttpRFC7578Multipart to use the mode for filename encoding: percent-encode in EXTENDED, ISO-8859-1 in STRICT/LEGACY, and always encode filename* per RFC 5987.
- Adjusted MultipartEntityBuilder to propagate the mode to FormBodyPartBuilder, ensuring consistent behavior across the pipeline.
- Fixed tests to align with mode-specific expectations, maintaining LEGACY mode’s raw UTF-8 filename behavior.
1 parent 0e5497a commit fc3b11e

4 files changed

Lines changed: 94 additions & 17 deletions

File tree

httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/FormBodyPartBuilder.java

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@
2727

2828
package org.apache.hc.client5.http.entity.mime;
2929

30+
import java.nio.charset.CharsetEncoder;
31+
import java.nio.charset.StandardCharsets;
3032
import java.util.ArrayList;
3133
import java.util.List;
3234

3335
import org.apache.hc.core5.http.ContentType;
3436
import org.apache.hc.core5.http.NameValuePair;
3537
import org.apache.hc.core5.http.message.BasicNameValuePair;
38+
import org.apache.hc.core5.net.PercentCodec;
3639
import org.apache.hc.core5.util.Args;
3740
import org.apache.hc.core5.util.Asserts;
3841

@@ -47,22 +50,52 @@ public class FormBodyPartBuilder {
4750
private ContentBody body;
4851
private final Header header;
4952

53+
/**
54+
* The multipart mode determining how filenames are encoded in the {@code Content-Disposition}
55+
* header, defaults to {@link HttpMultipartMode#STRICT}.
56+
*
57+
* @since 5.5
58+
*/
59+
private HttpMultipartMode mode;
60+
61+
/**
62+
* Encoder used to check if strings can be encoded in ISO-8859-1, supporting filename
63+
* compatibility determinations in multipart form data.
64+
*/
65+
private CharsetEncoder iso8859_1Encoder;
66+
67+
/**
68+
* Creates a new builder instance with the specified name, content body, and multipart mode.
69+
*
70+
* @param name the name of the form field
71+
* @param body the content body of the part
72+
* @param mode the {@link HttpMultipartMode} to use, determining filename encoding behavior; if {@code null}, {@link HttpMultipartMode#STRICT} is used
73+
*
74+
* @return a new {@code FormBodyPartBuilder} instance
75+
* @since 5.5
76+
*/
77+
public static FormBodyPartBuilder create(final String name, final ContentBody body, final HttpMultipartMode mode) {
78+
return new FormBodyPartBuilder(name, body, mode);
79+
}
80+
5081
public static FormBodyPartBuilder create(final String name, final ContentBody body) {
51-
return new FormBodyPartBuilder(name, body);
82+
return new FormBodyPartBuilder(name, body, HttpMultipartMode.STRICT);
5283
}
5384

5485
public static FormBodyPartBuilder create() {
5586
return new FormBodyPartBuilder();
5687
}
5788

58-
FormBodyPartBuilder(final String name, final ContentBody body) {
89+
FormBodyPartBuilder(final String name, final ContentBody body, final HttpMultipartMode mode) {
5990
this();
6091
this.name = name;
6192
this.body = body;
93+
this.mode = mode != null ? mode : HttpMultipartMode.STRICT;
6294
}
6395

6496
FormBodyPartBuilder() {
6597
this.header = new Header();
98+
this.mode = HttpMultipartMode.STRICT;
6699
}
67100

68101
public FormBodyPartBuilder setName(final String name) {
@@ -102,6 +135,35 @@ public FormBodyPartBuilder removeFields(final String name) {
102135
return this;
103136
}
104137

138+
/**
139+
* Determines whether the given string can be encoded in ISO-8859-1 without loss of data.
140+
* This is used to decide whether the {@code filename} parameter can be used as-is or if
141+
* the {@code filename*} parameter is needed for non-ISO-8859-1 characters.
142+
*
143+
* @param input the string to check, must not be {@code null}
144+
* @return {@code true} if the string can be encoded in ISO-8859-1, {@code false} otherwise
145+
* @since 5.5
146+
*/
147+
private boolean canEncodeToISO8859_1(final String input) {
148+
if (iso8859_1Encoder == null) {
149+
iso8859_1Encoder = StandardCharsets.ISO_8859_1.newEncoder();
150+
}
151+
return iso8859_1Encoder.canEncode(input);
152+
}
153+
154+
/**
155+
* Encodes the given filename according to RFC 5987, prefixing it with {@code UTF-8''} and
156+
* applying percent-encoding to characters that cannot appear literally (such as spaces and non-ASCII characters). This is used for the {@code filename*}
157+
* parameter in the {@code Content-Disposition} header when non-ISO-8859-1 characters are present.
158+
*
159+
* @param filename the filename to encode, must not be {@code null}
160+
* @return the RFC 5987-encoded string, e.g., {@code UTF-8''example%20text}
161+
* @since 5.5
162+
*/
163+
private static String encodeRFC5987(final String filename) {
164+
return "UTF-8''" + PercentCodec.RFC5987.encode(filename);
165+
}
166+
105167
public FormBodyPart build() {
106168
Asserts.notBlank(this.name, "Name");
107169
Asserts.notNull(this.body, "Content body");
@@ -114,7 +176,12 @@ public FormBodyPart build() {
114176
final List<NameValuePair> fieldParameters = new ArrayList<>();
115177
fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_NAME, this.name));
116178
if (this.body.getFilename() != null) {
117-
fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME, this.body.getFilename()));
179+
final String filename = this.body.getFilename();
180+
fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME, filename));
181+
// Add filename* only if non-ISO-8859-1 and not in LEGACY mode
182+
if (mode != HttpMultipartMode.LEGACY && !canEncodeToISO8859_1(filename)) {
183+
fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME_START, encodeRFC5987(filename)));
184+
}
118185
}
119186
headerCopy.addField(new MimeField(MimeConsts.CONTENT_DISPOSITION, "form-data", fieldParameters));
120187
}
@@ -139,5 +206,4 @@ public FormBodyPart build() {
139206
}
140207
return new FormBodyPart(this.name, this.body, headerCopy);
141208
}
142-
143-
}
209+
}

httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/HttpRFC7578Multipart.java

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ class HttpRFC7578Multipart extends AbstractMultipartFormat {
4040

4141
private final List<MultipartPart> parts;
4242

43+
private final HttpMultipartMode mode;
44+
4345
/**
4446
* Constructs a new instance of {@code HttpRFC7578Multipart} with the given charset, boundary, parts, preamble, and epilogue.
4547
*
@@ -54,9 +56,11 @@ public HttpRFC7578Multipart(
5456
final String boundary,
5557
final List<MultipartPart> parts,
5658
final String preamble,
57-
final String epilogue) {
59+
final String epilogue,
60+
final HttpMultipartMode mode) {
5861
super(charset, boundary, preamble, epilogue);
5962
this.parts = parts;
63+
this.mode = mode != null ? mode : HttpMultipartMode.STRICT; // Default to STRICT
6064
}
6165

6266
/**
@@ -69,10 +73,12 @@ public HttpRFC7578Multipart(
6973
public HttpRFC7578Multipart(
7074
final Charset charset,
7175
final String boundary,
72-
final List<MultipartPart> parts) {
73-
this(charset,boundary,parts,null, null);
76+
final List<MultipartPart> parts,
77+
final HttpMultipartMode mode) {
78+
this(charset,boundary,parts,null, null, mode);
7479
}
7580

81+
7682
@Override
7783
public List<MultipartPart> getParts() {
7884
return parts;
@@ -94,12 +100,17 @@ protected void formatMultipartHeader(final MultipartPart part, final OutputStrea
94100
writeBytes(name, out);
95101
writeBytes("=\"", out);
96102
if (value != null) {
97-
if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME) ||
98-
name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START)) {
99-
final String encodedValue = name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START) ?
100-
"UTF-8''" + PercentCodec.RFC5987.encode(value) : PercentCodec.RFC5987.encode(value);
101-
final byte[] encodedBytes = encodedValue.getBytes(StandardCharsets.US_ASCII);
102-
out.write(encodedBytes);
103+
if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START)) {
104+
final String encodedValue = "UTF-8''" + PercentCodec.RFC5987.encode(value);
105+
writeBytes(encodedValue, StandardCharsets.US_ASCII, out);
106+
} else if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME)) {
107+
if (mode == HttpMultipartMode.EXTENDED) {
108+
final String encodedValue = PercentCodec.RFC5987.encode(value);
109+
writeBytes(encodedValue, StandardCharsets.US_ASCII, out);
110+
} else {
111+
// Default to ISO-8859-1 for RFC 7578 compliance in STRICT/LEGACY
112+
writeBytes(value, StandardCharsets.ISO_8859_1, out);
113+
}
103114
} else {
104115
writeBytes(value, out);
105116
}

httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/MultipartEntityBuilder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ MultipartFormEntity buildEntity() {
289289
if (charsetCopy == null) {
290290
charsetCopy = StandardCharsets.UTF_8;
291291
}
292-
form = new HttpRFC7578Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue);
292+
form = new HttpRFC7578Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue, modeCopy);
293293
} else {
294294
form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue);
295295
}

httpclient5/src/test/java/org/apache/hc/client5/http/entity/mime/TestMultipartForm.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,11 @@ void testMultipartFormBrowserCompatibleNonASCIIHeaders() throws Exception {
295295
@SuppressWarnings("resource")
296296
final FormBodyPart p1 = FormBodyPartBuilder.create(
297297
"field1",
298-
new InputStreamBody(new FileInputStream(tmpfile), s1 + ".tmp")).build();
298+
new InputStreamBody(new FileInputStream(tmpfile), s1 + ".tmp"), HttpMultipartMode.LEGACY).build();
299299
@SuppressWarnings("resource")
300300
final FormBodyPart p2 = FormBodyPartBuilder.create(
301301
"field2",
302-
new InputStreamBody(new FileInputStream(tmpfile), s2 + ".tmp")).build();
302+
new InputStreamBody(new FileInputStream(tmpfile), s2 + ".tmp"), HttpMultipartMode.LEGACY).build();
303303
final LegacyMultipart multipart = new LegacyMultipart(
304304
StandardCharsets.UTF_8, "foo",
305305
Arrays.asList(p1, p2));

0 commit comments

Comments
 (0)