Skip to content

Commit 0e50edb

Browse files
committed
GH-5333 Allow exporting data directly to the HTTP output stream, but first attempt to detect any issues by exporting up to 1024 statements to a dummy stream, so that an exception can be thrown before the response status is set.
1 parent f6a72a9 commit 0e50edb

5 files changed

Lines changed: 228 additions & 40 deletions

File tree

docker/Dockerfile-jetty

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ WORKDIR /tmp
1111
RUN unzip -q /tmp/rdf4j.zip
1212

1313
# Final workbench
14-
FROM jetty:9-jre17-eclipse-temurin
14+
FROM jetty:9-jre17-eclipse-temurin
1515
LABEL org.opencontainers.image.authors="Bart Hanssens (bart.hanssens@bosa.fgov.be)"
1616

1717
USER root
1818

19-
ENV JAVA_OPTIONS="-Dorg.eclipse.rdf4j.appdata.basedir=/var/rdf4j -Dorg.eclipse.rdf4j.rio.jsonld_secure_mode=false"
19+
ENV JAVA_OPTIONS="-Xmx2g -Dorg.eclipse.rdf4j.appdata.basedir=/var/rdf4j -Dorg.eclipse.rdf4j.rio.jsonld_secure_mode=false"
2020
ENV JETTY_MODULES="server,bytebufferpool,threadpool,security,servlet,webapp,ext,plus,deploy,annotations,http,jsp,jstl"
2121

2222
COPY --from=temp /tmp/eclipse-rdf4j*/war/*.war /var/lib/jetty/webapps/

docker/run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
set -e
33
./build.sh
44

5+
export JAVA_OPTIONS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
6+
57
echo "Starting the docker container for ${APP_SERVER}"
68
docker compose up --force-recreate -d
79

tools/server-spring/src/main/java/org/eclipse/rdf4j/http/server/repository/statements/ExportStatementsView.java

Lines changed: 131 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212

1313
import static javax.servlet.http.HttpServletResponse.SC_OK;
1414

15-
import java.io.ByteArrayOutputStream;
15+
import java.io.IOException;
1616
import java.io.OutputStream;
1717
import java.nio.charset.Charset;
1818
import java.util.Map;
19+
import java.util.Objects;
1920

2021
import javax.servlet.http.HttpServletRequest;
2122
import javax.servlet.http.HttpServletResponse;
@@ -24,42 +25,40 @@
2425
import org.eclipse.rdf4j.http.server.repository.RepositoryInterceptor;
2526
import org.eclipse.rdf4j.model.IRI;
2627
import org.eclipse.rdf4j.model.Resource;
28+
import org.eclipse.rdf4j.model.Statement;
2729
import org.eclipse.rdf4j.model.Value;
2830
import org.eclipse.rdf4j.repository.RepositoryConnection;
2931
import org.eclipse.rdf4j.repository.RepositoryException;
3032
import org.eclipse.rdf4j.rio.RDFFormat;
33+
import org.eclipse.rdf4j.rio.RDFHandler;
3134
import org.eclipse.rdf4j.rio.RDFHandlerException;
3235
import org.eclipse.rdf4j.rio.RDFWriter;
3336
import org.eclipse.rdf4j.rio.RDFWriterFactory;
37+
import org.slf4j.Logger;
38+
import org.slf4j.LoggerFactory;
3439
import org.springframework.web.servlet.View;
3540

3641
/**
37-
* View used to export statements. Renders the statements as RDF using a serialization specified using a parameter or
38-
* Accept header.
42+
* Streams statements as RDF in the format requested by the client.
3943
*
4044
* @author Herko ter Horst
4145
*/
4246
public class ExportStatementsView implements View {
4347

4448
public static final String SUBJECT_KEY = "subject";
45-
4649
public static final String PREDICATE_KEY = "predicate";
47-
4850
public static final String OBJECT_KEY = "object";
49-
5051
public static final String CONTEXTS_KEY = "contexts";
51-
5252
public static final String USE_INFERENCING_KEY = "useInferencing";
53-
5453
public static final String CONNECTION_KEY = "connection";
55-
5654
public static final String TRANSACTION_ID_KEY = "transactionID";
57-
5855
public static final String FACTORY_KEY = "factory";
59-
6056
public static final String HEADERS_ONLY = "headersOnly";
6157

6258
private static final ExportStatementsView INSTANCE = new ExportStatementsView();
59+
public static final int MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS = 1024;
60+
61+
private static final Logger logger = LoggerFactory.getLogger(ExportStatementsView.class);
6362

6463
public static ExportStatementsView getInstance() {
6564
return INSTANCE;
@@ -70,53 +69,152 @@ private ExportStatementsView() {
7069

7170
@Override
7271
public String getContentType() {
72+
// The content type depends on the requested RDF format, so it is not predetermined here; render() sets it on the response.
7373
return null;
7474
}
7575

76-
@SuppressWarnings("rawtypes")
7776
@Override
7877
public void render(Map model, HttpServletRequest request, HttpServletResponse response) throws Exception {
79-
Resource subj = (Resource) model.get(SUBJECT_KEY);
78+
79+
response.setBufferSize(1024 * 1024); // 1MB
80+
81+
Resource subj = (Resource) Objects.requireNonNull(model, "model should not be null").get(SUBJECT_KEY);
8082
IRI pred = (IRI) model.get(PREDICATE_KEY);
8183
Value obj = (Value) model.get(OBJECT_KEY);
8284
Resource[] contexts = (Resource[]) model.get(CONTEXTS_KEY);
83-
boolean useInferencing = (Boolean) model.get(USE_INFERENCING_KEY);
85+
boolean useInferencing = Boolean.TRUE.equals(model.get(USE_INFERENCING_KEY));
86+
boolean headersOnly = Boolean.TRUE.equals(model.get(HEADERS_ONLY));
87+
88+
RDFWriterFactory factory = (RDFWriterFactory) model.get(FACTORY_KEY);
89+
RDFFormat rdfFormat = factory.getRDFFormat();
8490

85-
boolean headersOnly = (Boolean) model.get(HEADERS_ONLY);
91+
attemptToDetectExceptions(request, factory, headersOnly, subj, pred, obj, useInferencing, contexts);
92+
93+
response.setStatus(SC_OK);
94+
95+
String mimeType = rdfFormat.getDefaultMIMEType();
96+
if (rdfFormat.hasCharset()) {
97+
Charset charset = rdfFormat.getCharset();
98+
mimeType += "; charset=" + charset.name();
99+
}
100+
response.setContentType(mimeType);
101+
102+
String filename = "statements";
103+
if (rdfFormat.getDefaultFileExtension() != null) {
104+
filename += "." + rdfFormat.getDefaultFileExtension();
105+
}
106+
response.setHeader("Content-Disposition", "attachment; filename=" + filename);
107+
108+
if (headersOnly) {
109+
response.setContentLength(0);
110+
response.flushBuffer();
111+
return;
112+
}
113+
114+
try (OutputStream out = response.getOutputStream()) {
115+
RDFWriter writer = factory.getWriter(out);
116+
try (RepositoryConnection conn = RepositoryInterceptor.getRepositoryConnection(request)) {
117+
conn.exportStatements(subj, pred, obj, useInferencing, writer, contexts);
118+
out.flush();
119+
response.flushBuffer();
120+
} catch (RDFHandlerException e) {
121+
var serverHTTPException = new ServerHTTPException("Serialization error: " + e.getMessage(), e);
122+
if (!response.isCommitted()) {
123+
response.reset();
124+
}
125+
throw serverHTTPException;
126+
} catch (RepositoryException e) {
127+
var serverHTTPException = new ServerHTTPException("Repository error: " + e.getMessage(), e);
128+
if (!response.isCommitted()) {
129+
response.reset();
130+
}
131+
throw serverHTTPException;
132+
} catch (Throwable e) {
133+
if (!response.isCommitted()) {
134+
response.reset();
135+
}
136+
throw e;
137+
}
86138

87-
RDFWriterFactory rdfWriterFactory = (RDFWriterFactory) model.get(FACTORY_KEY);
139+
}
88140

89-
RDFFormat rdfFormat = rdfWriterFactory.getRDFFormat();
141+
}
90142

91-
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
92-
RDFWriter rdfWriter = rdfWriterFactory.getWriter(baos);
143+
private static void attemptToDetectExceptions(HttpServletRequest request, RDFWriterFactory rdfWriterFactory,
144+
boolean headersOnly, Resource subj, IRI pred, Value obj, boolean useInferencing, Resource[] contexts)
145+
throws IOException, ServerHTTPException {
146+
try (OutputStream out = OutputStream.nullOutputStream()) {
147+
RDFHandler rdfWriter = new LimitedSizeRDFHandler(rdfWriterFactory.getWriter(out),
148+
MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS);
93149
if (!headersOnly) {
94150
try (RepositoryConnection conn = RepositoryInterceptor.getRepositoryConnection(request)) {
95151
conn.exportStatements(subj, pred, obj, useInferencing, rdfWriter, contexts);
96152
} catch (RDFHandlerException e) {
97153
throw new ServerHTTPException("Serialization error: " + e.getMessage(), e);
98154
} catch (RepositoryException e) {
99155
throw new ServerHTTPException("Repository error: " + e.getMessage(), e);
156+
} catch (LimitedSizeReachedException ignored) {
100157
}
101158
}
102-
try (OutputStream out = response.getOutputStream()) {
103-
response.setStatus(SC_OK);
159+
}
160+
}
104161

105-
String mimeType = rdfFormat.getDefaultMIMEType();
106-
if (rdfFormat.hasCharset()) {
107-
Charset charset = rdfFormat.getCharset();
108-
mimeType += "; charset=" + charset.name();
109-
}
110-
response.setContentType(mimeType);
162+
private static class LimitedSizeRDFHandler implements RDFHandler {
111163

112-
String filename = "statements";
113-
if (rdfFormat.getDefaultFileExtension() != null) {
114-
filename += "." + rdfFormat.getDefaultFileExtension();
115-
}
116-
response.setHeader("Content-Disposition", "attachment; filename=" + filename);
117-
out.write(baos.toByteArray());
164+
private final RDFHandler delegate;
165+
private final long maxSize;
166+
private long currentSize = 0;
167+
168+
public LimitedSizeRDFHandler(RDFHandler delegate, long maxSize) {
169+
this.delegate = delegate;
170+
this.maxSize = maxSize;
171+
}
172+
173+
@Override
174+
public void startRDF() throws RDFHandlerException {
175+
delegate.startRDF();
176+
}
177+
178+
@Override
179+
public void endRDF() throws RDFHandlerException {
180+
delegate.endRDF();
181+
}
182+
183+
@Override
184+
public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
185+
delegate.handleNamespace(prefix, uri);
186+
incrementCurrentSize();
187+
}
188+
189+
@Override
190+
public void handleStatement(Statement st) throws RDFHandlerException {
191+
delegate.handleStatement(st);
192+
incrementCurrentSize();
193+
}
194+
195+
@Override
196+
public void handleComment(String comment) throws RDFHandlerException {
197+
delegate.handleComment(comment);
198+
incrementCurrentSize();
199+
}
200+
201+
private void incrementCurrentSize() {
202+
currentSize++;
203+
if (currentSize > maxSize) {
204+
endRDF();
205+
logger.trace(
206+
"Limited size reached, throwing LimitedSizeReachedException to signal that we are done testing the export of statements for exceptions.");
207+
throw new LimitedSizeReachedException();
118208
}
119209
}
120210
}
121211

212+
private static class LimitedSizeReachedException extends RuntimeException {
213+
@Override
214+
public Throwable fillInStackTrace() {
215+
// Do not fill in the stack trace to avoid performance overhead
216+
return this;
217+
}
218+
}
219+
122220
}

tools/server/src/test/java/org/eclipse/rdf4j/http/server/ProtocolIT.java

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.List;
3030
import java.util.Objects;
3131
import java.util.Random;
32+
import java.util.Scanner;
3233
import java.util.Set;
3334
import java.util.concurrent.ExecutorService;
3435
import java.util.concurrent.Executors;
@@ -46,6 +47,7 @@
4647
import org.apache.http.message.BasicNameValuePair;
4748
import org.eclipse.rdf4j.common.io.IOUtil;
4849
import org.eclipse.rdf4j.http.protocol.Protocol;
50+
import org.eclipse.rdf4j.model.IRI;
4951
import org.eclipse.rdf4j.model.Namespace;
5052
import org.eclipse.rdf4j.model.ValueFactory;
5153
import org.eclipse.rdf4j.model.impl.SimpleNamespace;
@@ -57,6 +59,7 @@
5759
import org.eclipse.rdf4j.rio.RDFFormat;
5860
import org.eclipse.rdf4j.rio.Rio;
5961
import org.junit.jupiter.api.AfterAll;
62+
import org.junit.jupiter.api.AfterEach;
6063
import org.junit.jupiter.api.BeforeAll;
6164
import org.junit.jupiter.api.Test;
6265

@@ -86,6 +89,12 @@ public static void stopServer() throws Exception {
8689
server.stop();
8790
}
8891

92+
@AfterEach
93+
public void clearRepository() throws Exception {
94+
// Clear the repository after each test
95+
delete(Protocol.getStatementsLocation(TestServer.REPOSITORY_URL));
96+
}
97+
8998
/**
9099
* Tests the server's methods for updating all data in a repository.
91100
*/
@@ -172,7 +181,7 @@ public void testQueryDirect_POST() throws Exception {
172181

173182
System.out.println("Query Direct POST Status: " + response.getStatusLine());
174183
int statusCode = response.getStatusLine().getStatusCode();
175-
assertEquals(true, statusCode >= 200 && statusCode < 400);
184+
assertTrue(statusCode >= 200 && statusCode < 400);
176185
}
177186

178187
/**
@@ -192,7 +201,7 @@ public void testUpdateDirect_POST() throws Exception {
192201

193202
System.out.println("Update Direct Post Status: " + response.getStatusLine());
194203
int statusCode = response.getStatusLine().getStatusCode();
195-
assertEquals(true, statusCode >= 200 && statusCode < 400);
204+
assertTrue(statusCode >= 200 && statusCode < 400);
196205
}
197206

198207
/**
@@ -215,7 +224,7 @@ public void testUpdateForm_POST() throws Exception {
215224

216225
System.out.println("Update Form Post Status: " + response.getStatusLine());
217226
int statusCode = response.getStatusLine().getStatusCode();
218-
assertEquals(true, statusCode >= 200 && statusCode < 400);
227+
assertTrue(statusCode >= 200 && statusCode < 400);
219228
}
220229

221230
/**
@@ -370,6 +379,49 @@ public void testUpdateResponse_HEAD() throws Exception {
370379
}
371380
}
372381

382+
@Test
383+
public void testUploadAndRetrieveStatements_GET() throws Exception {
384+
385+
String statementsLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL);
386+
387+
// PUT the Turtle file into the repository
388+
final String baseLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL);
389+
final String file = "/testcases/default-graph-2.ttl";
390+
391+
// PUT the same file into many different named graphs so that we would trigger an OOM error when retrieving it if it were not
392+
// directly written to the http output stream
393+
for (int i = 1; i <= 20000; i++) {
394+
IRI context = vf.createIRI("http://example.org/graph" + i);
395+
putFile(baseLocation, file, context);
396+
}
397+
398+
// GET all statements back from the same endpoint
399+
URL url = new URL(statementsLocation);
400+
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
401+
conn.setRequestMethod("GET");
402+
conn.setRequestProperty("Accept", RDFFormat.NQUADS.getDefaultMIMEType()); // ask for easy-to-parse format
403+
conn.connect();
404+
405+
try {
406+
assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode(),
407+
"GET /statements should respond 200 OK");
408+
409+
// Parse the response stream and count triples
410+
try (InputStream in = conn.getInputStream()) {
411+
Scanner scanner = new Scanner(in);
412+
int count = 0;
413+
while (scanner.hasNext()) {
414+
scanner.nextLine();
415+
count++;
416+
}
417+
assertEquals(1860000, count, "Expected 1860000 triples, but got " + count + " instead.");
418+
}
419+
} finally {
420+
conn.disconnect();
421+
}
422+
423+
}
424+
373425
/**
374426
* Test for SES-1861
375427
*
@@ -560,9 +612,16 @@ private void deleteNamespace(String location) throws Exception {
560612
}
561613
}
562614

563-
private void putFile(String location, String file) throws Exception {
564-
System.out.println("Put file to " + location);
615+
/**
616+
* PUT a file into a specific named graph (context) by adding the ?context=<encodedIRI> query parameter.
617+
*/
618+
private void putFile(String repositoryBaseLocation, String file, IRI context) throws Exception {
619+
String location = repositoryBaseLocation
620+
+ "?" + Protocol.CONTEXT_PARAM_NAME + "=" + Protocol.encodeValue(context);
621+
putFile(location, file); // delegate to the existing helper
622+
}
565623

624+
private void putFile(String location, String file) throws Exception {
566625
URL url = new URL(location);
567626
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
568627
conn.setRequestMethod("PUT");

0 commit comments

Comments
 (0)