@@ -7,7 +7,9 @@ import java.nio.charset.StandardCharsets
77import org .apache .flink .api .common .operators .Order
88import org .apache .flink .api .scala ._
99import org .apache .flink .core .fs .FileSystem
10+ import org .apache .jena .graph .GraphUtil
1011import org .apache .jena .rdf .model .{Model , ModelFactory }
12+ import org .apache .jena .sparql .graph .GraphFactory
1113import org .apache .jena .sparql .util .TripleComparator
1214import org .slf4j .LoggerFactory
1315
@@ -52,32 +54,36 @@ object RDFGraphWriter {
5254
5355 // sort triples if enabled
5456 val tmp = if (sorted) {
55- graph.triples// .sortPartition(t => t, Order.ASCENDING) // map(t => (t, t)).sortPartition(1 , Order.DESCENDING).map(_._1 )
57+ graph.triples.sortPartition(_.hashCode() , Order .ASCENDING )
5658 } else {
5759 graph.triples
5860 }
5961
60- if (singleFile) {
61- tmp.setParallelism(1 )
62- }
63-
64- tmp
62+ val sink = tmp
6563 .map(new JenaTripleToNTripleString ()) // to N-TRIPLES string
6664 .writeAsText(path.toString, writeMode = FileSystem .WriteMode .OVERWRITE )
6765
66+ // write to single file if enabled
67+ if (singleFile) {
68+ sink.setParallelism(1 )
69+ }
70+
6871 logger.info(" finished writing triples to disk in " + (System .currentTimeMillis()- startTime) + " ms." )
6972 }
7073
74+ /**
75+ * Converts an RDF graph to an Apache Jena in-memory model.
76+ *
77+ * @note For large graphs this can be too expensive
78+ * and lead to an OOM exception
79+ *
80+ * @param graph the RDF graph
81+ *
82+ * @return the in-memory Apache Jena model containing the triples
83+ */
7184 def convertToModel (graph : RDFGraph ) : Model = {
    // Removed lines (marked '-'): the previous version serialised every triple to an
    // N-TRIPLES string, joined the collected strings, and re-parsed the text into the model
    // (skipping the parse when the joined text was blank).
72- val modelString = graph.triples.map(new JenaTripleToNTripleString ())
73- .collect().mkString(" \n " )
74-
7585 val model = ModelFactory .createDefaultModel()
76-
77- if (! modelString.trim.isEmpty) {
78- model.read(new ByteArrayInputStream (modelString.getBytes(StandardCharsets .UTF_8 )), null , " N-TRIPLES" )
79- }
80-
    // Added line (marked '+'): the collected triples are passed as an array straight to
    // GraphUtil.add on the model's underlying graph, with no string round trip.
    // NOTE(review): graph.triples is presumably a Flink DataSet, in which case collect()
    // materialises every triple in the calling program's memory - confirm against RDFGraph.
86+ GraphUtil .add(model.getGraph, graph.triples.collect().toArray)
8187 model
8288 }
8389}
0 commit comments