Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 50c2c7f

Browse files
N-Triple writer extended.
1 parent d6df9ab commit 50c2c7f

3 files changed

Lines changed: 22 additions & 9 deletions

File tree

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package net.sansa_stack.inference.utils
2+
3+
import net.sansa_stack.inference.data.RDFTriple
4+
5+
/**
 * Convert an RDFTriple object to an N-Triple string.
 *
 * The object position is wrapped in angle brackets only when it looks like a
 * URI (recognized by scheme prefix); anything else is emitted verbatim.
 *
 * @author Lorenz Buehmann
 */
class RDFTripleToNTripleString extends Function1[RDFTriple, String] with java.io.Serializable {

  /**
   * Serialize a single triple as one N-Triples line.
   *
   * @param t the triple to serialize
   * @return the N-Triples representation, terminated by " ."
   */
  override def apply(t: RDFTriple): String = {
    // Wrap the object in angle brackets only when it is a URI.
    // FIX: the original check tested only "http:" and "ftp:"; "https://..."
    // does NOT start with the string "http:", so https URIs were emitted
    // without brackets — invalid N-Triples. "https:" is now checked too.
    // NOTE(review): assumes literal objects never begin with one of these
    // scheme prefixes — TODO confirm against RDFTriple's invariants.
    val objStr =
      if (t.`object`.startsWith("http:") || t.`object`.startsWith("https:") || t.`object`.startsWith("ftp:")) {
        s"<${t.`object`}>"
      } else {
        t.`object`
      }
    // Subject and predicate are always URIs in this pipeline, hence always bracketed.
    s"<${t.subject}> <${t.predicate}> ${objStr} ."
  }
}

sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/data/RDFGraphWriter.scala

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import org.apache.flink.api.common.operators.Order
77
import org.apache.flink.api.scala._
88
import org.apache.flink.core.fs.FileSystem
99
import org.apache.jena.rdf.model.{Model, ModelFactory}
10-
import net.sansa_stack.inference.utils.RDFTripleOrdering
10+
import net.sansa_stack.inference.utils.{RDFTripleOrdering, RDFTripleToNTripleString}
1111
import org.slf4j.LoggerFactory
1212

1313
/**
@@ -27,7 +27,7 @@ object RDFGraphWriter {
2727
implicit val ordering = RDFTripleOrdering
2828

2929
graph.triples.map(t=>(t,t)).sortPartition(1, Order.DESCENDING).map(_._1)
30-
.map(t => "<" + t.subject + "> <" + t.predicate + "> <" + t.`object` + "> .") // to N-TRIPLES string
30+
.map(new RDFTripleToNTripleString()) // to N-TRIPLES string
3131
.writeAsText(path, writeMode = FileSystem.WriteMode.OVERWRITE)
3232

3333
logger.info("finished writing triples to disk in " + (System.currentTimeMillis()-startTime) + "ms.")
@@ -48,15 +48,14 @@ object RDFGraphWriter {
4848
implicit val ordering = RDFTripleOrdering
4949

5050
graph.triples.map(t=>(t,t)).sortPartition(1, Order.DESCENDING).map(_._1)
51-
.map(t => "<" + t.subject + "> <" + t.predicate + "> <" + t.`object` + "> .") // to N-TRIPLES string
51+
.map(new RDFTripleToNTripleString()) // to N-TRIPLES string
5252
.writeAsText(path.getAbsolutePath, writeMode = FileSystem.WriteMode.OVERWRITE)
5353

5454
logger.info("finished writing triples to disk in " + (System.currentTimeMillis()-startTime) + "ms.")
5555
}
5656

5757
def convertToModel(graph: RDFGraph) : Model = {
58-
val modelString = graph.triples.map(t =>
59-
"<" + t.subject + "> <" + t.predicate + "> <" + t.`object` + "> .") // to N-TRIPLES string
58+
val modelString = graph.triples.map(new RDFTripleToNTripleString())
6059
.collect().mkString("\n")
6160

6261
val model = ModelFactory.createDefaultModel()

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/RDFGraphWriter.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import org.apache.jena.rdf.model.{Model, ModelFactory}
77
import org.apache.spark.rdd.RDD
88
import org.apache.spark.sql.DataFrame
99
import net.sansa_stack.inference.data.RDFTriple
10-
import net.sansa_stack.inference.utils.RDFTripleOrdering
10+
import net.sansa_stack.inference.utils.{RDFTripleOrdering, RDFTripleToNTripleString}
1111
import org.slf4j.LoggerFactory
1212

1313
/**
@@ -54,7 +54,7 @@ object RDFGraphWriter {
5454
}
5555

5656
// convert to N-Triple format
57-
var triplesNTFormat = tmp.map(t => "<" + t.subject + "> <" + t.predicate + "> <" + t.`object` + "> .")
57+
var triplesNTFormat = tmp.map(new RDFTripleToNTripleString())
5858

5959
// convert to single file, i.e. move all to one partition
6060
// (might be very expensive and contradicts the Big Data paradigm on Hadoop in general)
@@ -73,8 +73,7 @@ object RDFGraphWriter {
7373
}
7474

7575
def convertToModel(graph: RDFGraph) : Model = {
76-
val modelString = graph.triples.map(t =>
77-
"<" + t.subject + "> <" + t.predicate + "> <" + t.`object` + "> .") // to N-TRIPLES string
76+
val modelString = graph.triples.map(new RDFTripleToNTripleString())
7877
.collect().mkString("\n")
7978

8079
val model = ModelFactory.createDefaultModel()

0 commit comments

Comments
 (0)