Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 862cd7d

Browse files
Split by rdf:type as early as possible.
1 parent e781e51 commit 862cd7d

3 files changed

Lines changed: 19 additions & 12 deletions

File tree

sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import org.apache.jena.vocabulary.{RDF, RDFS}
66
import net.sansa_stack.inference.data.RDFTriple
77
import net.sansa_stack.inference.utils.CollectionUtils
88
import org.slf4j.LoggerFactory
9+
import net.sansa_stack.inference.flink.utils.DataSetUtils.DataSetOps
910

1011
/**
1112
* A forward chaining implementation of the RDFS entailment regime.
@@ -48,19 +49,25 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
4849
val subPropertyMap = CollectionUtils.toMultiMap(subPropertyOfTriplesTrans.map(t => (t.subject, t.`object`)).collect)
4950

5051

52+
// split by rdf:type
53+
val split = triplesDS.partitionBy(t => t.predicate == RDF.`type`.getURI)
54+
var typeTriples = split._1
55+
var otherTriples = split._2
56+
57+
5158
// 2. SubPropertyOf inheritance according to rdfs7 is computed
5259

5360
/*
5461
rdfs7 aaa rdfs:subPropertyOf bbb .
5562
xxx aaa yyy . xxx bbb yyy .
5663
*/
5764
val triplesRDFS7 =
58-
triplesDS // all triples (s p1 o)
65+
otherTriples // all triples (s p1 o)
5966
.filter(t => subPropertyMap.contains(t.predicate)) // such that p1 has a super property p2
6067
.flatMap(t => subPropertyMap(t.predicate).map(supProp => RDFTriple(t.subject, supProp, t.`object`))) // create triple (s p2 o)
6168

6269
// add triples
63-
triplesDS = triplesDS.union(triplesRDFS7)
70+
otherTriples = otherTriples.union(triplesRDFS7)
6471

6572
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
6673

@@ -72,7 +79,7 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
7279
val domainMap = domainTriples.map(t => (t.subject, t.`object`)).collect.toMap
7380

7481
val triplesRDFS2 =
75-
triplesDS
82+
otherTriples
7683
.filter(t => domainMap.contains(t.predicate))
7784
.map(t => RDFTriple(t.subject, RDF.`type`.getURI, domainMap(t.predicate)))
7885

@@ -84,16 +91,15 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
8491
val rangeMap = rangeTriples.map(t => (t.subject, t.`object`)).collect().toMap
8592

8693
val triplesRDFS3 =
87-
triplesDS
94+
otherTriples
8895
.filter(t => rangeMap.contains(t.predicate))
8996
.map(t => RDFTriple(t.`object`, RDF.`type`.getURI, rangeMap(t.predicate)))
9097

98+
// rdfs2 and rdfs3 generated rdf:type triples which we'll add to the existing ones
9199
val triples23 = triplesRDFS2.union(triplesRDFS3)
92100

93-
// get rdf:type tuples here as intermediate result
94-
val typeTriples = triplesDS
95-
.filter(t => t.predicate == RDF.`type`.getURI)
96-
.union(triples23)
101+
// all rdf:type triples here as intermediate result
102+
typeTriples = typeTriples.union(triples23)
97103

98104
// 4. SubClass inheritance according to rdfs9
99105

@@ -109,9 +115,10 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
109115
// 5. merge triples and remove duplicates
110116
val allTriples = env.union(
111117
Seq(
118+
otherTriples,
112119
subClassOfTriplesTrans,
113120
subPropertyOfTriplesTrans,
114-
triples23,
121+
typeTriples,
115122
triplesRDFS7,
116123
triplesRDFS9))
117124
.distinct()

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/RDFGraphMaterializer.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ object RDFGraphMaterializer {
4646

4747
// load triples from disk
4848
val graph = RDFGraphLoader.loadFromDisk(input, session.sparkContext, 4)
49+
println(s"|G| = ${graph.size()}")
4950

5051
// create reasoner
5152
val reasoner = profile match {
@@ -55,7 +56,7 @@ object RDFGraphMaterializer {
5556

5657
// compute inferred graph
5758
val inferredGraph = reasoner.apply(graph)
58-
print(inferredGraph.size())
59+
println(s"|G_inf| = ${inferredGraph.size()}")
5960

6061
// write triples to disk
6162
RDFGraphWriter.writeGraphToFile(inferredGraph, output.getAbsolutePath, writeToSingleFile, sortedOutput)

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/ForwardRuleReasonerRDFS.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import net.sansa_stack.inference.spark.data.RDFGraph
77
import net.sansa_stack.inference.spark.utils.RDDUtils
88
import net.sansa_stack.inference.utils.CollectionUtils
99
import org.slf4j.LoggerFactory
10+
import net.sansa_stack.inference.spark.utils.RDDUtils.RDDOps
1011

1112
/**
1213
* A forward chaining implementation of the RDFS entailment regime.
@@ -53,8 +54,6 @@ class ForwardRuleReasonerRDFS(sc: SparkContext) extends ForwardRuleReasoner{
5354
val subClassOfMapBC = sc.broadcast(subClassOfMap)
5455
val subPropertyMapBC = sc.broadcast(subPropertyMap)
5556

56-
import net.sansa_stack.inference.spark.utils.RDDUtils.RDDOps
57-
5857
// split by rdf:type
5958
val split = triplesRDD.partitionBy(t => t.predicate == RDF.`type`.getURI)
6059
var typeTriples = split._1

0 commit comments

Comments
 (0)