
Commit 5f17cb7

Do TC computation in separate reasoner.
1 parent fe1a371 · commit 5f17cb7

5 files changed: 190 additions & 145 deletions


sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/RDFGraphMaterializer.scala

Lines changed: 5 additions & 3 deletions
@@ -38,7 +38,7 @@ object RDFGraphMaterializer {
 
     // load triples from disk
     val graph = RDFGraphLoader.loadFromDisk(input(0), env)
-    print(graph.size())
+    println(s"|G| = ${graph.size()}")
 
     // create reasoner
     val reasoner = profile match {
@@ -48,13 +48,15 @@ object RDFGraphMaterializer {
 
     // compute inferred graph
     val inferredGraph = reasoner.apply(graph)
-    println(s"|G| = $inferredGraph.size()")
+    println(s"|G_inf| = ${inferredGraph.size()}")
 
     // write triples to disk
     RDFGraphWriter.writeToDisk(inferredGraph, output)
 
-    env.execute(s"RDF ${profile} Reasoning")
+    println(env.getExecutionPlan())
 
+    // run the program
+    env.execute(s"RDF ${profile} Reasoning")
   }
 
   // the config object
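
A note on the sequence added above: in the Flink DataSet API, transformations are lazy, an action such as count() triggers a job of its own, getExecutionPlan() returns the optimized plan as a JSON string, and env.execute(name) then runs the sinks that are still pending. A minimal, self-contained sketch of that interplay, assuming nothing from this repository (sample data, output path and job name are invented):

import org.apache.flink.api.scala._

object ExecutionPlanDemo {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // transformations are lazy; nothing runs yet
    val edges = env.fromElements(("a", "b"), ("b", "c"))
    val reversed = edges.map(t => (t._2, t._1))

    // count() is an action and launches an immediate job,
    // analogous to printing graph.size() / inferredGraph.size() above
    println(s"|E| = ${edges.count()}")

    // a sink is needed so that the plan below has something to execute
    reversed.writeAsCsv("/tmp/reversed.csv")

    // JSON dump of the optimized plan, as printed in RDFGraphMaterializer
    println(env.getExecutionPlan())

    // run the remaining program
    env.execute("execution plan demo")
  }
}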

sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasoner.scala

Lines changed: 3 additions & 137 deletions
@@ -1,21 +1,17 @@
 package net.sansa_stack.inference.flink.forwardchaining
 
-import net.sansa_stack.inference.flink.data.RDFGraph
-import org.apache.flink.api.common.typeinfo.TypeInformation
-import org.apache.flink.api.scala.{DataSet, _}
-import org.apache.flink.util.Collector
 import net.sansa_stack.inference.data.RDFTriple
-import net.sansa_stack.inference.utils.Profiler
+import net.sansa_stack.inference.flink.data.RDFGraph
+import org.apache.flink.api.scala.DataSet
 
 import scala.collection.mutable
-import scala.reflect.ClassTag
 
 /**
   * A forward chaining based reasoner.
   *
   * @author Lorenz Buehmann
   */
-trait ForwardRuleReasoner extends Profiler{
+trait ForwardRuleReasoner extends TransitiveReasoner{
 
   /**
     * Applies forward chaining to the given RDF graph and returns a new RDF graph that contains all additional
@@ -26,136 +22,6 @@ trait ForwardRuleReasoner extends Profiler{
     */
   def apply(graph: RDFGraph) : RDFGraph
 
-  // def computeTransitiveClosure[A, B, C](s: mutable.Set[(A, B, C)]): mutable.Set[(A, B, C)] = {
-  //   val t = addTransitive(s)
-  //   // recursive call if set changed, otherwise stop and return
-  //   if (t.size == s.size) s else computeTransitiveClosure(t)
-  // }
-
-  def computeTransitiveClosure(s: mutable.Set[RDFTriple]): mutable.Set[RDFTriple] = {
-    val t = addTransitive(s)
-    // recursive call if set changed, otherwise stop and return
-    if (t.size == s.size) s else computeTransitiveClosure(t)
-  }
-
-  // def addTransitive[A, B, C](s: mutable.Set[(A, B, C)]) = {
-  //   s ++ (for ((s1, p1, o1) <- s; (s2, p2, o2) <- s if o1 == s2) yield (s1, p1, o2))
-  // }
-
-  def addTransitive(s: mutable.Set[RDFTriple]) = {
-    s ++ (for (t1 <- s; t2 <- s if t1.`object` == t2.subject) yield RDFTriple(t1.subject, t1.predicate, t2.`object`))
-  }
-
-  def computeTransitiveClosure(triples: DataSet[RDFTriple]): DataSet[RDFTriple] = {
-    if(triples.count() == 0) return triples
-    log.info("computing TC...")
-
-    profile {
-      // keep the predicate
-      val predicate = triples.first(1).collect().head.predicate
-
-      // compute the TC
-      var subjectObjectPairs = triples.map(t => (t.subject, t.`object`))
-
-      // because join() joins on keys, in addition the pairs are stored in reversed order (o, s)
-      val objectSubjectPairs = subjectObjectPairs.map(t => (t._2, t._1))
-
-      // the join is iterated until a fixed point is reached
-      var i = 1
-      var oldCount = 0L
-      var nextCount = triples.count()
-      do {
-        log.info(s"iteration $i...")
-        oldCount = nextCount
-        // perform the join (s1, o1) x (o2, s2), obtaining an DataSet of (s1=o2, (o1, s2)) pairs,
-        // then project the result to obtain the new (s2, o1) paths.
-        subjectObjectPairs = subjectObjectPairs
-          .union(
-            subjectObjectPairs
-              .join(objectSubjectPairs).where(0).equalTo(0)
-              .map(x => (x._2._2, x._1._2))
-              .filter(tuple => tuple._1 != tuple._2) // omit (s1, s1)
-          )
-          .distinct()
-        nextCount = subjectObjectPairs.count()
-        i += 1
-      } while (nextCount != oldCount)
-
-      println("TC has " + nextCount + " triples.")
-      subjectObjectPairs.map(p => RDFTriple(p._1, predicate, p._2))
-    }
-  }
-
-  def computeTransitiveClosure2(triples: DataSet[RDFTriple]): DataSet[RDFTriple] = {
-    if(triples.count() == 0) return triples
-    log.info("computing TC...")
-
-    profile {
-      // keep the predicate
-      val predicate = triples.first(1).collect().head.predicate
-
-      // convert to tuples needed for the JOIN operator
-      val subjectObjectPairs = triples.map(t => (t.subject, t.`object`))
-
-      // compute the TC
-      val res = subjectObjectPairs.iterateWithTermination(10) {
-        prevPaths: DataSet[(String, String)] =>
-
-          val nextPaths = prevPaths
-            .join(subjectObjectPairs).where(1).equalTo(0) {
-              (left, right) => (left._1, right._2)
-            }
-            .union(prevPaths)
-            .groupBy(0, 1)
-            .reduce((l, r) => l)
-
-          val terminate = prevPaths
-            .coGroup(nextPaths)
-            .where(0).equalTo(0) {
-              (prev, next, out: Collector[(String, String)]) => {
-                val prevPaths = prev.toSet
-                for (n <- next)
-                  if (!prevPaths.contains(n)) out.collect(n)
-              }
-            }.withForwardedFieldsSecond("*")
-          (nextPaths, terminate)
-      }
-
-      // map back to RDF triples
-      res.map(p => RDFTriple(p._1, predicate, p._2))
-    }
-  }
-
-  def computeTransitiveClosure[A: ClassTag: TypeInformation](edges: DataSet[(A, A)]): DataSet[(A, A)] = {
-    log.info("computing TC...")
-    // we keep the transitive closure cached
-    var tc = edges
-
-    // because join() joins on keys, in addition the pairs are stored in reversed order (o, s)
-    val edgesReversed = tc.map(t => (t._2, t._1))
-
-    // the join is iterated until a fixed point is reached
-    var i = 1
-    var oldCount = 0L
-    var nextCount = tc.count()
-    do {
-      log.info(s"iteration $i...")
-      oldCount = nextCount
-      // perform the join (x, y) x (y, x), obtaining an DataSet of (x=y, (y, x)) pairs,
-      // then project the result to obtain the new (x, y) paths.
-      val join = tc.join(edgesReversed).where(0).equalTo(0)
-      join.print()
-      tc = tc
-        .union(join.map(x => (x._2._2, x._2._1)))
-        .distinct()
-      nextCount = tc.count()
-      i += 1
-    } while (nextCount != oldCount)
-
-    println("TC has " + nextCount + " edges.")
-    tc
-  }
-
   /**
     * Extracts all triples for the given predicate.
     *

sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala

Lines changed: 4 additions & 4 deletions
@@ -8,7 +8,7 @@ import net.sansa_stack.inference.utils.CollectionUtils
 import org.slf4j.LoggerFactory
 
 /**
-  * A forward chaining implementation of the OWL Horst entailment regime.
+  * A forward chaining implementation of the RDFS entailment regime.
   *
   * @constructor create a new RDFS forward chaining reasoner
   * @param env the Apache Flink execution environment
@@ -22,7 +22,7 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
     logger.info("materializing graph...")
     val startTime = System.currentTimeMillis()
 
-    var triplesDS = graph.triples // we cache this RDD because it's used quite often
+    var triplesDS = graph.triples
 
     // RDFS rules dependency was analyzed in \todo(add references) and the same ordering is used here
 
@@ -34,14 +34,14 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
              yyy rdfs:subClassOf zzz .       xxx rdfs:subClassOf zzz .
      */
     val subClassOfTriples = extractTriples(triplesDS, RDFS.subClassOf.getURI) // extract rdfs:subClassOf triples
-    val subClassOfTriplesTrans = computeTransitiveClosure2(subClassOfTriples) // mutable.Set()++subClassOfTriples.collect())
+    val subClassOfTriplesTrans = computeTransitiveClosureOpt(subClassOfTriples)
 
     /*
        rdfs5  xxx rdfs:subPropertyOf yyy .
              yyy rdfs:subPropertyOf zzz .    xxx rdfs:subPropertyOf zzz .
      */
     val subPropertyOfTriples = extractTriples(triplesDS, RDFS.subPropertyOf.getURI) // extract rdfs:subPropertyOf triples
-    val subPropertyOfTriplesTrans = computeTransitiveClosure(subPropertyOfTriples) // extractTriples(mutable.Set()++subPropertyOfTriples.collect(), RDFS.subPropertyOf.getURI))
+    val subPropertyOfTriplesTrans = computeTransitiveClosureOpt(subPropertyOfTriples)
 
     // a map structure should be more efficient
     val subClassOfMap = CollectionUtils.toMultiMap(subClassOfTriplesTrans.map(t => (t.subject, t.`object`)).collect)
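
The last line above collects the transitively closed rdfs:subClassOf triples to the driver and turns them into a multimap, so that later rules can look up all superclasses of a class directly instead of joining again. A rough sketch of that lookup structure in plain Scala, assuming CollectionUtils.toMultiMap behaves like the groupBy below (the class names are invented):

object SubClassLookupSketch {
  def main(args: Array[String]): Unit = {
    // (subject, object) pairs of the transitively closed rdfs:subClassOf triples
    val subClassOf = Seq(("Dog", "Mammal"), ("Mammal", "Animal"), ("Dog", "Animal"))

    // subject -> all (direct and inherited) superclasses
    val subClassOfMap: Map[String, Set[String]] =
      subClassOf.groupBy(_._1).map { case (sub, pairs) => sub -> pairs.map(_._2).toSet }

    // rdfs9-style lookup: instances of Dog are also instances of every superclass of Dog
    println(subClassOfMap.getOrElse("Dog", Set.empty)) // Set(Mammal, Animal)
  }
}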

0 commit comments
