Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 400a53a

Browse files
Minimization rules in separate package.
1 parent 2dbef38 commit 400a53a

6 files changed

Lines changed: 678 additions & 34 deletions

File tree

sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphGenerator.scala

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,16 @@ import scala.collection.mutable.HashMap
3838
object RuleDependencyGraphGenerator extends Logging {
3939

4040
sealed trait RuleDependencyDirection
41+
4142
case object ConsumerProducer extends RuleDependencyDirection
43+
4244
case object ProducerConsumer extends RuleDependencyDirection
4345

4446
/**
4547
* Generates the rule dependency graph for a given set of rules.
4648
*
4749
* @param rules the set of rules
48-
* @param f a function that denotes whether a rule `r1` depends on another rule `r2`
50+
* @param f a function that denotes whether a rule `r1` depends on another rule `r2`
4951
* @return the rule dependency graph
5052
*/
5153
def generate(rules: Set[Rule],
@@ -95,19 +97,14 @@ object RuleDependencyGraphGenerator extends Logging {
9597
removeEdgesWithPredicateAlreadyTC _,
9698
removeCyclesIfPredicateIsTC _,
9799
removeEdgesWithCycleOverTCNode _
98-
,removeCycles _
99-
// ,prune _
100+
, removeCycles _
101+
, prune _
100102
)
101103

102-
for((rule, i) <- pruningRules.zipWithIndex) g = {
103-
println(s"$i." + "*" * 40)
104-
rule.apply(g)
105-
}
106-
var cnt = 1
107-
pruningRules.foreach(rule => {
108-
109-
110-
})
104+
// for ((rule, i) <- pruningRules.zipWithIndex) g = {
105+
// println(s"$i." + "*" * 40)
106+
// rule.apply(g)
107+
// }
111108

112109
g
113110
}
@@ -121,7 +118,7 @@ object RuleDependencyGraphGenerator extends Logging {
121118
* @param rule2 the second rule
122119
* @return whether the first rule depends on the second rule
123120
*/
124-
def dependsOn(rule1: Rule, rule2: Rule) : Boolean = {
121+
def dependsOn(rule1: Rule, rule2: Rule): Boolean = {
125122
// head of rule2
126123
val head2TriplePatterns = rule2.headTriplePatterns()
127124
// body of rule1
@@ -152,7 +149,7 @@ object RuleDependencyGraphGenerator extends Logging {
152149
* @param rule2 the second rule
153150
* @return the triple pattern on which `rule1` depends
154151
*/
155-
def dependsOnSmart(rule1: Rule, rule2: Rule) : Option[TriplePattern] = {
152+
def dependsOnSmart(rule1: Rule, rule2: Rule): Option[TriplePattern] = {
156153
// R1: B1 -> H1
157154
// R2: B2 -> H2
158155
// R2 -> R1 = ? , i.e. H2 ∩ B1
@@ -173,7 +170,7 @@ object RuleDependencyGraphGenerator extends Logging {
173170
ret = Some(tp2)
174171
}
175172

176-
if(tp1.getPredicate.isVariable && tp2.getPredicate.isVariable) {
173+
if (tp1.getPredicate.isVariable && tp2.getPredicate.isVariable) {
177174
ret = Some(tp2)
178175
}
179176
}
@@ -191,15 +188,15 @@ object RuleDependencyGraphGenerator extends Logging {
191188

192189
val loopEdge = node.outgoing.find(_.target == node)
193190

194-
if(loopEdge.isDefined) {
191+
if (loopEdge.isDefined) {
195192

196193
val edge = loopEdge.get
197194

198195
val rule = node.value
199196

200197
val isTC = RuleUtils.isTransitiveClosure(rule, edge.label.asInstanceOf[TriplePattern].getPredicate)
201198

202-
if(!isTC) {
199+
if (!isTC) {
203200
edges2Remove :+= edge
204201
debug(s"loop of node $node")
205202
}
@@ -212,10 +209,26 @@ object RuleDependencyGraphGenerator extends Logging {
212209
new RuleDependencyGraph(newNodes, newEdges)
213210
}
214211

212+
def sameElements(this1: Traversable[_], that: Traversable[_]): Boolean = {
213+
this1.size == that.size && {
214+
val thisList = this1.to[List]
215+
// thisList.indexOf(that.head) may fail due to asymmetric equality
216+
val idx = thisList.indexWhere(_ == that.head)
217+
if (idx >= 0) {
218+
val thisDoubled = thisList ++ thisList.tail
219+
val thatList = that.to[List]
220+
(thisDoubled startsWith(thatList, idx)) ||
221+
(thisDoubled startsWith(thatList.reverse, idx))
222+
}
223+
else false
224+
}
225+
}
226+
215227
def removeCycles(graph: RuleDependencyGraph): RuleDependencyGraph = {
216228
debug("removing redundant cycles")
217-
var edges2Remove = Seq[Graph[Rule, LDiEdge]#EdgeT]()
229+
var edges2Remove = collection.mutable.Set[Graph[Rule, LDiEdge]#EdgeT]()
218230

231+
graph.findCycle
219232

220233
// convert to JGraphT graph for algorithms not contained in Scala Graph API
221234
val g = GraphUtils.asJGraphtRuleSetGraph(graph)
@@ -237,44 +250,73 @@ object RuleDependencyGraphGenerator extends Logging {
237250
val cyclesWithNode: Buffer[Buffer[Rule]] = allCycles.asScala.filter(cycle => cycle.contains(node.value)).map(cycle => cycle.asScala)
238251

239252
// cycles that use the same property
240-
val cyclesWithNodeSameProp = cyclesWithNode.map(cycle => {
253+
val cyclesWithNodeSameProp: Map[Node, scala.List[Buffer[graph.EdgeT]]] = cyclesWithNode.map(cycle => {
241254

242255
debug("Cycle: " + cycle.mkString(", "))
256+
257+
// pairs of rules (r1, r2)
243258
var pairsOfRules = cycle zip cycle.tail
244259
pairsOfRules :+= (cycle.last, cycle(0))
245260

246-
val edges = pairsOfRules.map(e => {
261+
// map to list of edges
262+
val edges: Buffer[graph.EdgeT] = pairsOfRules.map(e => {
247263
val node1 = graph get e._1
248264
val node2 = graph get e._2
249265

250266
node1.outgoing.filter(_.target == node2)
251267
}).flatten
252268
debug("Edges: " + edges.mkString(", "))
253269

270+
// map to edge labels, i.e. the predicates
254271
var predicates = edges.map(_.label.asInstanceOf[TriplePattern].getPredicate)
255272
if(predicates.forall(_.isVariable)) predicates = ArrayBuffer(NodeFactory.createVariable("p"))
256-
debug("predicates:" + predicates)
273+
debug("predicates:" + predicates)
274+
275+
// return predicate if it's commonly used for all edges
257276
val samePred = predicates.size == 1
258277
if (samePred) Some(predicates(0), edges) else None
259278
}).filter(_.isDefined).map(_.get).groupBy(e => e._1).mapValues(e => e.map(x => x._2).toList)
260279

280+
var removedCycles: collection.mutable.Set[Buffer[graph.EdgeT]] = collection.mutable.Set()
281+
261282
val tmp: Map[Node, Map[Int, List[Buffer[graph.EdgeT]]]] = cyclesWithNodeSameProp.mapValues(value => value.map(cycle => (cycle.size, cycle)).groupBy(_._1).mapValues(e => e.map(x => x._2).toList))
262283

263284
tmp.foreach(predicate2Cycles => {
264285
debug("predicate: " + predicate2Cycles._1)
265-
predicate2Cycles._2.foreach(entry => {
266-
debug(s"length ${entry._1}")
267-
val prop2Cycle = entry._2
268-
var pairsOfCycles = prop2Cycle zip prop2Cycle.tail
269-
pairsOfCycles.foreach(pair => {
270-
debug(pair._1.map(_.source) + " ???? " + pair._2.map(_.source))
271-
272-
if(pair._1.map(_.source).toSet == pair._2.map(_.source).toSet) {
273-
debug("redundant cycle " + pair._1.map(_.source.value.getName))
274-
}
275-
})
286+
287+
predicate2Cycles._2.foreach(entry => {
288+
debug(s"length ${entry._1}")
289+
290+
val prop2Cycle = entry._2
291+
var pairsOfCycles = prop2Cycle zip prop2Cycle.tail
292+
pairsOfCycles.foreach(pair => {
293+
val cycle1 = pair._1
294+
val cycle2 = pair._2
295+
296+
val cycle1Nodes = cycle1.map(_.source).toSet
297+
val cycle2Nodes = cycle2.map(_.source).toSet
298+
299+
debug(cycle1Nodes.map(_.value.getName).mkString(", ") + " ???? " + cycle2Nodes.map(_.value.getName).mkString(", "))
300+
301+
// check if both cycles contain the same nodes
302+
if(cycle1Nodes == cycle2Nodes) {
303+
debug("redundant cycle " + pair._1.map(_.source.value.getName))
304+
305+
// we can remove cycle1 if cycle2 wasn't removed before
306+
if (!removedCycles.exists(c => sameElements(c, cycle2))) {
307+
removedCycles += cycle1
308+
}
309+
}
310+
})
276311
})
277312
})
313+
314+
removedCycles.map(c => c.map(_.asInstanceOf[Graph[Rule, LDiEdge]#EdgeT])).foreach(c =>
315+
{
316+
edges2Remove ++= c
317+
})
318+
319+
278320
// check for cycles over the same nodes via the same predicate in multiple directions
279321
// val grouped = cyclesWithNodeSameProp.groupBy(_._2)
280322
// grouped.foreach(e => {
@@ -336,10 +378,11 @@ debug("predicates:" + predicates)
336378
val edge2 = node.innerEdgeTraverser.filter(e => e.source == node && e.target == n2).head
337379

338380
// n --p--> n1
339-
val path1 = node.withSubgraph(edges = !_.equals(edge2)) pathTo n2
381+
val path1 = node.withSubgraph(edges = e => !e.equals(edge2) && !redundantEdges.contains(e)) pathTo n2
340382
if (path1.isDefined) {
341383
debug(s"PATH TO ${n2.value.getName}: ${path1.get.edges.toList.map(edge => asString(edge))}")
342384
val edges = path1.get.edges.toList
385+
343386
edges.foreach(edge => {
344387
debug(s"EDGE:${asString(edge)}")
345388
})
@@ -353,7 +396,7 @@ debug("predicates:" + predicates)
353396
debug(s"NO OTHER PATH FROM ${node.value.getName} TO ${n2.value.getName}")
354397
}
355398

356-
val path2 = node.withSubgraph(edges = !_.equals(edge1)) pathTo n1
399+
val path2 = node.withSubgraph(edges = e => !e.equals(edge1) && !redundantEdges.contains(e)) pathTo n1
357400
if (path2.isDefined) {
358401
debug(s"PATH TO:${n1.value.getName}")
359402
debug(s"PATH:${path2.get.edges.toList.map(edge => asString(edge))}")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package net.sansa_stack.inference.rules.minimizer
2+
3+
/**
4+
* @author Lorenz Buehmann
5+
*/
6+
class DefaultRuleDependencyGraphMinimizer extends RuleDependencyGraphMinimizer {
7+
8+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package net.sansa_stack.inference.rules.minimizer
2+
3+
import net.sansa_stack.inference.rules.RuleDependencyGraph
4+
import net.sansa_stack.inference.utils.Logging
5+
6+
/**
7+
* A minimization rule.
8+
*
9+
* @author Lorenz Buehmann
10+
*/
11+
abstract class MinimizationRule extends Logging {
12+
13+
/** Name for this rule, automatically inferred based on class name. */
14+
val ruleName: String = {
15+
val className = getClass.getName
16+
if (className endsWith "$") className.dropRight(1) else className
17+
}
18+
19+
def apply(graph: RuleDependencyGraph): RuleDependencyGraph
20+
21+
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
package net.sansa_stack.inference.rules.minimizer
2+
3+
import scala.collection.JavaConverters._
4+
5+
import com.google.common.util.concurrent.AtomicLongMap
6+
7+
import net.sansa_stack.inference.rules.RuleDependencyGraph
8+
import net.sansa_stack.inference.utils.Logging
9+
10+
object MinimizationRuleExecutor {
11+
protected val timeMap = AtomicLongMap.create[String]()
12+
13+
/** Resets statistics about time spent running specific rules */
14+
def resetTime(): Unit = timeMap.clear()
15+
16+
/** Dump statistics about time spent running specific rules. */
17+
def dumpTimeSpent(): String = {
18+
val map = timeMap.asMap().asScala
19+
val maxSize = map.keys.map(_.toString.length).max
20+
map.toSeq.sortBy(_._2).reverseMap { case (k, v) =>
21+
s"${k.padTo(maxSize, " ").mkString} $v"
22+
}.mkString("\n", "\n", "")
23+
}
24+
}
25+
26+
/**
27+
* @author Lorenz Buehmann
28+
*/
29+
abstract class MinimizationRuleExecutor extends Logging {
30+
31+
/**
32+
* An execution strategy for rules that indicates the maximum number of executions. If the
33+
* execution reaches fix point (i.e. converge) before maxIterations, it will stop.
34+
*/
35+
abstract class Strategy { def maxIterations: Int }
36+
37+
/** A strategy that only runs once. */
38+
case object Once extends Strategy { val maxIterations = 1 }
39+
40+
/** A strategy that runs until fix point or maxIterations times, whichever comes first. */
41+
case class FixedPoint(maxIterations: Int) extends Strategy
42+
43+
/** A batch of rules. */
44+
protected case class Batch(name: String, strategy: Strategy, rules: MinimizationRule*)
45+
46+
/** Defines a sequence of rule batches, to be overridden by the implementation. */
47+
protected def batches: Seq[Batch]
48+
49+
50+
/**
51+
* Executes the batches of rules defined by the subclass. The batches are executed serially
52+
* using the defined execution strategy. Within each batch, rules are also executed serially.
53+
*/
54+
def execute(graph: RuleDependencyGraph): RuleDependencyGraph = {
55+
var curGraph = graph
56+
57+
batches.foreach { batch =>
58+
val batchStartGraph = curGraph
59+
var iteration = 1
60+
var lastGraph = curGraph
61+
var continue = true
62+
63+
// Run until fix point (or the max number of iterations as specified in the strategy.
64+
while (continue) {
65+
curGraph = batch.rules.foldLeft(curGraph) {
66+
case (graph, rule) =>
67+
val startTime = System.nanoTime()
68+
val result = rule(graph)
69+
val runTime = System.nanoTime() - startTime
70+
MinimizationRuleExecutor.timeMap.addAndGet(rule.ruleName, runTime)
71+
72+
if (!result.equals(graph)) {
73+
trace(
74+
s"""
75+
|=== Applying Rule ${rule.ruleName} ===
76+
""".stripMargin)
77+
}
78+
79+
result
80+
}
81+
iteration += 1
82+
if (iteration > batch.strategy.maxIterations) {
83+
// Only log if this is a rule that is supposed to run more than once.
84+
if (iteration != 2) {
85+
val message = s"Max iterations (${iteration - 1}) reached for batch ${batch.name}"
86+
warn(message)
87+
}
88+
continue = false
89+
}
90+
91+
if (curGraph.equals(lastGraph)) {
92+
trace(
93+
s"Fixed point reached for batch ${batch.name} after ${iteration - 1} iterations.")
94+
continue = false
95+
}
96+
lastGraph = curGraph
97+
}
98+
99+
if (!batchStartGraph.equals(curGraph)) {
100+
debug(
101+
s"""
102+
|=== Result of Batch ${batch.name} ===
103+
""".stripMargin)
104+
} else {
105+
trace(s"Batch ${batch.name} has no effect.")
106+
}
107+
}
108+
109+
curGraph
110+
}
111+
}

0 commit comments

Comments
 (0)