Skip to content

Commit 7cbffed

Browse files
Merge remote-tracking branch 'upstream/develop' into develop
2 parents 1ed0a63 + 833385b commit 7cbffed

4 files changed

Lines changed: 350 additions & 120 deletions

File tree

components-core/src/main/java/org/dllearner/algorithms/qtl/util/SteinerTreeGeneric.java

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import java.util.ArrayList;
44
import java.util.List;
55
import java.util.Set;
6+
import java.util.function.Supplier;
67

7-
import org.jgrapht.EdgeFactory;
88
import org.jgrapht.Graph;
99
import org.jgrapht.alg.shortestpath.BellmanFordShortestPath;
1010
import org.jgrapht.alg.shortestpath.DijkstraShortestPath;
@@ -23,12 +23,12 @@ public class SteinerTreeGeneric<V, E> {
2323
Graph<V, E> graph;
2424
WeightedMultigraph<V, E> tree;
2525
List<V> steinerNodes;
26-
private final EdgeFactory<V, E> edgeFactory;
26+
private final Class<? extends E> edgeClass;
2727

28-
public SteinerTreeGeneric(Graph<V, E> graph, List<V> steinerNodes, EdgeFactory<V, E> edgeFactory) {
28+
public SteinerTreeGeneric(Graph<V, E> graph, List<V> steinerNodes, Class<? extends E> edgeClass) {
2929
this.graph = graph;
3030
this.steinerNodes = steinerNodes;
31-
this.edgeFactory = edgeFactory;
31+
this.edgeClass = edgeClass;
3232

3333
runAlgorithm();
3434
}
@@ -40,7 +40,7 @@ private Pseudograph<V, E> step1() {
4040

4141
logger.debug("<enter");
4242

43-
Pseudograph<V, E> g = new WeightedPseudograph<V, E>(edgeFactory);
43+
Pseudograph<V, E> g = new WeightedPseudograph<V, E>(edgeClass);
4444

4545
for (V n : this.steinerNodes) {
4646
g.addVertex(n);
@@ -57,8 +57,7 @@ private Pseudograph<V, E> step1() {
5757
if (g.containsEdge(n1, n2))
5858
continue;
5959

60-
E e = edgeFactory.createEdge(n1, n2);
61-
g.addEdge(n1, n2, e);
60+
E e = g.addEdge(n1, n2);
6261
g.setEdgeWeight(e, pathGen.getPathWeight(n1, n2));
6362

6463
}
@@ -87,7 +86,7 @@ private WeightedMultigraph<V, E> step2(Pseudograph<V, E> g1) {
8786

8887
Set<E> edges = mst.getSpanningTree().getEdges();
8988

90-
WeightedMultigraph<V, E> g2 = new WeightedMultigraph<>(edgeFactory);
89+
WeightedMultigraph<V, E> g2 = new WeightedMultigraph<>(edgeClass);
9190

9291
List<E> edgesSortedById = new ArrayList<>(edges);
9392
// edgesSortedById.sort();
@@ -114,7 +113,7 @@ private WeightedMultigraph<V, E> step3(WeightedMultigraph<V, E> g2) {
114113

115114
logger.debug("<enter");
116115

117-
WeightedMultigraph<V, E> g3 = new WeightedMultigraph<>(edgeFactory);
116+
WeightedMultigraph<V, E> g3 = new WeightedMultigraph<>(edgeClass);
118117

119118
Set<E> edges = g2.edgeSet();
120119
DijkstraShortestPath<V, E> pathGen = new DijkstraShortestPath<>(this.graph);
@@ -170,7 +169,7 @@ private WeightedMultigraph<V, E> step4(WeightedMultigraph<V, E> g3) {
170169

171170
Set<E> edges = mst.getSpanningTree().getEdges();
172171

173-
WeightedMultigraph<V, E> g4 = new WeightedMultigraph<>(edgeFactory);
172+
WeightedMultigraph<V, E> g4 = new WeightedMultigraph<>(edgeClass);
174173

175174
List<E> edgesSortedById = new ArrayList<>(edges);
176175
// Collections.sort(edgesSortedById);
@@ -241,7 +240,7 @@ private void runAlgorithm() {
241240
// GraphUtil.printGraph(g1);
242241

243242
if (g1.vertexSet().size() < 2) {
244-
this.tree = new WeightedMultigraph<>(edgeFactory);
243+
this.tree = new WeightedMultigraph<>(edgeClass);
245244
for (V n : g1.vertexSet()) this.tree.addVertex(n);
246245
return;
247246
}

components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java

Lines changed: 58 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
8080
@ConfigOption(description = "the reasoner to use")
8181
private AbstractReasonerComponent reasoner;
8282

83-
//@ConfigOption(description = "the learning algorithm")
84-
//private
85-
8683
// hierarchies
8784
@NoConfigOption
8885
private ClassHierarchy classHierarchy;
@@ -163,14 +160,10 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
163160
@ConfigOption(defaultValue = "3", description = "minimum number an individual or literal has to be seen in the " +
164161
"knowledge base before considering it for inclusion in concepts")
165162
private int frequencyThreshold = CommonConfigOptions.valueFrequencyThresholdDefault;
166-
private Map<OWLObjectPropertyExpression, Map<OWLIndividual, Integer>> valueFrequency = new HashMap<>();
167163
// data structure with identified frequent values
168164
private Map<OWLObjectPropertyExpression, Set<OWLIndividual>> frequentValues = new HashMap<>();
169165
// frequent data values
170166
private Map<OWLDataProperty, Set<OWLLiteral>> frequentDataValues = new HashMap<>();
171-
private Map<OWLDataProperty, Map<OWLLiteral, Integer>> dataValueFrequency = new HashMap<>();
172-
@ConfigOption(description = "whether to use hasValue on frequently occuring strings", defaultValue = "false")
173-
private boolean useDataHasValueConstructor = false;
174167

175168
// statistics
176169
public long mComputationTimeNs = 0;
@@ -192,6 +185,9 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
192185
@ConfigOption(description="support of has value constructor (owl:hasValue), e.g. \u2203 r.{a} ", defaultValue="false")
193186
private boolean useHasValueConstructor = false;
194187

188+
@ConfigOption(description = "support of has value constructor (owl:hasValue), e.g. \u2203 r.{20} ", defaultValue = "false")
189+
private boolean useDataHasValueConstructor = false;
190+
195191
@ConfigOption(description="support of qualified cardinality restrictions (owl:minCardinality, owl:maxCardinality, owl:exactCardinality), e.g. \u2265 3 r.C ", defaultValue="true")
196192
private boolean useCardinalityRestrictions = true;
197193

@@ -245,31 +241,45 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
245241

246242
public RhoDRDown() {}
247243

244+
/**
245+
* Copy constructor
246+
*/
248247
public RhoDRDown(RhoDRDown op) {
249248
setApplyAllFilter(op.applyAllFilter);
250249
setCardinalityLimit(op.cardinalityLimit);
251250
setClassHierarchy(op.classHierarchy);
251+
setObjectPropertyHierarchy(op.objectPropertyHierarchy);
252252
setDataPropertyHierarchy(op.dataPropertyHierarchy);
253253
setDropDisjuncts(op.dropDisjuncts);
254-
setFrequencyThreshold(op.frequencyThreshold);
255254
setInstanceBasedDisjoints(op.instanceBasedDisjoints);
256-
setObjectPropertyHierarchy(op.objectPropertyHierarchy);
257255
setReasoner(op.reasoner);
258256
setStartClass(op.startClass);
259-
setSubHierarchy(op.classHierarchy);
260257
setUseAllConstructor(op.useAllConstructor);
261-
setUseBooleanDatatypes(op.useBooleanDatatypes);
262258
setUseCardinalityRestrictions(op.useCardinalityRestrictions);
263-
setUseDataHasValueConstructor(op.useDataHasValueConstructor);
264259
setUseExistsConstructor(op.useExistsConstructor);
265-
setUseHasValueConstructor(op.useHasValueConstructor);
266260
setUseNegation(op.useNegation);
261+
setUseHasValueConstructor(op.useHasValueConstructor);
267262
setUseObjectValueNegation(op.useObjectValueNegation);
263+
setFrequencyThreshold(op.frequencyThreshold);
264+
setUseDataHasValueConstructor(op.useDataHasValueConstructor);
265+
setUseBooleanDatatypes(op.useBooleanDatatypes);
268266
setUseStringDatatypes(op.useStringDatatypes);
269267
setUseNumericDatatypes(op.useNumericDatatypes);
268+
setUseTimeDatatypes(op.useTimeDatatypes);
270269
initialized = false;
271270
}
272271

272+
/**
 * Returns the elements that occur at least {@code frequencyThreshold} times
 * across all inner collections of {@code c}.
 *
 * <p>All inner collections are flattened into one stream, the occurrences of
 * each distinct element are counted, and every element whose count is below
 * the threshold is dropped.
 *
 * <p>NOTE(review): the result is the key set of the intermediate counting map.
 * {@code Collectors.groupingBy} is called without a map factory, so the
 * iteration order of the returned set should not be relied upon - confirm
 * that no caller needs a sorted set.
 *
 * @param c the collections whose elements are counted
 * @param frequencyThreshold the minimum number of occurrences an element
 *        needs in order to be kept
 * @param <T> the element type
 * @return the elements whose number of occurrences is greater than or equal
 *         to {@code frequencyThreshold}
 */
private <T> Set<T> frequentObjects(Collection<? extends Collection<T>> c, int frequencyThreshold) {
273+
// local final copy of the threshold that is read inside the lambda below
final int t = frequencyThreshold;
274+
return c.stream()
275+
.flatMap(Collection::stream)
276+
// element -> number of occurrences, then post-process the finished map
.collect(Collectors.collectingAndThen(Collectors.groupingBy(Function.identity(), Collectors.counting()),
277+
map -> {
278+
// removing through the values() view also removes the corresponding keys
map.values().removeIf(v -> v < t);
279+
return map.keySet();
280+
}));
281+
}
282+
273283
@Override
274284
public void init() throws ComponentInitException {
275285
/*
@@ -291,99 +301,39 @@ public void init() throws ComponentInitException {
291301
opRanges = reasoner.getObjectPropertyRanges();
292302
dpDomains = reasoner.getDataPropertyDomains();
293303

304+
// r. some {ind}
294305
if (useHasValueConstructor) {
295306
for (OWLObjectProperty op : objectPropertyHierarchy.getEntities()) {
296-
// sets ordered by corresponding individual (which we ignore)
297-
Map<OWLIndividual, SortedSet<OWLIndividual>> propertyMembers = reasoner.getPropertyMembers(op);
298-
299-
Collection<SortedSet<OWLIndividual>> fillerSets = propertyMembers.values();
300307

301-
// compute frequency of individuals used as object
302-
Map<OWLIndividual, Integer> ind2Frequency = fillerSets.stream()
303-
.flatMap(Collection::stream)
304-
.collect(Collectors.groupingBy(Function.identity(), TreeMap::new, summingInt(s -> 1))); // (ind -> freqency)
305-
306-
// keep track of this
307-
valueFrequency.put(op, ind2Frequency);
308+
Map<OWLIndividual, SortedSet<OWLIndividual>> propertyMembers = reasoner.getPropertyMembers(op);
308309

309-
// keep only individuals with frequency > threshold
310-
Set<OWLIndividual> frequentInds = ind2Frequency.entrySet().stream()
311-
.filter(e -> e.getValue() >= frequencyThreshold) // frequency >= threshold
312-
.map(Map.Entry::getKey)
313-
.collect(Collectors.toCollection(TreeSet::new));
310+
// compute the frequency of all individuals used as object and filter by threshold
311+
Set<OWLIndividual> frequentInds = frequentObjects(propertyMembers.values(), frequencyThreshold);
314312
frequentValues.put(op, frequentInds);
315313

314+
// inv(r). some {ind}
316315
if(useInverse) {
317-
Map<OWLIndividual, Integer> opMap = new TreeMap<>();
318-
valueFrequency.put(op.getInverseProperty(), opMap);
319-
320-
frequentInds = new TreeSet<>();
321-
322-
for (Entry<OWLIndividual, SortedSet<OWLIndividual>> entry : propertyMembers
323-
.entrySet()) {
324-
OWLIndividual subject = entry.getKey();
325-
SortedSet<OWLIndividual> values = entry.getValue();
326-
327-
opMap.put(subject, values.size());
328-
329-
if (values.size() >= frequencyThreshold) {
330-
frequentInds.add(subject);
331-
}
332-
}
316+
// it's a bit easier for inverse properties since we have a mapping from each individual to
317+
// all related individuals, thus, the frequency of each individual as subject is just the number
318+
// of objects
319+
frequentInds = propertyMembers.entrySet().stream().collect(Collectors.collectingAndThen(
320+
Collectors.toMap(Entry::getKey, e -> e.getValue().size()), map -> {
321+
map.values().removeIf(v -> v < frequencyThreshold);
322+
return map.keySet();
323+
}));
333324
frequentValues.put(op.getInverseProperty(), frequentInds);
334325
}
335326
}
336327
}
337328

329+
// r. some {lit}
338330
if(useDataHasValueConstructor) {
339331
for(OWLDataProperty dp : dataPropertyHierarchy.getEntities()) {
340-
Map<OWLLiteral, Integer> dpMap = new TreeMap<>();
341-
dataValueFrequency.put(dp, dpMap);
342-
343-
// long s1 = System.currentTimeMillis();
344-
// ConcurrentMap<OWLLiteral, Integer> lit2frequency = reasoner.getDatatypeMembers(dp).values()
345-
// .parallelStream()
346-
// .map(set -> set.stream().collect(Collectors.toList()))
347-
// .flatMap(list -> list.stream())
348-
// .collect(Collectors.toConcurrentMap(
349-
// Function.identity(), lit -> 1, Integer::sum));
350-
// long s2 = System.currentTimeMillis();
351-
// System.out.println(s2 - s1);
352-
353-
// sets ordered by corresponding individual (which we ignore)
354-
// s1 = System.currentTimeMillis();
355-
Collection<SortedSet<OWLLiteral>> fillerSets = reasoner.getDatatypeMembers(dp).values();
356-
for(SortedSet<OWLLiteral> fillerSet : fillerSets) {
357-
for(OWLLiteral lit : fillerSet) {
358-
Integer frequency = dpMap.get(lit);
359-
360-
if(frequency != null) {
361-
dpMap.put(lit, frequency+1);
362-
} else {
363-
dpMap.put(lit, 1);
364-
}
365-
}
366-
}
367-
// s2 = System.currentTimeMillis();
368-
// System.out.println(s2 - s1);
369-
370-
// keep only frequent patterns
371-
Set<OWLLiteral> frequentInds = new TreeSet<>();
372-
for(OWLLiteral i : dpMap.keySet()) {
373-
if(dpMap.get(i) >= frequencyThreshold) {
374-
logger.trace("adding value "+i+", because "+dpMap.get(i) +">="+frequencyThreshold);
375-
frequentInds.add(i);
376-
}
377-
}
378-
frequentDataValues.put(dp, frequentInds);
332+
Set<OWLLiteral> frequentLiterals = frequentObjects(reasoner.getDatatypeMembers(dp).values(), frequencyThreshold);
333+
frequentDataValues.put(dp, frequentLiterals);
379334
}
380335
}
381336

382-
// we do not need the temporary set anymore and let the
383-
// garbage collector take care of it
384-
valueFrequency = null;
385-
dataValueFrequency.clear();// = null;
386-
387337
// compute splits for numeric data properties
388338
if(useNumericDatatypes) {
389339
if(reasoner instanceof SPARQLReasoner
@@ -426,8 +376,8 @@ public void init() throws ComponentInitException {
426376
maxNrOfFillers.put(op, 10);
427377
} else {
428378
int maxFillers = Math.min(cardinalityLimit,
429-
reasoner.getPropertyMembers(op).entrySet().stream()
430-
.mapToInt(entry -> entry.getValue().size())
379+
reasoner.getPropertyMembers(op).values().stream()
380+
.mapToInt(Set::size)
431381
.max().orElse(0));
432382
maxNrOfFillers.put(op, maxFillers);
433383

@@ -626,9 +576,9 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
626576
refinements.add(operands.get(1));
627577
} else {
628578
// copy children list and remove a different element in each turn
629-
for(int i=0; i<operands.size(); i++) {
579+
for (OWLClassExpression op : operands) {
630580
List<OWLClassExpression> newChildren = new LinkedList<>(operands);
631-
newChildren.remove(i);
581+
newChildren.remove(op);
632582
OWLObjectUnionOf md = new OWLObjectUnionOfImplExt(newChildren);
633583
refinements.add(md);
634584
}
@@ -639,7 +589,10 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
639589
OWLObjectPropertyExpression role = ((OWLObjectSomeValuesFrom) description).getProperty();
640590
OWLClassExpression filler = ((OWLObjectSomeValuesFrom) description).getFiller();
641591

642-
OWLClassExpression domain = role.isAnonymous() ? opDomains.get(role.getNamedProperty()) : opRanges.get(role);
592+
// we need the context of the filler which is either the domain (in case of an inverse property) or the range of p
593+
OWLClassExpression domain = role.isAnonymous()
594+
? opDomains.get(role.getNamedProperty()) // inv(p) -> D = domain(p)
595+
: opRanges.get(role.asOWLObjectProperty()); // p -> D = range(p)
643596

644597
// rule 1: EXISTS r.D => EXISTS r.E
645598
tmp = refine(filler, maxLength-lengthMetric.objectSomeValuesLength-lengthMetric.objectProperyLength, null, domain);
@@ -657,11 +610,11 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
657610

658611
// rule 3: EXISTS r.D => >= 2 r.D
659612
// (length increases by 1 so we have to check whether max length is sufficient)
660-
if(useCardinalityRestrictions) {// && !role.isAnonymous()) {
661-
if(maxLength > OWLClassExpressionUtils.getLength(description, lengthMetric) && maxNrOfFillers.get(role) > 1) {
662-
OWLObjectMinCardinality min = df.getOWLObjectMinCardinality(2,role,filler);
663-
refinements.add(min);
664-
}
613+
if (useCardinalityRestrictions &&
614+
maxLength > OWLClassExpressionUtils.getLength(description, lengthMetric) &&
615+
maxNrOfFillers.get(role) > 1) {
616+
refinements.add(df.getOWLObjectMinCardinality(2, role, filler));
617+
665618
}
666619

667620
// rule 4: EXISTS r.TOP => EXISTS r.{value}
@@ -672,10 +625,12 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
672625
for(OWLIndividual ind : frequentInds) {
673626
OWLObjectHasValue ovr = df.getOWLObjectHasValue(role, ind);
674627
refinements.add(ovr);
675-
if(useObjectValueNegation ){
676-
refinements.add(df.getOWLObjectComplementOf(ovr));
628+
// rule 4b : EXISTS r.TOP => EXISTS r.not {value}
629+
if (useObjectValueNegation) {
630+
if (maxLength > OWLClassExpressionUtils.getLength(description, lengthMetric)) {
631+
refinements.add(df.getOWLObjectSomeValuesFrom(role, df.getOWLObjectComplementOf(df.getOWLObjectOneOf(ind))));
632+
}
677633
}
678-
679634
}
680635
}
681636
}
@@ -1115,11 +1070,7 @@ private void computeTopRefinements(int maxLength, OWLClassExpression domain) {
11151070
// TODO: similar filtering can be done for boolean datatype
11161071
// properties
11171072
if(applyExistsFilter) {
1118-
Iterator<OWLObjectUnionOf> it = baseSet.iterator();
1119-
while(it.hasNext()) {
1120-
if(MathOperations.containsDoubleObjectSomeRestriction(it.next()))
1121-
it.remove();
1122-
}
1073+
baseSet.removeIf(MathOperations::containsDoubleObjectSomeRestriction);
11231074
}
11241075

11251076
// add computed refinements

0 commit comments

Comments
 (0)