Skip to content

Commit 2b190a6

Browse files
Cleanup of owl:hasValue precomputation
1 parent 0c62d38 commit 2b190a6

1 file changed

Lines changed: 40 additions & 94 deletions

File tree

  • components-core/src/main/java/org/dllearner/refinementoperators

components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java

Lines changed: 40 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
8080
@ConfigOption(description = "the reasoner to use")
8181
private AbstractReasonerComponent reasoner;
8282

83-
//@ConfigOption(description = "the learning algorithm")
84-
//private
85-
8683
// hierarchies
8784
@NoConfigOption
8885
private ClassHierarchy classHierarchy;
@@ -163,14 +160,10 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
163160
@ConfigOption(defaultValue = "3", description = "minimum number an individual or literal has to be seen in the " +
164161
"knowledge base before considering it for inclusion in concepts")
165162
private int frequencyThreshold = CommonConfigOptions.valueFrequencyThresholdDefault;
166-
private Map<OWLObjectPropertyExpression, Map<OWLIndividual, Integer>> valueFrequency = new HashMap<>();
167163
// data structure with identified frequent values
168164
private Map<OWLObjectPropertyExpression, Set<OWLIndividual>> frequentValues = new HashMap<>();
169165
// frequent data values
170166
private Map<OWLDataProperty, Set<OWLLiteral>> frequentDataValues = new HashMap<>();
171-
private Map<OWLDataProperty, Map<OWLLiteral, Integer>> dataValueFrequency = new HashMap<>();
172-
@ConfigOption(description = "whether to use hasValue on frequently occuring strings", defaultValue = "false")
173-
private boolean useDataHasValueConstructor = false;
174167

175168
// statistics
176169
public long mComputationTimeNs = 0;
@@ -192,6 +185,9 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
192185
@ConfigOption(description="support of has value constructor (owl:hasValue), e.g. \u2203 r.{a} ", defaultValue="false")
193186
private boolean useHasValueConstructor = false;
194187

188+
@ConfigOption(description = "support of has value constructor (owl:hasValue), e.g. \u2203 r.{20} ", defaultValue = "false")
189+
private boolean useDataHasValueConstructor = false;
190+
195191
@ConfigOption(description="support of qualified cardinality restrictions (owl:minCardinality, owl:maxCardinality, owl:exactCardinality), e.g. \u2265 3 r.C ", defaultValue="true")
196192
private boolean useCardinalityRestrictions = true;
197193

@@ -245,31 +241,45 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
245241

246242
public RhoDRDown() {}
247243

244+
/**
245+
* Copy constructor
246+
*/
248247
public RhoDRDown(RhoDRDown op) {
249248
setApplyAllFilter(op.applyAllFilter);
250249
setCardinalityLimit(op.cardinalityLimit);
251250
setClassHierarchy(op.classHierarchy);
251+
setObjectPropertyHierarchy(op.objectPropertyHierarchy);
252252
setDataPropertyHierarchy(op.dataPropertyHierarchy);
253253
setDropDisjuncts(op.dropDisjuncts);
254-
setFrequencyThreshold(op.frequencyThreshold);
255254
setInstanceBasedDisjoints(op.instanceBasedDisjoints);
256-
setObjectPropertyHierarchy(op.objectPropertyHierarchy);
257255
setReasoner(op.reasoner);
258256
setStartClass(op.startClass);
259-
setSubHierarchy(op.classHierarchy);
260257
setUseAllConstructor(op.useAllConstructor);
261-
setUseBooleanDatatypes(op.useBooleanDatatypes);
262258
setUseCardinalityRestrictions(op.useCardinalityRestrictions);
263-
setUseDataHasValueConstructor(op.useDataHasValueConstructor);
264259
setUseExistsConstructor(op.useExistsConstructor);
265-
setUseHasValueConstructor(op.useHasValueConstructor);
266260
setUseNegation(op.useNegation);
261+
setUseHasValueConstructor(op.useHasValueConstructor);
267262
setUseObjectValueNegation(op.useObjectValueNegation);
263+
setFrequencyThreshold(op.frequencyThreshold);
264+
setUseDataHasValueConstructor(op.useDataHasValueConstructor);
265+
setUseBooleanDatatypes(op.useBooleanDatatypes);
268266
setUseStringDatatypes(op.useStringDatatypes);
269267
setUseNumericDatatypes(op.useNumericDatatypes);
268+
setUseTimeDatatypes(op.useTimeDatatypes);
270269
initialized = false;
271270
}
272271

272+
/**
 * Returns the distinct elements that occur at least {@code frequencyThreshold} times
 * across all of the given collections.
 * <p>
 * All inner collections are flattened into a single stream, the occurrences of each
 * element are counted, and every element whose count is below the threshold is dropped.
 * The result is the key set of the remaining count map.
 * <p>
 * NOTE(review): the concrete map type produced by {@code Collectors.groupingBy} is not
 * specified, so the iteration order of the returned set should not be relied upon;
 * confirm that no caller expects a sorted set.
 *
 * @param c the collections whose elements are counted
 * @param frequencyThreshold the minimum number of occurrences (inclusive) an element needs to be kept
 * @param <T> the element type
 * @return the elements occurring at least {@code frequencyThreshold} times
 */
private <T> Set<T> frequentObjects(Collection<? extends Collection<T>> c, int frequencyThreshold) {
273+
final int t = frequencyThreshold;
274+
return c.stream()
275+
.flatMap(Collection::stream)
276+
.collect(Collectors.collectingAndThen(Collectors.groupingBy(Function.identity(), Collectors.counting()),
277+
map -> {
// drop every element counted fewer than t times; removing from values() also removes the map entry
278+
map.values().removeIf(v -> v < t);
279+
return map.keySet();
280+
}));
281+
}
282+
273283
@Override
274284
public void init() throws ComponentInitException {
275285
/*
@@ -291,99 +301,39 @@ public void init() throws ComponentInitException {
291301
opRanges = reasoner.getObjectPropertyRanges();
292302
dpDomains = reasoner.getDataPropertyDomains();
293303

304+
// r. some {ind}
294305
if (useHasValueConstructor) {
295306
for (OWLObjectProperty op : objectPropertyHierarchy.getEntities()) {
296-
// sets ordered by corresponding individual (which we ignore)
297-
Map<OWLIndividual, SortedSet<OWLIndividual>> propertyMembers = reasoner.getPropertyMembers(op);
298-
299-
Collection<SortedSet<OWLIndividual>> fillerSets = propertyMembers.values();
300-
301-
// compute frequency of individuals used as object
302-
Map<OWLIndividual, Integer> ind2Frequency = fillerSets.stream()
303-
.flatMap(Collection::stream)
304-
.collect(Collectors.groupingBy(Function.identity(), TreeMap::new, summingInt(s -> 1))); // (ind -> freqency)
305307

306-
// keep track of this
307-
valueFrequency.put(op, ind2Frequency);
308+
Map<OWLIndividual, SortedSet<OWLIndividual>> propertyMembers = reasoner.getPropertyMembers(op);
308309

309-
// keep only individuals with frequency >= threshold
310-
Set<OWLIndividual> frequentInds = ind2Frequency.entrySet().stream()
311-
.filter(e -> e.getValue() >= frequencyThreshold) // frequency >= threshold
312-
.map(Map.Entry::getKey)
313-
.collect(Collectors.toCollection(TreeSet::new));
310+
// compute the frequency of all individuals used as object and filter by threshold
311+
Set<OWLIndividual> frequentInds = frequentObjects(propertyMembers.values(), frequencyThreshold);
314312
frequentValues.put(op, frequentInds);
315313

314+
// inv(r). some {ind}
316315
if(useInverse) {
317-
Map<OWLIndividual, Integer> opMap = new TreeMap<>();
318-
valueFrequency.put(op.getInverseProperty(), opMap);
319-
320-
frequentInds = new TreeSet<>();
321-
322-
for (Entry<OWLIndividual, SortedSet<OWLIndividual>> entry : propertyMembers
323-
.entrySet()) {
324-
OWLIndividual subject = entry.getKey();
325-
SortedSet<OWLIndividual> values = entry.getValue();
326-
327-
opMap.put(subject, values.size());
328-
329-
if (values.size() >= frequencyThreshold) {
330-
frequentInds.add(subject);
331-
}
332-
}
316+
// it's a bit easier for inverse properties since we have a mapping from each individual to
317+
// all related individuals, thus, the frequency of each individual as subject is just the number
318+
// of objects
319+
frequentInds = propertyMembers.entrySet().stream().collect(Collectors.collectingAndThen(
320+
Collectors.toMap(Entry::getKey, e -> e.getValue().size()), map -> {
321+
map.values().removeIf(v -> v < frequencyThreshold);
322+
return map.keySet();
323+
}));
333324
frequentValues.put(op.getInverseProperty(), frequentInds);
334325
}
335326
}
336327
}
337328

329+
// r. some {lit}
338330
if(useDataHasValueConstructor) {
339331
for(OWLDataProperty dp : dataPropertyHierarchy.getEntities()) {
340-
Map<OWLLiteral, Integer> dpMap = new TreeMap<>();
341-
dataValueFrequency.put(dp, dpMap);
342-
343-
// long s1 = System.currentTimeMillis();
344-
// ConcurrentMap<OWLLiteral, Integer> lit2frequency = reasoner.getDatatypeMembers(dp).values()
345-
// .parallelStream()
346-
// .map(set -> set.stream().collect(Collectors.toList()))
347-
// .flatMap(list -> list.stream())
348-
// .collect(Collectors.toConcurrentMap(
349-
// Function.identity(), lit -> 1, Integer::sum));
350-
// long s2 = System.currentTimeMillis();
351-
// System.out.println(s2 - s1);
352-
353-
// sets ordered by corresponding individual (which we ignore)
354-
// s1 = System.currentTimeMillis();
355-
Collection<SortedSet<OWLLiteral>> fillerSets = reasoner.getDatatypeMembers(dp).values();
356-
for(SortedSet<OWLLiteral> fillerSet : fillerSets) {
357-
for(OWLLiteral lit : fillerSet) {
358-
Integer frequency = dpMap.get(lit);
359-
360-
if(frequency != null) {
361-
dpMap.put(lit, frequency+1);
362-
} else {
363-
dpMap.put(lit, 1);
364-
}
365-
}
366-
}
367-
// s2 = System.currentTimeMillis();
368-
// System.out.println(s2 - s1);
369-
370-
// keep only frequent patterns
371-
Set<OWLLiteral> frequentInds = new TreeSet<>();
372-
for(OWLLiteral i : dpMap.keySet()) {
373-
if(dpMap.get(i) >= frequencyThreshold) {
374-
logger.trace("adding value "+i+", because "+dpMap.get(i) +">="+frequencyThreshold);
375-
frequentInds.add(i);
376-
}
377-
}
378-
frequentDataValues.put(dp, frequentInds);
332+
Set<OWLLiteral> frequentLiterals = frequentObjects(reasoner.getDatatypeMembers(dp).values(), frequencyThreshold);
333+
frequentDataValues.put(dp, frequentLiterals);
379334
}
380335
}
381336

382-
// we do not need the temporary set anymore and let the
383-
// garbage collector take care of it
384-
valueFrequency = null;
385-
dataValueFrequency.clear();// = null;
386-
387337
// compute splits for numeric data properties
388338
if(useNumericDatatypes) {
389339
if(reasoner instanceof SPARQLReasoner
@@ -1120,11 +1070,7 @@ private void computeTopRefinements(int maxLength, OWLClassExpression domain) {
11201070
// TODO: similar filtering can be done for boolean datatype
11211071
// properties
11221072
if(applyExistsFilter) {
1123-
Iterator<OWLObjectUnionOf> it = baseSet.iterator();
1124-
while(it.hasNext()) {
1125-
if(MathOperations.containsDoubleObjectSomeRestriction(it.next()))
1126-
it.remove();
1127-
}
1073+
baseSet.removeIf(MathOperations::containsDoubleObjectSomeRestriction);
11281074
}
11291075

11301076
// add computed refinements

0 commit comments

Comments
 (0)