@@ -80,9 +80,6 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
8080 @ ConfigOption (description = "the reasoner to use" )
8181 private AbstractReasonerComponent reasoner ;
8282
83- //@ConfigOption(description = "the learning algorithm")
84- //private
85-
8683 // hierarchies
8784 @ NoConfigOption
8885 private ClassHierarchy classHierarchy ;
@@ -163,14 +160,10 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
163160 @ ConfigOption (defaultValue = "3" , description = "minimum number an individual or literal has to be seen in the " +
164161 "knowledge base before considering it for inclusion in concepts" )
165162 private int frequencyThreshold = CommonConfigOptions .valueFrequencyThresholdDefault ;
166- private Map <OWLObjectPropertyExpression , Map <OWLIndividual , Integer >> valueFrequency = new HashMap <>();
167163 // data structure with identified frequent values
168164 private Map <OWLObjectPropertyExpression , Set <OWLIndividual >> frequentValues = new HashMap <>();
169165 // frequent data values
170166 private Map <OWLDataProperty , Set <OWLLiteral >> frequentDataValues = new HashMap <>();
171- private Map <OWLDataProperty , Map <OWLLiteral , Integer >> dataValueFrequency = new HashMap <>();
172- @ ConfigOption (description = "whether to use hasValue on frequently occuring strings" , defaultValue = "false" )
173- private boolean useDataHasValueConstructor = false ;
174167
175168 // statistics
176169 public long mComputationTimeNs = 0 ;
@@ -192,6 +185,9 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
192185 @ ConfigOption (description ="support of has value constructor (owl:hasValue), e.g. \u2203 r.{a} " , defaultValue ="false" )
193186 private boolean useHasValueConstructor = false ;
194187
188+ @ ConfigOption (description = "support of has value constructor (owl:hasValue), e.g. \u2203 r.{20} " , defaultValue = "false" )
189+ private boolean useDataHasValueConstructor = false ;
190+
195191 @ ConfigOption (description ="support of qualified cardinality restrictions (owl:minCardinality, owl:maxCardinality, owl:exactCardinality), e.g. \u2265 3 r.C " , defaultValue ="true" )
196192 private boolean useCardinalityRestrictions = true ;
197193
@@ -245,31 +241,45 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
245241
246242 public RhoDRDown () {} // no-arg constructor with an empty body
247243
244+ /**
245+ * Copy constructor: transfers the settings of {@code op} to the new instance through the corresponding setters and marks the new instance as not initialized.
246+ */
248247 public RhoDRDown (RhoDRDown op ) { // NOTE(review): useInverse, which init() reads, is not copied here - confirm whether that is intentional
249248 setApplyAllFilter (op .applyAllFilter );
250249 setCardinalityLimit (op .cardinalityLimit );
251250 setClassHierarchy (op .classHierarchy );
251+ setObjectPropertyHierarchy (op .objectPropertyHierarchy );
252252 setDataPropertyHierarchy (op .dataPropertyHierarchy );
253253 setDropDisjuncts (op .dropDisjuncts );
254- setFrequencyThreshold (op .frequencyThreshold );
255254 setInstanceBasedDisjoints (op .instanceBasedDisjoints );
256- setObjectPropertyHierarchy (op .objectPropertyHierarchy );
257255 setReasoner (op .reasoner );
258256 setStartClass (op .startClass );
259- setSubHierarchy (op .classHierarchy );
260257 setUseAllConstructor (op .useAllConstructor );
261- setUseBooleanDatatypes (op .useBooleanDatatypes );
262258 setUseCardinalityRestrictions (op .useCardinalityRestrictions );
263- setUseDataHasValueConstructor (op .useDataHasValueConstructor );
264259 setUseExistsConstructor (op .useExistsConstructor );
265- setUseHasValueConstructor (op .useHasValueConstructor );
266260 setUseNegation (op .useNegation );
261+ setUseHasValueConstructor (op .useHasValueConstructor );
267262 setUseObjectValueNegation (op .useObjectValueNegation );
263+ setFrequencyThreshold (op .frequencyThreshold );
264+ setUseDataHasValueConstructor (op .useDataHasValueConstructor );
265+ setUseBooleanDatatypes (op .useBooleanDatatypes );
268266 setUseStringDatatypes (op .useStringDatatypes );
269267 setUseNumericDatatypes (op .useNumericDatatypes );
268+ setUseTimeDatatypes (op .useTimeDatatypes );
270269 initialized = false ; // the copy starts uninitialized - presumably init() must be called before use; confirm
271270 }
272271
272+ private <T > Set <T > frequentObjects (Collection <? extends Collection <T >> c , int frequencyThreshold ) { // returns the elements that occur at least frequencyThreshold times across all collections in c
273+ final int t = frequencyThreshold ;
274+ return c .stream ()
275+ .flatMap (Collection ::stream ) // flatten the nested collections into one stream of elements
276+ .collect (Collectors .collectingAndThen (Collectors .groupingBy (Function .identity (), Collectors .counting ()), // count occurrences per element; NOTE(review): groupingBy without a map factory gives no mutability guarantee for the returned map, yet it is mutated below - consider passing an explicit map factory
277+ map -> {
278+ map .values ().removeIf (v -> v < t ); // drop every element seen fewer than t times
279+ return map .keySet (); // the remaining keys are the frequent elements (a key-set view of the counting map)
280+ }));
281+ }
282+
273283 @ Override
274284 public void init () throws ComponentInitException {
275285 /*
@@ -291,99 +301,39 @@ public void init() throws ComponentInitException {
291301 opRanges = reasoner .getObjectPropertyRanges ();
292302 dpDomains = reasoner .getDataPropertyDomains ();
293303
304+ // r. some {ind}
294305 if (useHasValueConstructor ) {
295306 for (OWLObjectProperty op : objectPropertyHierarchy .getEntities ()) {
296- // sets ordered by corresponding individual (which we ignore)
297- Map <OWLIndividual , SortedSet <OWLIndividual >> propertyMembers = reasoner .getPropertyMembers (op );
298-
299- Collection <SortedSet <OWLIndividual >> fillerSets = propertyMembers .values ();
300-
301- // compute frequency of individuals used as object
302- Map <OWLIndividual , Integer > ind2Frequency = fillerSets .stream ()
303- .flatMap (Collection ::stream )
304- .collect (Collectors .groupingBy (Function .identity (), TreeMap ::new , summingInt (s -> 1 ))); // (ind -> freqency)
305307
306- // keep track of this
307- valueFrequency .put (op , ind2Frequency );
308+ Map <OWLIndividual , SortedSet <OWLIndividual >> propertyMembers = reasoner .getPropertyMembers (op );
308309
309- // keep only individuals with frequency > threshold
310- Set <OWLIndividual > frequentInds = ind2Frequency .entrySet ().stream ()
311- .filter (e -> e .getValue () >= frequencyThreshold ) // frequency >= threshold
312- .map (Map .Entry ::getKey )
313- .collect (Collectors .toCollection (TreeSet ::new ));
310+ // compute the frequency of all individuals used as object and filter by threshold
311+ Set <OWLIndividual > frequentInds = frequentObjects (propertyMembers .values (), frequencyThreshold );
314312 frequentValues .put (op , frequentInds );
315313
314+ // inv(r). some {ind}
316315 if (useInverse ) {
317- Map <OWLIndividual , Integer > opMap = new TreeMap <>();
318- valueFrequency .put (op .getInverseProperty (), opMap );
319-
320- frequentInds = new TreeSet <>();
321-
322- for (Entry <OWLIndividual , SortedSet <OWLIndividual >> entry : propertyMembers
323- .entrySet ()) {
324- OWLIndividual subject = entry .getKey ();
325- SortedSet <OWLIndividual > values = entry .getValue ();
326-
327- opMap .put (subject , values .size ());
328-
329- if (values .size () >= frequencyThreshold ) {
330- frequentInds .add (subject );
331- }
332- }
316+ // it's a bit easier for inverse properties since we have a mapping from each individual to
317+ // all related individuals, thus, the frequency of each individual as subject is just the number
318+ // of objects
319+ frequentInds = propertyMembers .entrySet ().stream ().collect (Collectors .collectingAndThen (
320+ Collectors .toMap (Entry ::getKey , e -> e .getValue ().size ()), map -> {
321+ map .values ().removeIf (v -> v < frequencyThreshold );
322+ return map .keySet ();
323+ }));
333324 frequentValues .put (op .getInverseProperty (), frequentInds );
334325 }
335326 }
336327 }
337328
329+ // r. some {lit}
338330 if (useDataHasValueConstructor ) {
339331 for (OWLDataProperty dp : dataPropertyHierarchy .getEntities ()) {
340- Map <OWLLiteral , Integer > dpMap = new TreeMap <>();
341- dataValueFrequency .put (dp , dpMap );
342-
343- // long s1 = System.currentTimeMillis();
344- // ConcurrentMap<OWLLiteral, Integer> lit2frequency = reasoner.getDatatypeMembers(dp).values()
345- // .parallelStream()
346- // .map(set -> set.stream().collect(Collectors.toList()))
347- // .flatMap(list -> list.stream())
348- // .collect(Collectors.toConcurrentMap(
349- // Function.identity(), lit -> 1, Integer::sum));
350- // long s2 = System.currentTimeMillis();
351- // System.out.println(s2 - s1);
352-
353- // sets ordered by corresponding individual (which we ignore)
354- // s1 = System.currentTimeMillis();
355- Collection <SortedSet <OWLLiteral >> fillerSets = reasoner .getDatatypeMembers (dp ).values ();
356- for (SortedSet <OWLLiteral > fillerSet : fillerSets ) {
357- for (OWLLiteral lit : fillerSet ) {
358- Integer frequency = dpMap .get (lit );
359-
360- if (frequency != null ) {
361- dpMap .put (lit , frequency +1 );
362- } else {
363- dpMap .put (lit , 1 );
364- }
365- }
366- }
367- // s2 = System.currentTimeMillis();
368- // System.out.println(s2 - s1);
369-
370- // keep only frequent patterns
371- Set <OWLLiteral > frequentInds = new TreeSet <>();
372- for (OWLLiteral i : dpMap .keySet ()) {
373- if (dpMap .get (i ) >= frequencyThreshold ) {
374- logger .trace ("adding value " +i +", because " +dpMap .get (i ) +">=" +frequencyThreshold );
375- frequentInds .add (i );
376- }
377- }
378- frequentDataValues .put (dp , frequentInds );
332+ Set <OWLLiteral > frequentLiterals = frequentObjects (reasoner .getDatatypeMembers (dp ).values (), frequencyThreshold );
333+ frequentDataValues .put (dp , frequentLiterals );
379334 }
380335 }
381336
382- // we do not need the temporary set anymore and let the
383- // garbage collector take care of it
384- valueFrequency = null ;
385- dataValueFrequency .clear ();// = null;
386-
387337 // compute splits for numeric data properties
388338 if (useNumericDatatypes ) {
389339 if (reasoner instanceof SPARQLReasoner
@@ -1120,11 +1070,7 @@ private void computeTopRefinements(int maxLength, OWLClassExpression domain) {
11201070 // TODO: similar filtering can be done for boolean datatype
11211071 // properties
11221072 if (applyExistsFilter ) {
1123- Iterator <OWLObjectUnionOf > it = baseSet .iterator ();
1124- while (it .hasNext ()) {
1125- if (MathOperations .containsDoubleObjectSomeRestriction (it .next ()))
1126- it .remove ();
1127- }
1073+ baseSet .removeIf (MathOperations ::containsDoubleObjectSomeRestriction );
11281074 }
11291075
11301076 // add computed refinements
0 commit comments