@@ -80,9 +80,6 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
8080 @ ConfigOption (description = "the reasoner to use" )
8181 private AbstractReasonerComponent reasoner ;
8282
83- //@ConfigOption(description = "the learning algorithm")
84- //private
85-
8683 // hierarchies
8784 @ NoConfigOption
8885 private ClassHierarchy classHierarchy ;
@@ -163,14 +160,10 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
163160 @ ConfigOption (defaultValue = "3" , description = "minimum number an individual or literal has to be seen in the " +
164161 "knowledge base before considering it for inclusion in concepts" )
165162 private int frequencyThreshold = CommonConfigOptions .valueFrequencyThresholdDefault ;
166- private Map <OWLObjectPropertyExpression , Map <OWLIndividual , Integer >> valueFrequency = new HashMap <>();
167163 // data structure with identified frequent values
168164 private Map <OWLObjectPropertyExpression , Set <OWLIndividual >> frequentValues = new HashMap <>();
169165 // frequent data values
170166 private Map <OWLDataProperty , Set <OWLLiteral >> frequentDataValues = new HashMap <>();
171- private Map <OWLDataProperty , Map <OWLLiteral , Integer >> dataValueFrequency = new HashMap <>();
172- @ ConfigOption (description = "whether to use hasValue on frequently occuring strings" , defaultValue = "false" )
173- private boolean useDataHasValueConstructor = false ;
174167
175168 // statistics
176169 public long mComputationTimeNs = 0 ;
@@ -192,6 +185,9 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
192185 @ ConfigOption (description ="support of has value constructor (owl:hasValue), e.g. \u2203 r.{a} " , defaultValue ="false" )
193186 private boolean useHasValueConstructor = false ;
194187
188+ @ ConfigOption (description = "support of has value constructor (owl:hasValue), e.g. \u2203 r.{20} " , defaultValue = "false" )
189+ private boolean useDataHasValueConstructor = false ;
190+
195191 @ ConfigOption (description ="support of qualified cardinality restrictions (owl:minCardinality, owl:maxCardinality, owl:exactCardinality), e.g. \u2265 3 r.C " , defaultValue ="true" )
196192 private boolean useCardinalityRestrictions = true ;
197193
@@ -245,31 +241,45 @@ public class RhoDRDown extends RefinementOperatorAdapter implements Component, C
245241
246242 public RhoDRDown () {}
247243
244+ /**
245+ * Copy constructor
246+ */
248247 public RhoDRDown (RhoDRDown op ) {
249248 setApplyAllFilter (op .applyAllFilter );
250249 setCardinalityLimit (op .cardinalityLimit );
251250 setClassHierarchy (op .classHierarchy );
251+ setObjectPropertyHierarchy (op .objectPropertyHierarchy );
252252 setDataPropertyHierarchy (op .dataPropertyHierarchy );
253253 setDropDisjuncts (op .dropDisjuncts );
254- setFrequencyThreshold (op .frequencyThreshold );
255254 setInstanceBasedDisjoints (op .instanceBasedDisjoints );
256- setObjectPropertyHierarchy (op .objectPropertyHierarchy );
257255 setReasoner (op .reasoner );
258256 setStartClass (op .startClass );
259- setSubHierarchy (op .classHierarchy );
260257 setUseAllConstructor (op .useAllConstructor );
261- setUseBooleanDatatypes (op .useBooleanDatatypes );
262258 setUseCardinalityRestrictions (op .useCardinalityRestrictions );
263- setUseDataHasValueConstructor (op .useDataHasValueConstructor );
264259 setUseExistsConstructor (op .useExistsConstructor );
265- setUseHasValueConstructor (op .useHasValueConstructor );
266260 setUseNegation (op .useNegation );
261+ setUseHasValueConstructor (op .useHasValueConstructor );
267262 setUseObjectValueNegation (op .useObjectValueNegation );
263+ setFrequencyThreshold (op .frequencyThreshold );
264+ setUseDataHasValueConstructor (op .useDataHasValueConstructor );
265+ setUseBooleanDatatypes (op .useBooleanDatatypes );
268266 setUseStringDatatypes (op .useStringDatatypes );
269267 setUseNumericDatatypes (op .useNumericDatatypes );
268+ setUseTimeDatatypes (op .useTimeDatatypes );
270269 initialized = false ;
271270 }
272271
272+ private <T > Set <T > frequentObjects (Collection <? extends Collection <T >> c , int frequencyThreshold ) {
273+ final int t = frequencyThreshold ;
274+ return c .stream ()
275+ .flatMap (Collection ::stream )
276+ .collect (Collectors .collectingAndThen (Collectors .groupingBy (Function .identity (), Collectors .counting ()),
277+ map -> {
278+ map .values ().removeIf (v -> v < t );
279+ return map .keySet ();
280+ }));
281+ }
282+
273283 @ Override
274284 public void init () throws ComponentInitException {
275285 /*
@@ -291,99 +301,39 @@ public void init() throws ComponentInitException {
291301 opRanges = reasoner .getObjectPropertyRanges ();
292302 dpDomains = reasoner .getDataPropertyDomains ();
293303
304+ // r. some {ind}
294305 if (useHasValueConstructor ) {
295306 for (OWLObjectProperty op : objectPropertyHierarchy .getEntities ()) {
296- // sets ordered by corresponding individual (which we ignore)
297- Map <OWLIndividual , SortedSet <OWLIndividual >> propertyMembers = reasoner .getPropertyMembers (op );
298-
299- Collection <SortedSet <OWLIndividual >> fillerSets = propertyMembers .values ();
300307
301- // compute frequency of individuals used as object
302- Map <OWLIndividual , Integer > ind2Frequency = fillerSets .stream ()
303- .flatMap (Collection ::stream )
304- .collect (Collectors .groupingBy (Function .identity (), TreeMap ::new , summingInt (s -> 1 ))); // (ind -> freqency)
305-
306- // keep track of this
307- valueFrequency .put (op , ind2Frequency );
308+ Map <OWLIndividual , SortedSet <OWLIndividual >> propertyMembers = reasoner .getPropertyMembers (op );
308309
309- // keep only individuals with frequency > threshold
310- Set <OWLIndividual > frequentInds = ind2Frequency .entrySet ().stream ()
311- .filter (e -> e .getValue () >= frequencyThreshold ) // frequency >= threshold
312- .map (Map .Entry ::getKey )
313- .collect (Collectors .toCollection (TreeSet ::new ));
310+ // compute the frequency of all individuals used as object and filter by threshold
311+ Set <OWLIndividual > frequentInds = frequentObjects (propertyMembers .values (), frequencyThreshold );
314312 frequentValues .put (op , frequentInds );
315313
314+ // inv(r). some {ind}
316315 if (useInverse ) {
317- Map <OWLIndividual , Integer > opMap = new TreeMap <>();
318- valueFrequency .put (op .getInverseProperty (), opMap );
319-
320- frequentInds = new TreeSet <>();
321-
322- for (Entry <OWLIndividual , SortedSet <OWLIndividual >> entry : propertyMembers
323- .entrySet ()) {
324- OWLIndividual subject = entry .getKey ();
325- SortedSet <OWLIndividual > values = entry .getValue ();
326-
327- opMap .put (subject , values .size ());
328-
329- if (values .size () >= frequencyThreshold ) {
330- frequentInds .add (subject );
331- }
332- }
316+ // it's a bit easier for inverse properties since we have a mapping from each individual to
317+ // all related individuals, thus, the frequency of each individual as subject is just the number
318+ // of objects
319+ frequentInds = propertyMembers .entrySet ().stream ().collect (Collectors .collectingAndThen (
320+ Collectors .toMap (Entry ::getKey , e -> e .getValue ().size ()), map -> {
321+ map .values ().removeIf (v -> v < frequencyThreshold );
322+ return map .keySet ();
323+ }));
333324 frequentValues .put (op .getInverseProperty (), frequentInds );
334325 }
335326 }
336327 }
337328
329+ // r. some {lit}
338330 if (useDataHasValueConstructor ) {
339331 for (OWLDataProperty dp : dataPropertyHierarchy .getEntities ()) {
340- Map <OWLLiteral , Integer > dpMap = new TreeMap <>();
341- dataValueFrequency .put (dp , dpMap );
342-
343- // long s1 = System.currentTimeMillis();
344- // ConcurrentMap<OWLLiteral, Integer> lit2frequency = reasoner.getDatatypeMembers(dp).values()
345- // .parallelStream()
346- // .map(set -> set.stream().collect(Collectors.toList()))
347- // .flatMap(list -> list.stream())
348- // .collect(Collectors.toConcurrentMap(
349- // Function.identity(), lit -> 1, Integer::sum));
350- // long s2 = System.currentTimeMillis();
351- // System.out.println(s2 - s1);
352-
353- // sets ordered by corresponding individual (which we ignore)
354- // s1 = System.currentTimeMillis();
355- Collection <SortedSet <OWLLiteral >> fillerSets = reasoner .getDatatypeMembers (dp ).values ();
356- for (SortedSet <OWLLiteral > fillerSet : fillerSets ) {
357- for (OWLLiteral lit : fillerSet ) {
358- Integer frequency = dpMap .get (lit );
359-
360- if (frequency != null ) {
361- dpMap .put (lit , frequency +1 );
362- } else {
363- dpMap .put (lit , 1 );
364- }
365- }
366- }
367- // s2 = System.currentTimeMillis();
368- // System.out.println(s2 - s1);
369-
370- // keep only frequent patterns
371- Set <OWLLiteral > frequentInds = new TreeSet <>();
372- for (OWLLiteral i : dpMap .keySet ()) {
373- if (dpMap .get (i ) >= frequencyThreshold ) {
374- logger .trace ("adding value " +i +", because " +dpMap .get (i ) +">=" +frequencyThreshold );
375- frequentInds .add (i );
376- }
377- }
378- frequentDataValues .put (dp , frequentInds );
332+ Set <OWLLiteral > frequentLiterals = frequentObjects (reasoner .getDatatypeMembers (dp ).values (), frequencyThreshold );
333+ frequentDataValues .put (dp , frequentLiterals );
379334 }
380335 }
381336
382- // we do not need the temporary set anymore and let the
383- // garbage collector take care of it
384- valueFrequency = null ;
385- dataValueFrequency .clear ();// = null;
386-
387337 // compute splits for numeric data properties
388338 if (useNumericDatatypes ) {
389339 if (reasoner instanceof SPARQLReasoner
@@ -426,8 +376,8 @@ public void init() throws ComponentInitException {
426376 maxNrOfFillers .put (op , 10 );
427377 } else {
428378 int maxFillers = Math .min (cardinalityLimit ,
429- reasoner .getPropertyMembers (op ).entrySet ().stream ()
430- .mapToInt (entry -> entry . getValue (). size () )
379+ reasoner .getPropertyMembers (op ).values ().stream ()
380+ .mapToInt (Set :: size )
431381 .max ().orElse (0 ));
432382 maxNrOfFillers .put (op , maxFillers );
433383
@@ -626,9 +576,9 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
626576 refinements .add (operands .get (1 ));
627577 } else {
628578 // copy children list and remove a different element in each turn
629- for ( int i = 0 ; i < operands . size (); i ++ ) {
579+ for ( OWLClassExpression op : operands ) {
630580 List <OWLClassExpression > newChildren = new LinkedList <>(operands );
631- newChildren .remove (i );
581+ newChildren .remove (op );
632582 OWLObjectUnionOf md = new OWLObjectUnionOfImplExt (newChildren );
633583 refinements .add (md );
634584 }
@@ -639,7 +589,10 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
639589 OWLObjectPropertyExpression role = ((OWLObjectSomeValuesFrom ) description ).getProperty ();
640590 OWLClassExpression filler = ((OWLObjectSomeValuesFrom ) description ).getFiller ();
641591
642- OWLClassExpression domain = role .isAnonymous () ? opDomains .get (role .getNamedProperty ()) : opRanges .get (role );
592+ // we need the context of the filler which is either the domain (in case of an inverse property) or the range of p
593+ OWLClassExpression domain = role .isAnonymous ()
594+ ? opDomains .get (role .getNamedProperty ()) // inv(p) -> D = domain(p)
595+ : opRanges .get (role .asOWLObjectProperty ()); // p -> D = range(p)
643596
644597 // rule 1: EXISTS r.D => EXISTS r.E
645598 tmp = refine (filler , maxLength -lengthMetric .objectSomeValuesLength -lengthMetric .objectProperyLength , null , domain );
@@ -657,11 +610,11 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
657610
658611 // rule 3: EXISTS r.D => >= 2 r.D
659612 // (length increases by 1 so we have to check whether max length is sufficient)
660- if (useCardinalityRestrictions ) { // && !role.isAnonymous()) {
661- if ( maxLength > OWLClassExpressionUtils .getLength (description , lengthMetric ) && maxNrOfFillers . get ( role ) > 1 ) {
662- OWLObjectMinCardinality min = df . getOWLObjectMinCardinality ( 2 , role , filler );
663- refinements .add (min );
664- }
613+ if (useCardinalityRestrictions &&
614+ maxLength > OWLClassExpressionUtils .getLength (description , lengthMetric ) &&
615+ maxNrOfFillers . get ( role ) > 1 ) {
616+ refinements .add (df . getOWLObjectMinCardinality ( 2 , role , filler ) );
617+
665618 }
666619
667620 // rule 4: EXISTS r.TOP => EXISTS r.{value}
@@ -672,10 +625,12 @@ public Set<OWLClassExpression> refine(OWLClassExpression description, int maxLen
672625 for (OWLIndividual ind : frequentInds ) {
673626 OWLObjectHasValue ovr = df .getOWLObjectHasValue (role , ind );
674627 refinements .add (ovr );
675- if (useObjectValueNegation ){
676- refinements .add (df .getOWLObjectComplementOf (ovr ));
628+ // rule 4b : EXISTS r.TOP => EXISTS r.not {value}
629+ if (useObjectValueNegation ) {
630+ if (maxLength > OWLClassExpressionUtils .getLength (description , lengthMetric )) {
631+ refinements .add (df .getOWLObjectSomeValuesFrom (role , df .getOWLObjectComplementOf (df .getOWLObjectOneOf (ind ))));
632+ }
677633 }
678-
679634 }
680635 }
681636 }
@@ -1115,11 +1070,7 @@ private void computeTopRefinements(int maxLength, OWLClassExpression domain) {
11151070 // TODO: similar filtering can be done for boolean datatype
11161071 // properties
11171072 if (applyExistsFilter ) {
1118- Iterator <OWLObjectUnionOf > it = baseSet .iterator ();
1119- while (it .hasNext ()) {
1120- if (MathOperations .containsDoubleObjectSomeRestriction (it .next ()))
1121- it .remove ();
1122- }
1073+ baseSet .removeIf (MathOperations ::containsDoubleObjectSomeRestriction );
11231074 }
11241075
11251076 // add computed refinements
0 commit comments