Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit e971967

Browse files
Added: Separate implementation of schema extractors
1 parent 2adc1bc commit e971967

4 files changed

Lines changed: 186 additions & 108 deletions

File tree

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package net.sansa_stack.inference.flink.extraction
2+
3+
import org.apache.jena.vocabulary.{OWL2, RDFS}
4+
5+
/**
 * Schema extractor for the OWL Horst profile.
 *
 * A triple `(s,p,o)` is kept if its predicate `p` is one of
 *
 *  - rdfs:subClassOf
 *  - rdfs:subPropertyOf
 *  - rdfs:domain
 *  - rdfs:range
 *  - owl:equivalentProperty
 *  - owl:equivalentClass
 *  - owl:inverseOf
 *  - owl:someValuesFrom
 *  - owl:allValuesFrom
 *  - owl:hasValue
 *  - owl:onProperty
 *
 * or if its object `o` is one of
 *
 *  - owl:TransitiveProperty
 *  - owl:FunctionalProperty
 *  - owl:InverseFunctionalProperty
 *  - owl:SymmetricProperty
 *
 * No subject-based selection is configured (the first parameter list is left empty).
 *
 * @author Lorenz Buehmann
 */
class OWLHorstSchemaExtractor()
  extends SchemaExtractor()(
    // predicates that mark a triple as schema, compared by URI
    Set(
      RDFS.subClassOf, RDFS.subPropertyOf, RDFS.domain, RDFS.range,
      OWL2.equivalentProperty, OWL2.equivalentClass, OWL2.inverseOf,
      OWL2.someValuesFrom, OWL2.allValuesFrom, OWL2.hasValue, OWL2.onProperty
    ).map(_.getURI)
  )(
    // objects that mark a triple as schema, compared by URI
    Set(
      OWL2.TransitiveProperty,
      OWL2.FunctionalProperty,
      OWL2.InverseFunctionalProperty,
      OWL2.SymmetricProperty
    ).map(_.getURI)
  ) {}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package net.sansa_stack.inference.flink.extraction
2+
3+
import org.apache.jena.vocabulary.RDFS
4+
5+
/**
 * Schema extractor for RDFS.
 *
 * A triple `(s,p,o)` is kept if its predicate `p` is one of
 *
 *  - rdfs:subClassOf
 *  - rdfs:subPropertyOf
 *  - rdfs:domain
 *  - rdfs:range
 *
 * No subject- or object-based selection is configured (those parameter lists are left empty).
 *
 * @author Lorenz Buehmann
 */
class RDFSSchemaExtractor()
  extends SchemaExtractor()(
    // predicates that mark a triple as schema, compared by URI
    Set(
      RDFS.subClassOf,
      RDFS.subPropertyOf,
      RDFS.domain,
      RDFS.range
    ).map(_.getURI)
  )() {}
19+
20+
/**
 * Companion object offering a factory for [[RDFSSchemaExtractor]].
 */
object RDFSSchemaExtractor {

  /**
   * Creates a new RDFS schema extractor.
   *
   * Declared with an explicit empty parameter list so that the usual factory call
   * `RDFSSchemaExtractor()` compiles. With a parameterless `def apply` the trailing `()`
   * would be applied to the returned instance instead of to this method.
   *
   * @return a new [[RDFSSchemaExtractor]] instance
   */
  def apply(): RDFSSchemaExtractor = new RDFSSchemaExtractor()
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package net.sansa_stack.inference.flink.extraction
2+
3+
import org.apache.flink.api.scala.DataSet
4+
import org.apache.jena.vocabulary.RDFS
5+
6+
import net.sansa_stack.inference.data.RDFTriple
7+
import net.sansa_stack.inference.flink.data.RDFGraph
8+
import net.sansa_stack.inference.utils.Logging
9+
10+
/**
 * Base class for schema extractors. A triple is retained when at least one of its
 * components is contained in the corresponding configured set.
 *
 * Every parameter list defaults to the empty set, in which case the matching filter
 * rejects every triple.
 *
 * @param subjects   values the `s` component of a triple is looked up in
 * @param predicates values the `p` component of a triple is looked up in
 * @param objects    values the `o` component of a triple is looked up in
 * @author Lorenz Buehmann
 */
abstract class SchemaExtractor
  (subjects: Set[String] = Set())
  (predicates: Set[String] = Set())
  (objects: Set[String] = Set())
  extends Logging with Serializable {

  /** Accepts a triple whose subject is contained in `subjects`. */
  val subjectsFilter: ((RDFTriple) => Boolean) = triple => subjects.contains(triple.s)

  /** Accepts a triple whose predicate is contained in `predicates`. */
  val predicatesFilter: ((RDFTriple) => Boolean) = triple => predicates.contains(triple.p)

  /** Accepts a triple whose object is contained in `objects`. */
  val objectsFilter: ((RDFTriple) => Boolean) = triple => objects.contains(triple.o)

  /** Combines the given tests into one that succeeds as soon as any of them does. */
  private def anyOf(candidates: (RDFTriple => Boolean)*): RDFTriple => Boolean =
    (triple: RDFTriple) => candidates.exists(test => test(triple))

  /**
   * Reduces a graph to its schema triples.
   *
   * @param graph the graph to extract from
   * @return a new graph holding only the schema triples of `graph`
   */
  def extract(graph: RDFGraph): RDFGraph = {
    val schemaTriples = extract(graph.triples)
    new RDFGraph(schemaTriples)
  }

  /**
   * Reduces a DataSet of triples to the schema triples, i.e. those accepted by the
   * subject, predicate or object filter.
   *
   * @param triples the triples to extract from
   * @return the schema triples
   */
  def extract(triples: DataSet[RDFTriple]): DataSet[RDFTriple] = {
    val isSchemaTriple = anyOf(subjectsFilter, predicatesFilter, objectsFilter)
    triples
      .filter(isSchemaTriple)
      .name("schema-triples")
  }

}

0 commit comments

Comments
 (0)