Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit bb1a149

Browse files
Merge branch 'jena' into develop
2 parents c43472e + ad0f982 commit bb1a149

82 files changed

Lines changed: 2082 additions & 1425 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

pom.xml

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>net.sansa-stack</groupId>
55
<artifactId>sansa-inference-parent_2.11</artifactId>
6-
<version>0.2.1-SNAPSHOT</version>
6+
<version>0.2.1-jena-SNAPSHOT</version>
77
<packaging>pom</packaging>
88
<name>Inference API - Parent</name>
99

@@ -66,7 +66,7 @@
6666
<!--<java.version>>=1.8</java.version>-->
6767
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
6868
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
69-
<scala.version>2.11.8</scala.version>
69+
<scala.version>2.11.11</scala.version>
7070
<scala.binary.version>2.11</scala.binary.version>
7171
<spark.version>2.2.0</spark.version>
7272
<flink.version>1.3.1</flink.version>
@@ -75,7 +75,7 @@
7575
<sansa.rdf.version>${sansa.stack.version}</sansa.rdf.version>
7676
<sansa.query.version>${sansa.stack.version}</sansa.query.version>
7777
<sansa.owl.version>${sansa.stack.version}</sansa.owl.version>
78-
<jsa.subversion>1-1</jsa.subversion>
78+
<jsa.subversion>1</jsa.subversion>
7979
<jsa.version>${jena.version}-${jsa.subversion}</jsa.version>
8080
<PermGen>64m</PermGen>
8181
<MaxPermGen>512m</MaxPermGen>
@@ -130,6 +130,12 @@
130130
<version>${scala.version}</version>
131131
</dependency>
132132

133+
<dependency>
134+
<groupId>org.apache.hadoop</groupId>
135+
<artifactId>hadoop-client</artifactId>
136+
<version>2.7.2</version>
137+
</dependency>
138+
133139
<!-- Apache Spark Core -->
134140
<dependency>
135141
<groupId>org.apache.spark</groupId>
@@ -247,6 +253,14 @@
247253
<version>3.6.0</version>
248254
</dependency>
249255

256+
<!-- Config API -->
257+
<dependency>
258+
<groupId>com.typesafe</groupId>
259+
<artifactId>config</artifactId>
260+
<version>1.3.1</version>
261+
</dependency>
262+
263+
250264

251265
</dependencies>
252266
</dependencyManagement>

sansa-inference-common/pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
<parent>
55
<artifactId>sansa-inference-parent_2.11</artifactId>
66
<groupId>net.sansa-stack</groupId>
7-
<version>0.2.1-SNAPSHOT</version>
7+
<version>0.2.1-jena-SNAPSHOT</version>
88
<relativePath>../pom.xml</relativePath>
99
</parent>
1010
<groupId>net.sansa-stack</groupId>
1111
<artifactId>sansa-inference-common_${scala.binary.version}</artifactId>
12-
<version>0.2.1-SNAPSHOT</version>
12+
<version>0.2.1-jena-SNAPSHOT</version>
1313
<name>Inference API - Common</name>
1414
<description>A set of common objects used in the Inference API</description>
1515

@@ -31,12 +31,12 @@
3131
<dependency>
3232
<groupId>org.apache.jena</groupId>
3333
<artifactId>jena-tdb</artifactId>
34-
<version>3.2.0</version>
34+
<version>3.3.0</version>
3535
</dependency>
3636
<dependency>
3737
<groupId>org.apache.jena</groupId>
3838
<artifactId>jena-cmds</artifactId>
39-
<version>3.2.0</version>
39+
<version>3.3.0</version>
4040
</dependency>
4141

4242
<!-- Graph API -->

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/AbstractRDFGraph.scala renamed to sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/AbstractRDFGraph.scala

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
package net.sansa_stack.inference.spark.data.model
2-
3-
import org.apache.jena.graph.Triple
4-
import org.apache.spark.rdd.RDD
5-
import org.apache.spark.sql.{DataFrame, SparkSession}
6-
7-
import net.sansa_stack.inference.data.{RDFTriple, SQLSchema, SQLSchemaDefault}
1+
package net.sansa_stack.inference.data
82

93
/**
104
* A data structure that comprises a collection of triples. Note, due to the implementation of the Spark
@@ -14,8 +8,9 @@ import net.sansa_stack.inference.data.{RDFTriple, SQLSchema, SQLSchemaDefault}
148
* @author Lorenz Buehmann
159
*
1610
*/
17-
abstract class AbstractRDFGraph[T, G <: AbstractRDFGraph[T, G]](val triples: T) { self: G =>
18-
11+
abstract class AbstractRDFGraph[Rdf<: RDF, D, G <: AbstractRDFGraph[Rdf, D, G]](
12+
val triples: D
13+
) { self: G =>
1914

2015
/**
2116
* Returns a new RDF graph that contains only triples matching the given input.
@@ -25,20 +20,24 @@ abstract class AbstractRDFGraph[T, G <: AbstractRDFGraph[T, G]](val triples: T)
2520
* @param o the object
2621
* @return a new RDF graph
2722
*/
28-
def find(s: Option[String] = None, p: Option[String] = None, o: Option[String] = None): G
23+
def find(s: Option[Rdf#Node] = None, p: Option[Rdf#Node] = None, o: Option[Rdf#Node] = None): G
24+
25+
// /**
26+
// * Returns a new RDF graph that contains only triples matching the given input.
27+
// *
28+
// * @param filter the filter function
29+
// * @return a new RDF graph
30+
// */
31+
// def find(filter: (Rdf#Triple) => Boolean): G
32+
//
33+
// def find(subject: Rdf#NodeMatch, predicate: Rdf#NodeMatch, obj: Rdf#NodeMatch): G
2934

3035
/**
3136
* Returns a new RDF graph that contains only triples matching the given input.
3237
*
3338
* @return a new RDF graph
3439
*/
35-
def find(triple: Triple): G = {
36-
find(
37-
if (triple.getSubject.isVariable) None else Option(triple.getSubject.toString),
38-
if (triple.getPredicate.isVariable) None else Option(triple.getPredicate.toString),
39-
if (triple.getObject.isVariable) None else Option(triple.getObject.toString)
40-
)
41-
}
40+
def find(triple: Rdf#Triple): G
4241

4342
/**
4443
* Returns a new RDF graph that contains the union of the current RDF graph with the given RDF graph.
@@ -56,6 +55,22 @@ abstract class AbstractRDFGraph[T, G <: AbstractRDFGraph[T, G]](val triples: T)
5655
*/
5756
def unionAll(graphs: Seq[G]): G
5857

58+
/**
59+
* Returns a new RDF graph that contains the intersection of the current RDF graph with the given RDF graph.
60+
*
61+
* @param graph the other RDF graph
62+
* @return the intersection of both RDF graphs
63+
*/
64+
def intersection(graph: G): G
65+
66+
/**
67+
* Returns a new RDF graph that contains the difference between the current RDF graph and the given RDF graph.
68+
*
69+
* @param graph the other RDF graph
70+
* @return the difference of both RDF graphs
71+
*/
72+
def difference(graph: G): G
73+
5974
/**
6075
* Returns a new RDF graph that does not contain duplicate triples.
6176
*/
@@ -67,22 +82,4 @@ abstract class AbstractRDFGraph[T, G <: AbstractRDFGraph[T, G]](val triples: T)
6782
* @return the number of triples in the RDF graph
6883
*/
6984
def size(): Long
70-
71-
72-
73-
74-
def toDataFrame(sparkSession: SparkSession = null, schema: SQLSchema = SQLSchemaDefault): DataFrame
75-
76-
def toRDD(): RDD[RDFTriple]
77-
78-
/**
79-
* Persist the triples RDD with the default storage level (`MEMORY_ONLY`).
80-
*/
81-
def cache(): G
82-
83-
// /**
84-
// * Broadcast the graph
85-
// */
86-
// def broadcast(): G
87-
8885
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package net.sansa_stack.inference.data
2+
3+
import org.apache.jena.datatypes.{BaseDatatype, RDFDatatype, TypeMapper}
4+
import org.apache.jena.graph.{Graph => JenaGraph, Node => JenaNode, Triple => JenaTriple, _}
5+
import org.apache.jena.rdf.model.{Literal => JenaLiteral, Seq => _}
6+
7+
import scala.collection.JavaConverters._
8+
9+
class JenaOps extends RDFOps[Jena] {
10+
11+
// graph
12+
13+
val emptyGraph: Jena#Graph = Factory.createDefaultGraph
14+
15+
def makeGraph(triples: Iterable[Jena#Triple]): Jena#Graph = {
16+
val graph: JenaGraph = Factory.createDefaultGraph
17+
triples.foreach { triple =>
18+
graph.add(triple)
19+
}
20+
graph
21+
}
22+
23+
def getTriples(graph: Jena#Graph): Iterable[Jena#Triple] =
24+
graph.find(JenaNode.ANY, JenaNode.ANY, JenaNode.ANY).asScala.to[Iterable]
25+
26+
// triple
27+
28+
def makeTriple(s: Jena#Node, p: Jena#URI, o: Jena#Node): Jena#Triple = {
29+
JenaTriple.create(s, p, o)
30+
}
31+
32+
def fromTriple(t: Jena#Triple): (Jena#Node, Jena#URI, Jena#Node) = {
33+
val s = t.getSubject
34+
val p = t.getPredicate
35+
val o = t.getObject
36+
if (p.isInstanceOf[Jena#URI])
37+
(s, p.asInstanceOf[Jena#URI], o)
38+
else
39+
throw new RuntimeException("fromTriple: predicate " + p.toString + " must be a URI")
40+
}
41+
42+
// node
43+
44+
def foldNode[T](node: Jena#Node)(funURI: Jena#URI => T, funBNode: Jena#BNode => T, funLiteral: Jena#Literal => T): T = node match {
45+
case iri: Jena#URI => funURI(iri)
46+
case bnode: Jena#BNode => funBNode(bnode)
47+
case literal: Jena#Literal => funLiteral(literal)
48+
}
49+
50+
// URI
51+
52+
def makeUri(iriStr: String): Jena#URI = { NodeFactory.createURI(iriStr).asInstanceOf[Node_URI] }
53+
54+
def fromUri(node: Jena#URI): String =
55+
if (node.isURI)
56+
node.getURI
57+
else
58+
throw new RuntimeException("fromUri: " + node.toString() + " must be a URI")
59+
60+
// bnode
61+
62+
def makeBNode(): Node_Blank = NodeFactory.createBlankNode().asInstanceOf[Node_Blank]
63+
64+
def makeBNodeLabel(label: String): Jena#BNode = {
65+
val id = BlankNodeId.create(label)
66+
NodeFactory.createBlankNode(id).asInstanceOf[Node_Blank]
67+
}
68+
69+
def fromBNode(bn: Jena#BNode): String =
70+
if (bn.isBlank)
71+
bn.getBlankNodeId.getLabelString
72+
else
73+
throw new RuntimeException("fromBNode: " + bn.toString + " must be a BNode")
74+
75+
// literal
76+
77+
// TODO the javadoc doesn't say if this is thread safe
78+
lazy val mapper = TypeMapper.getInstance
79+
80+
def jenaDatatype(datatype: Jena#URI) = {
81+
val iriString = fromUri(datatype)
82+
val typ = mapper.getTypeByName(iriString)
83+
if (typ == null) {
84+
val datatype = new BaseDatatype(iriString)
85+
mapper.registerDatatype(datatype)
86+
datatype
87+
} else {
88+
typ
89+
}
90+
}
91+
92+
val __xsdString: RDFDatatype = mapper.getTypeByName("http://www.w3.org/2001/XMLSchema#string")
93+
val __xsdStringURI: Jena#URI = makeUri("http://www.w3.org/2001/XMLSchema#string")
94+
val __rdfLangStringURI: Jena#URI = makeUri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
95+
96+
def makeLiteral(lexicalForm: String, datatype: Jena#URI): Jena#Literal =
97+
if (datatype == __xsdStringURI)
98+
NodeFactory.createLiteral(lexicalForm, null, null).asInstanceOf[Node_Literal]
99+
else
100+
NodeFactory.createLiteral(lexicalForm, null, jenaDatatype(datatype)).asInstanceOf[Node_Literal]
101+
102+
def makeLangTaggedLiteral(lexicalForm: String, lang: Jena#Lang): Jena#Literal =
103+
NodeFactory.createLiteral(lexicalForm, fromLang(lang), null).asInstanceOf[Node_Literal]
104+
105+
106+
// lang
107+
108+
def makeLang(langString: String) = langString
109+
110+
def fromLang(lang: Jena#Lang) = lang
111+
112+
113+
114+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package net.sansa_stack.inference.data
2+
3+
import org.apache.jena.graph.{Node, Triple}
4+
5+
/**
6+
* @author Lorenz Buehmann
7+
*/
8+
trait JenaRDFTripleLike extends TripleOps[Jena] {
9+
// self: Triple =>
10+
//
11+
// override def s: Node = self.getSubject
12+
// override def p: Node = self.getPredicate
13+
// override def o: Node = self.getObject
14+
}

sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDF.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,7 @@ trait RDF {
3636
*/
3737
type Lang
3838

39-
}
39+
// types for the graph traversal API
40+
type NodeMatch
41+
type NodeAny <: NodeMatch
42+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package net.sansa_stack.inference.data
2+
3+
/**
4+
* @author Lorenz Buehmann
5+
*/
6+
trait RDFOps[Rdf <: RDF] {
7+
8+
// Triple
9+
def makeTriple(s: Rdf#Node, p: Rdf#URI, o: Rdf#Node): Rdf#Triple
10+
def fromTriple(triple: Rdf#Triple): (Rdf#Node, Rdf#URI, Rdf#Node)
11+
12+
// URI
13+
def makeUri(s: String): Rdf#URI
14+
def fromUri(uri: Rdf#URI): String
15+
16+
// blank node
17+
def makeBNode(): Rdf#BNode
18+
def makeBNodeLabel(s: String): Rdf#BNode
19+
def fromBNode(bn: Rdf#BNode): String
20+
21+
// literal
22+
def makeLiteral(lexicalForm: String, datatype: Rdf#URI): Rdf#Literal
23+
def makeLangTaggedLiteral(lexicalForm: String, lang: Rdf#Lang): Rdf#Literal
24+
// def fromLiteral(literal: Rdf#Literal): (String, Rdf#URI, Option[Rdf#Lang])
25+
26+
// lang
27+
def makeLang(s: String): Rdf#Lang
28+
def fromLang(l: Rdf#Lang): String
29+
}
30+
31+
object RDFOps {
32+
def apply[Rdf <: RDF](implicit ops: RDFOps[Rdf]): RDFOps[Rdf] = ops
33+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package net.sansa_stack.inference.data
2+
3+
/**
4+
* @author Lorenz Buehmann
5+
*/
6+
class RDFVocab[Rdf <: RDF](implicit ops: RDFOps[Rdf]) {
7+
8+
import ops._
9+
10+
val `type`: Rdf#URI = makeUri("type")
11+
}
12+
13+
object RDFVocab {
14+
def apply[Rdf <: RDF](implicit ops: RDFOps[Rdf]): RDFVocab[Rdf] = new RDFVocab[Rdf]()
15+
}

0 commit comments

Comments
 (0)