Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 79d2b58

Browse files
Removed redundant RDF loading code
This has already been moved to RDF layer and can cause conflicts if both RDF and Inference dependency are used. Closes #25
1 parent 48c188e commit 79d2b58

15 files changed

Lines changed: 28 additions & 733 deletions

File tree

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@
6969
<scala.version>2.11.11</scala.version>
7070
<scala.binary.version>2.11</scala.binary.version>
7171
<spark.version>2.2.1</spark.version>
72-
<flink.version>1.3.2</flink.version>
72+
<flink.version>1.4.0</flink.version>
7373
<jena.version>3.5.0</jena.version>
74-
<sansa.stack.version>0.3.0</sansa.stack.version>
74+
<sansa.stack.version>0.3.1-SNAPSHOT</sansa.stack.version>
7575
<sansa.rdf.version>${sansa.stack.version}</sansa.rdf.version>
7676
<sansa.query.version>${sansa.stack.version}</sansa.query.version>
7777
<sansa.owl.version>${sansa.stack.version}</sansa.owl.version>
@@ -93,7 +93,7 @@
9393
<!-- RDF Layer -->
9494
<dependency>
9595
<groupId>${project.groupId}</groupId>
96-
<artifactId>sansa-rdf-spark-core</artifactId>
96+
<artifactId>sansa-rdf-spark_${scala.binary.version}</artifactId>
9797
<version>${sansa.rdf.version}</version>
9898
</dependency>
9999
<dependency>

sansa-inference-common/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
<artifactId>google-collections</artifactId>
6666
</exclusion>
6767
</exclusions>
68+
<scope>provided</scope>
6869
</dependency>
6970

7071
<!-- Guava -->

sansa-inference-spark/pom.xml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,20 @@
3333
</dependency>
3434

3535
<!-- RDF Layer -->
36-
<!--<dependency>-->
37-
<!--<groupId>${project.groupId}</groupId>-->
38-
<!--<artifactId>sansa-rdf-spark-core</artifactId>-->
39-
<!--</dependency>-->
36+
<dependency>
37+
<groupId>${project.groupId}</groupId>
38+
<artifactId>sansa-rdf-spark_${scala.binary.version}</artifactId>
39+
<exclusions>
40+
<exclusion>
41+
<groupId>org.apache.hadoop</groupId>
42+
<artifactId>hadoop-common</artifactId>
43+
</exclusion>
44+
<exclusion>
45+
<groupId>org.apache.hadoop</groupId>
46+
<artifactId>hadoop-mapreduce-client-core</artifactId>
47+
</exclusion>
48+
</exclusions>
49+
</dependency>
4050
<!--<dependency>-->
4151
<!--<groupId>net.sansa-stack</groupId>-->
4252
<!--<artifactId>sansa-rdf-partition-core</artifactId>-->
@@ -199,7 +209,7 @@
199209
<dependency>
200210
<groupId>org.apache.calcite</groupId>
201211
<artifactId>calcite-core</artifactId>
202-
<scope>provided</scope>
212+
<!--<scope>provided</scope>-->
203213
</dependency>
204214

205215
</dependencies>

sansa-inference-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister

Lines changed: 0 additions & 2 deletions
This file was deleted.

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/RDFGraphLoader.scala

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ import net.sansa_stack.inference.spark.data.model.{RDFGraph, RDFGraphDataFrame,
77
import net.sansa_stack.inference.utils.NTriplesStringToJenaTriple
88
import org.apache.jena.graph.Triple
99
import org.apache.jena.riot.Lang
10-
import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}
10+
import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
1111
import org.apache.spark.{SparkConf, SparkContext}
1212
import org.slf4j.LoggerFactory
13-
import scala.language.implicitConversions
1413

14+
import scala.language.implicitConversions
1515
import org.apache.jena.vocabulary.RDF
1616

1717
/**
@@ -42,6 +42,7 @@ object RDFGraphLoader {
4242

4343
val triples = session.sparkContext
4444
.textFile(path, minPartitions) // read the text file
45+
.filter(line => !line.trim().isEmpty & !line.startsWith("#"))
4546
.map(new NTriplesStringToJenaTriple()) // convert to triple object
4647
// .repartition(minPartitions)
4748

@@ -127,7 +128,7 @@ object RDFGraphLoader {
127128
Array(splitted(0), splitted(1), splitted(2))
128129
})
129130

130-
implicit val rdfTripleEncoder = org.apache.spark.sql.Encoders.kryo[Triple]
131+
implicit val rdfTripleEncoder: Encoder[Triple] = org.apache.spark.sql.Encoders.kryo[Triple]
131132
val spark = session.sqlContext
132133

133134

@@ -195,7 +196,7 @@ object RDFGraphLoader {
195196
* @param minPartitions min number of partitions for Hadoop RDDs ([[SparkContext.defaultMinPartitions]])
196197
* @return an RDF graph based on a [[org.apache.spark.sql.DataFrame]]
197198
*/
198-
def loadFromDiskAsDataFrame(session: SparkSession, path: String, minPartitions: Int, sqlSchema: SQLSchema = SQLSchemaDefault): RDFGraphDataFrame = {
199+
def loadFromDiskAsDataFrame(session: SparkSession, path: String, minPartitions: Int = 4, sqlSchema: SQLSchema = SQLSchemaDefault): RDFGraphDataFrame = {
199200
val df = session
200201
.read
201202
.format("net.sansa_stack.inference.spark.data.loader.sql")
@@ -208,7 +209,7 @@ object RDFGraphLoader {
208209
}
209210

210211
def main(args: Array[String]): Unit = {
211-
import net.sansa_stack.inference.spark.data.loader.sql.rdf._
212+
import net.sansa_stack.rdf.spark.io.rdf._
212213

213214
val path = args(0)
214215
val lang = args(1) match {
@@ -247,8 +248,6 @@ object RDFGraphLoader {
247248

248249

249250

250-
import net.sansa_stack.inference.spark.data.loader.rdd.rdf._
251-
252251
val triplesRDD = session.sparkContext.rdf(lang)(path)
253252
triples.show(10)
254253
println(triples.count())

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/package.scala

Lines changed: 0 additions & 127 deletions
This file was deleted.

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/rdd/package.scala

Lines changed: 0 additions & 73 deletions
This file was deleted.

sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/DefaultSource.scala

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments (0)