Skip to content

Commit 2b2f929

Browse files
odysa and claude committed
feat: add S3 SAIL module with in-memory storage (Phase 1a+1b)
Introduce rdf4j-sail-s3, an S3-backed SAIL using LSM-tree architecture adapted from RisingWave's Hummock engine. This commit implements the module skeleton and in-memory storage layer: - Config: S3StoreConfig, S3StoreFactory, S3StoreSchema - Storage: Varint encoding, QuadIndex permutations, MemTable (ConcurrentSkipListMap) - Value/NS: S3ValueStore (ConcurrentHashMap ID mapping), S3NamespaceStore - Core SAIL: S3Store, S3StoreConnection, S3SailStore with SailSource/Sink/Dataset - SPI registration via META-INF/services Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ea54c51 commit 2b2f929

15 files changed

Lines changed: 2592 additions & 0 deletions

core/sail/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
<module>model</module>
2020
<module>shacl</module>
2121
<module>lmdb</module>
22+
<module>s3</module>
2223
<module>lucene-api</module>
2324
<module>lucene</module>
2425
<module>elasticsearch</module>

core/sail/s3/pom.xml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
<parent>
5+
<groupId>org.eclipse.rdf4j</groupId>
6+
<artifactId>rdf4j-sail</artifactId>
7+
<version>6.0.0-SNAPSHOT</version>
8+
</parent>
9+
<artifactId>rdf4j-sail-s3</artifactId>
10+
<name>RDF4J: S3Store</name>
11+
<description>Sail implementation that stores data on S3-compatible object storage using an LSM-tree.</description>
12+
<dependencies>
13+
<dependency>
14+
<groupId>${project.groupId}</groupId>
15+
<artifactId>rdf4j-sail-base</artifactId>
16+
<version>${project.version}</version>
17+
</dependency>
18+
<dependency>
19+
<groupId>${project.groupId}</groupId>
20+
<artifactId>rdf4j-queryalgebra-evaluation</artifactId>
21+
<version>${project.version}</version>
22+
</dependency>
23+
<dependency>
24+
<groupId>${project.groupId}</groupId>
25+
<artifactId>rdf4j-queryalgebra-model</artifactId>
26+
<version>${project.version}</version>
27+
</dependency>
28+
<dependency>
29+
<groupId>${project.groupId}</groupId>
30+
<artifactId>rdf4j-query</artifactId>
31+
<version>${project.version}</version>
32+
</dependency>
33+
<dependency>
34+
<groupId>${project.groupId}</groupId>
35+
<artifactId>rdf4j-model</artifactId>
36+
<version>${project.version}</version>
37+
</dependency>
38+
<dependency>
39+
<groupId>io.minio</groupId>
40+
<artifactId>minio</artifactId>
41+
<version>8.5.7</version>
42+
</dependency>
43+
<dependency>
44+
<groupId>org.slf4j</groupId>
45+
<artifactId>slf4j-api</artifactId>
46+
</dependency>
47+
<dependency>
48+
<groupId>com.google.guava</groupId>
49+
<artifactId>guava</artifactId>
50+
</dependency>
51+
<dependency>
52+
<groupId>${project.groupId}</groupId>
53+
<artifactId>rdf4j-sail-testsuite</artifactId>
54+
<version>${project.version}</version>
55+
<scope>test</scope>
56+
</dependency>
57+
<dependency>
58+
<groupId>${project.groupId}</groupId>
59+
<artifactId>rdf4j-repository-testsuite</artifactId>
60+
<version>${project.version}</version>
61+
<scope>test</scope>
62+
</dependency>
63+
<dependency>
64+
<groupId>${project.groupId}</groupId>
65+
<artifactId>rdf4j-repository-sail</artifactId>
66+
<version>${project.version}</version>
67+
<scope>test</scope>
68+
</dependency>
69+
<dependency>
70+
<groupId>org.junit.jupiter</groupId>
71+
<artifactId>junit-jupiter-params</artifactId>
72+
<scope>test</scope>
73+
</dependency>
74+
</dependencies>
75+
</project>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
package org.eclipse.rdf4j.sail.s3;
12+
13+
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
14+
15+
/**
16+
* Evaluation statistics for the S3 sail. Currently uses the base class's default cardinality estimation. This can be
17+
* enhanced later to query the actual storage for more accurate estimates.
18+
*/
19+
class S3EvaluationStatistics extends EvaluationStatistics {
20+
21+
@Override
22+
protected CardinalityCalculator createCardinalityCalculator() {
23+
return new S3CardinalityCalculator();
24+
}
25+
26+
protected class S3CardinalityCalculator extends CardinalityCalculator {
27+
// Uses the default cardinality estimation from the base class.
28+
// Can be enhanced to consult S3ValueStore and storage for accurate estimates.
29+
}
30+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
package org.eclipse.rdf4j.sail.s3;
12+
13+
import java.util.Iterator;
14+
import java.util.LinkedHashMap;
15+
import java.util.Map;
16+
17+
import org.eclipse.rdf4j.model.impl.SimpleNamespace;
18+
19+
/**
20+
* In-memory store for namespace prefix information. All operations are synchronized for thread safety.
21+
*/
22+
class S3NamespaceStore implements Iterable<SimpleNamespace> {
23+
24+
private final Map<String, SimpleNamespace> namespacesMap = new LinkedHashMap<>(16);
25+
26+
public synchronized String getNamespace(String prefix) {
27+
SimpleNamespace namespace = namespacesMap.get(prefix);
28+
return namespace != null ? namespace.getName() : null;
29+
}
30+
31+
public synchronized void setNamespace(String prefix, String name) {
32+
SimpleNamespace ns = namespacesMap.get(prefix);
33+
if (ns != null) {
34+
if (!ns.getName().equals(name)) {
35+
ns.setName(name);
36+
}
37+
} else {
38+
namespacesMap.put(prefix, new SimpleNamespace(prefix, name));
39+
}
40+
}
41+
42+
public synchronized void removeNamespace(String prefix) {
43+
namespacesMap.remove(prefix);
44+
}
45+
46+
@Override
47+
public synchronized Iterator<SimpleNamespace> iterator() {
48+
// return a snapshot to avoid ConcurrentModificationException
49+
return new LinkedHashMap<>(namespacesMap).values().iterator();
50+
}
51+
52+
public synchronized void clear() {
53+
namespacesMap.clear();
54+
}
55+
}

0 commit comments

Comments
 (0)