2323
2424#include < arrow/filesystem/filesystem.h>
2525#include < arrow/filesystem/localfs.h>
26- #if __has_include(<arrow/filesystem/s3fs.h>)
26+ #ifdef ICEBERG_HAVE_S3
2727#include < arrow/filesystem/s3fs.h>
28- #define ICEBERG_ARROW_HAS_S3 1
29- #else
30- #define ICEBERG_ARROW_HAS_S3 0
3128#endif
3229
3330#include " iceberg/arrow/arrow_file_io.h"
@@ -40,10 +37,10 @@ namespace iceberg::arrow {
4037
4138namespace {
4239
43- bool IsS3Uri (std::string_view uri) { return uri.rfind (" s3://" , 0 ) == 0 ; }
40+ bool IsS3Uri (std::string_view uri) { return uri.starts_with (" s3://" ) ; }
4441
4542Status EnsureS3Initialized () {
46- #if ICEBERG_ARROW_HAS_S3
43+ #ifdef ICEBERG_HAVE_S3
4744 static std::once_flag init_flag;
4845 static ::arrow::Status init_status = ::arrow::Status::OK ();
4946 std::call_once (init_flag, []() {
@@ -64,7 +61,7 @@ Status EnsureS3Initialized() {
6461#endif
6562}
6663
67- #if ICEBERG_ARROW_HAS_S3
64+ #ifdef ICEBERG_HAVE_S3
6865// / \brief Configure S3Options from a properties map.
6966// /
7067// / \param properties The configuration properties map.
@@ -104,9 +101,8 @@ ::arrow::fs::S3Options ConfigureS3Options(
104101
105102 // Configure path-style access (needed for MinIO)
106103 auto path_style_it = properties.find (S3Properties::kPathStyleAccess );
107- if (path_style_it != properties.end ()) {
108- // Arrow's S3 path-style is controlled via endpoint scheme
109- // For path-style access, we need to ensure the endpoint is properly configured
104+ if (path_style_it != properties.end () && path_style_it->second == " true" ) {
105+ options.force_virtual_addressing = false ;
110106 }
111107
112108 // Configure SSL
@@ -118,12 +114,18 @@ ::arrow::fs::S3Options ConfigureS3Options(
118114 // Configure timeouts
119115 auto connect_timeout_it = properties.find (S3Properties::kConnectTimeoutMs );
120116 if (connect_timeout_it != properties.end ()) {
121- options.connect_timeout = std::stod (connect_timeout_it->second ) / 1000.0 ;
117+ double timeout_ms = std::stod (connect_timeout_it->second );
118+ if (timeout_ms >= 0 ) {
119+ options.connect_timeout = timeout_ms / 1000.0 ;
120+ }
122121 }
123122
124123 auto socket_timeout_it = properties.find (S3Properties::kSocketTimeoutMs );
125124 if (socket_timeout_it != properties.end ()) {
126- options.request_timeout = std::stod (socket_timeout_it->second ) / 1000.0 ;
125+ double timeout_ms = std::stod (socket_timeout_it->second );
126+ if (timeout_ms >= 0 ) {
127+ options.request_timeout = timeout_ms / 1000.0 ;
128+ }
127129 }
128130
129131 return options;
@@ -141,83 +143,19 @@ Result<std::shared_ptr<::arrow::fs::FileSystem>> MakeS3FileSystem(
141143}
142144#endif
143145
144- Result<std::shared_ptr<::arrow::fs::FileSystem>> ResolveFileSystemFromUri (
145- const std::string& uri, std::string* out_path) {
146- if (IsS3Uri (uri)) {
147- ICEBERG_RETURN_UNEXPECTED (EnsureS3Initialized ());
148- }
149- ICEBERG_ARROW_ASSIGN_OR_RETURN (auto fs, ::arrow::fs::FileSystemFromUri (uri, out_path));
150- return fs;
151- }
152-
153- // / \brief ArrowUriFileIO resolves FileSystem from URI for each operation.
154- // /
155- // / This implementation is thread-safe as it creates a new FileSystem instance
156- // / for each operation. However, it may be less efficient than caching the
157- // / FileSystem. S3 initialization is done once per process.
158- class ArrowUriFileIO : public FileIO {
159- public:
160- Result<std::string> ReadFile (const std::string& file_location,
161- std::optional<size_t > length) override {
162- std::string path;
163- ICEBERG_ASSIGN_OR_RAISE (auto fs, ResolveFileSystemFromUri (file_location, &path));
164- ::arrow::fs::FileInfo file_info (path);
165- if (length.has_value ()) {
166- file_info.set_size (length.value ());
167- }
168- std::string content;
169- ICEBERG_ARROW_ASSIGN_OR_RETURN (auto file, fs->OpenInputFile (file_info));
170- ICEBERG_ARROW_ASSIGN_OR_RETURN (auto file_size, file->GetSize ());
171-
172- content.resize (file_size);
173- size_t remain = file_size;
174- size_t offset = 0 ;
175- while (remain > 0 ) {
176- size_t read_length = std::min (remain, static_cast <size_t >(1024 * 1024 ));
177- ICEBERG_ARROW_ASSIGN_OR_RETURN (
178- auto read_bytes,
179- file->Read (read_length, reinterpret_cast <uint8_t *>(&content[offset])));
180- remain -= read_bytes;
181- offset += read_bytes;
182- }
183-
184- return content;
185- }
186-
187- Status WriteFile (const std::string& file_location,
188- std::string_view content) override {
189- std::string path;
190- ICEBERG_ASSIGN_OR_RAISE (auto fs, ResolveFileSystemFromUri (file_location, &path));
191- ICEBERG_ARROW_ASSIGN_OR_RETURN (auto file, fs->OpenOutputStream (path));
192- ICEBERG_ARROW_RETURN_NOT_OK (file->Write (content.data (), content.size ()));
193- ICEBERG_ARROW_RETURN_NOT_OK (file->Flush ());
194- ICEBERG_ARROW_RETURN_NOT_OK (file->Close ());
195- return {};
196- }
197-
198- Status DeleteFile (const std::string& file_location) override {
199- std::string path;
200- ICEBERG_ASSIGN_OR_RAISE (auto fs, ResolveFileSystemFromUri (file_location, &path));
201- ICEBERG_ARROW_RETURN_NOT_OK (fs->DeleteFile (path));
202- return {};
203- }
204- };
205-
206146} // namespace
207147
208148Result<std::unique_ptr<FileIO>> MakeS3FileIO (const std::string& uri) {
209149 if (!IsS3Uri (uri)) {
210150 return InvalidArgument (" S3 URI must start with s3://" );
211151 }
212- #if !ICEBERG_ARROW_HAS_S3
152+ #ifndef ICEBERG_HAVE_S3
213153 return NotImplemented (" Arrow S3 support is not enabled" );
214154#else
215- // Validate that S3 can be initialized and the URI is valid
155+ ICEBERG_RETURN_UNEXPECTED ( EnsureS3Initialized ());
216156 std::string path;
217- ICEBERG_ASSIGN_OR_RAISE (auto fs, ResolveFileSystemFromUri (uri, &path));
218- (void )path;
219- (void )fs;
220- return std::make_unique<ArrowUriFileIO>();
157+ ICEBERG_ARROW_ASSIGN_OR_RETURN (auto fs, ::arrow::fs::FileSystemFromUri (uri, &path));
158+ return std::make_unique<ArrowFileSystemFileIO>(std::move (fs));
221159#endif
222160}
223161
@@ -227,7 +165,7 @@ Result<std::unique_ptr<FileIO>> MakeS3FileIO(
227165 if (!IsS3Uri (uri)) {
228166 return InvalidArgument (" S3 URI must start with s3://" );
229167 }
230- #if !ICEBERG_ARROW_HAS_S3
168+ #ifndef ICEBERG_HAVE_S3
231169 return NotImplemented (" Arrow S3 support is not enabled" );
232170#else
233171 // If properties are empty, use the simple URI-based resolution
0 commit comments