Skip to content

Commit 111529b

Browse files
committed
Do fewer string allocations in PBF reader
1 parent daae73d commit 111529b

2 files changed

Lines changed: 37 additions & 14 deletions

File tree

include/osmium/io/detail/pbf_input_format.hpp

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,26 +67,45 @@ namespace osmium {
6767
std::string m_input_buffer{};
6868

6969
/**
70-
* Read the given number of bytes from the input queue.
70+
* Make sure the input data contains at least the specified
71+
* number of bytes, fetching more data from the input as needed.
7172
*
7273
* @param size Number of bytes to read
73-
* @returns String with the data
74-
* @throws osmium::pbf_error If size bytes can't be read
7574
*/
76-
std::string read_from_input_queue(size_t size) {
75+
void ensure_available_in_input_queue(size_t size) {
76+
if (m_input_buffer.size() < size) {
77+
m_input_buffer.reserve(size);
78+
}
7779
while (m_input_buffer.size() < size) {
7880
const std::string new_data{get_input()};
7981
if (input_done()) {
8082
throw osmium::pbf_error{"truncated data (EOF encountered)"};
8183
}
8284
m_input_buffer += new_data;
8385
}
86+
}
8487

85-
std::string output{m_input_buffer.substr(size)};
86-
m_input_buffer.resize(size);
88+
/**
89+
* Remove the specified number of bytes from the front of the input data.
90+
*
91+
* @param size Number of bytes to remove
92+
*/
93+
void pop_from_input_queue(size_t size) {
94+
m_input_buffer.erase(0, size);
95+
}
96+
97+
/**
98+
* Read the given number of bytes from the input queue.
99+
*
100+
* @param size Number of bytes to read
101+
* @returns String with the data
102+
* @throws osmium::pbf_error If size bytes can't be read
103+
*/
104+
std::string read_from_input_queue(size_t size) {
105+
ensure_available_in_input_queue(size);
87106

88-
using std::swap;
89-
swap(output, m_input_buffer);
107+
std::string output(m_input_buffer, 0, size);
108+
pop_from_input_queue(size);
90109

91110
return output;
92111
}
@@ -100,12 +119,13 @@ namespace osmium {
100119

101120
try {
102121
// size is encoded in network byte order
103-
const std::string input_data{read_from_input_queue(sizeof(size))};
104-
const char* d = input_data.data();
122+
ensure_available_in_input_queue(sizeof(size));
123+
const char* d = m_input_buffer.data();
105124
size = (static_cast<uint32_t>(d[3])) |
106125
(static_cast<uint32_t>(d[2]) << 8U) |
107126
(static_cast<uint32_t>(d[1]) << 16U) |
108127
(static_cast<uint32_t>(d[0]) << 24U);
128+
pop_from_input_queue(sizeof(size));
109129
} catch (const osmium::pbf_error&) {
110130
return 0; // EOF
111131
}
@@ -121,7 +141,8 @@ namespace osmium {
121141
* Decode the BlobHeader. Make sure it contains the expected
122142
* type. Return the size of the following Blob.
123143
*/
124-
static size_t decode_blob_header(protozero::pbf_message<FileFormat::BlobHeader>&& pbf_blob_header, const char* expected_type) {
144+
static size_t decode_blob_header(const protozero::data_view &data, const char* expected_type) {
145+
protozero::pbf_message<FileFormat::BlobHeader> pbf_blob_header{data};
125146
protozero::data_view blob_header_type;
126147
size_t blob_header_datasize = 0;
127148

@@ -157,9 +178,11 @@ namespace osmium {
157178
return 0;
158179
}
159180

160-
const std::string blob_header{read_from_input_queue(size)};
181+
ensure_available_in_input_queue(size);
182+
const auto blob_size = decode_blob_header(protozero::data_view{m_input_buffer.data(), size}, expected_type);
183+
pop_from_input_queue(size);
161184

162-
return decode_blob_header(protozero::pbf_message<FileFormat::BlobHeader>(blob_header), expected_type);
185+
return blob_size;
163186
}
164187

165188
std::string read_from_input_queue_with_check(size_t size) {

include/osmium/io/detail/queue_util.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ namespace osmium {
140140
std::future<T> data_future;
141141
m_queue.wait_and_pop(data_future);
142142
assert(data_future.valid());
143-
data = data_future.get();
143+
data = std::move(data_future.get());
144144
if (at_end_of_data(data)) {
145145
m_has_reached_end_of_data = true;
146146
}

0 commit comments

Comments
 (0)