Skip to content

Commit 34f1d91

Browse files
committed
Add text header serialization
1 parent b8a2e93 commit 34f1d91

1 file changed

Lines changed: 316 additions & 2 deletions

File tree

mdio/utils/segy_export.h

Lines changed: 316 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
#include <cstring>
1919
#include <string>
2020
#include <vector>
21+
#include <sstream>
22+
#include <iomanip>
23+
#include <chrono>
24+
#include <ctime>
2125

2226
#include <algorithm> // for std::erase_if
2327
#include <thread> // for sleep_for
@@ -208,8 +212,8 @@ inline TraceHeaderComposer::TraceHeaderComposer() {
208212
{"impulse_signal_polarity", 149, "<i2"},
209213
{"vibratory_polarity_code", 151, "<i2"},
210214

211-
// # **SEG‑Y Rev1 additions (bytes 153–160ish)**
212-
{"segy_format_revision", 153, "<i2"}, // # Format revision (always 1 for Rev1)
215+
// # **SEG‑Y Rev 1 additions (bytes 153–160ish)**
216+
{"segy_format_revision", 153, "<i2"}, // # Format revision (always 1 for Rev 1)
213217
{"fixed_length_trace_flag", 155, "<i2"}, // # 1 = fixed-length traces present :contentReference[oaicite:1]{index=1}
214218
{"num_textual_hdr_ext", 157, "<i2"}, // # Number of 3200‑byte Extended Textual File Header records :contentReference[oaicite:2]{index=2}
215219

@@ -565,6 +569,15 @@ inline Result<void> TraceHeaderMapper::PopulateTraceHeader(
565569
return absl::OkStatus();
566570
}
567571

572+
// Forward declaration: CreateSegyTextHeader is defined after MdioToSegy, which calls it.
573+
Result<void> CreateSegyTextHeader(
574+
const std::string& text_header,
575+
const Dataset& dataset,
576+
const std::string& mdio_path,
577+
const std::string& segy_path,
578+
const std::vector<TraceHeaderField>& overrides,
579+
const Context& ctx);
580+
568581
/**
569582
* @brief Converts an MDIO dataset to a SEG-Y styled Variable.
570583
*
@@ -626,6 +639,21 @@ Result<void> MdioToSegy(
626639
}
627640
}
628641

642+
// Create and write SEG-Y text header
643+
std::vector<TraceHeaderField> applied_overrides;
644+
for (const auto& field : trace_headers.fields()) {
645+
// Check if this field was overridden by comparing with default names
646+
if (field.name == "inline" || field.name == "crossline" ||
647+
field.name == "cdp-x" || field.name == "cdp-y") {
648+
applied_overrides.push_back(field);
649+
}
650+
}
651+
652+
auto text_header_result = CreateSegyTextHeader(text_header, ds, mdio_path, segy_path, applied_overrides, ctx);
653+
if (!text_header_result.ok()) {
654+
std::cerr << "Warning: Failed to create text header: " << text_header_result.status() << std::endl;
655+
}
656+
629657
// Pick the highest-rank (float-prefer) seismic variable
630658
bool found = false;
631659
Variable<> seismic_var;
@@ -849,6 +877,292 @@ Result<void> MdioToSegy(
849877
return absl::OkStatus();
850878
}
851879

880+
/**
881+
* @brief Creates and writes a SEG-Y text header.
882+
*
883+
* This function handles the creation of a SEG-Y text header with fallback logic:
884+
* 1. Use provided text_header if it's valid (3200 bytes)
885+
* 2. Check dataset metadata for "text_header" field
886+
* 3. Generate default header with MDIO path, date, and override info
887+
*
888+
* @param text_header Input text header string
889+
* @param dataset MDIO dataset for metadata lookup
890+
* @param mdio_path Path to input MDIO dataset
891+
* @param segy_path Path where SEG-Y output is being written
892+
* @param overrides Applied trace header overrides
893+
* @param ctx TensorStore context
894+
* @return Status of the text header creation
895+
*/
896+
Result<void> CreateSegyTextHeader(
897+
const std::string& text_header,
898+
const Dataset& dataset,
899+
const std::string& mdio_path,
900+
const std::string& segy_path,
901+
const std::vector<TraceHeaderField>& overrides,
902+
const Context& ctx) {
903+
904+
std::string final_text_header;
905+
906+
// Helper function to validate and fix SEG-Y text header format
907+
auto validate_and_fix_header = [](const std::string& header) -> std::pair<bool, std::string> {
908+
if (header.size() != 3200) {
909+
return {false, ""};
910+
}
911+
912+
// Check if it's properly formatted as 40 lines of 80 characters
913+
std::string fixed_header;
914+
for (int line = 0; line < 40; ++line) {
915+
size_t start = line * 80;
916+
if (start >= header.size()) break;
917+
918+
std::string line_content = header.substr(start, 80);
919+
920+
// Ensure line is exactly 80 characters
921+
if (line_content.size() < 80) {
922+
line_content.resize(80, ' ');
923+
} else if (line_content.size() > 80) {
924+
line_content = line_content.substr(0, 80);
925+
}
926+
927+
fixed_header += line_content;
928+
}
929+
930+
// Ensure exactly 3200 bytes
931+
if (fixed_header.size() < 3200) {
932+
fixed_header.resize(3200, ' ');
933+
} else if (fixed_header.size() > 3200) {
934+
fixed_header = fixed_header.substr(0, 3200);
935+
}
936+
937+
return {true, fixed_header};
938+
};
939+
940+
// Check if provided text header is valid (3200 bytes)
941+
if (text_header.size() == 3200) {
942+
auto [is_valid, fixed_header] = validate_and_fix_header(text_header);
943+
if (is_valid) {
944+
std::cout << "Using provided text header (3200 bytes, validated format)" << std::endl;
945+
final_text_header = fixed_header;
946+
} else {
947+
std::cout << "Provided text header has invalid format, checking dataset metadata..." << std::endl;
948+
}
949+
} else if (!text_header.empty()) {
950+
std::cout << "Provided text header invalid size (" << text_header.size()
951+
<< " bytes), checking dataset metadata..." << std::endl;
952+
} else {
953+
std::cout << "No text header provided, checking dataset metadata..." << std::endl;
954+
}
955+
956+
// Try to get text header from dataset metadata if not already set
957+
if (final_text_header.empty()) {
958+
bool found_in_metadata = false;
959+
try {
960+
auto metadata = dataset.getMetadata();
961+
if (metadata.contains("attributes") &&
962+
metadata["attributes"].contains("text_header")) {
963+
std::string metadata_header = metadata["attributes"]["text_header"].get<std::string>();
964+
if (metadata_header.size() == 3200) {
965+
auto [is_valid, fixed_header] = validate_and_fix_header(metadata_header);
966+
if (is_valid) {
967+
std::cout << "Using text header from dataset metadata (validated format)" << std::endl;
968+
final_text_header = fixed_header;
969+
found_in_metadata = true;
970+
} else {
971+
std::cout << "Text header in metadata has invalid format" << std::endl;
972+
}
973+
} else {
974+
std::cout << "Text header in metadata has invalid size ("
975+
<< metadata_header.size() << " bytes)" << std::endl;
976+
}
977+
}
978+
} catch (const std::exception& e) {
979+
std::cout << "Could not read text header from metadata: " << e.what() << std::endl;
980+
}
981+
982+
// Generate default text header if not found
983+
if (!found_in_metadata) {
984+
std::cout << "Generating default text header..." << std::endl;
985+
986+
// Get current date
987+
auto now = std::chrono::system_clock::now();
988+
auto time_t = std::chrono::system_clock::to_time_t(now);
989+
auto tm = *std::localtime(&time_t);
990+
char date_str[32];
991+
std::strftime(date_str, sizeof(date_str), "%Y-%m-%d %H:%M:%S", &tm);
992+
993+
// Helper function to format a line to exactly 80 characters
994+
auto format_line = [](int line_num, const std::string& content) -> std::string {
995+
std::string line = "C" + std::to_string(line_num);
996+
if (line_num < 10) line = "C " + std::to_string(line_num); // Add space for single digits
997+
line += " " + content;
998+
999+
// Pad or truncate to exactly 80 characters
1000+
if (line.length() < 80) {
1001+
line.resize(80, ' ');
1002+
} else if (line.length() > 80) {
1003+
line = line.substr(0, 80);
1004+
}
1005+
return line;
1006+
};
1007+
1008+
// Create header lines
1009+
std::vector<std::string> lines;
1010+
lines.push_back(format_line(1, "SEG-Y file created from MDIO dataset"));
1011+
lines.push_back(format_line(2, ""));
1012+
1013+
// Handle potentially long MDIO path - split if necessary
1014+
std::string path_prefix = "Source MDIO path: ";
1015+
std::string full_path_line = path_prefix + mdio_path;
1016+
if (full_path_line.length() <= 76) { // 80 - "C# " = 76 chars max
1017+
lines.push_back(format_line(3, full_path_line));
1018+
lines.push_back(format_line(4, "Created: " + std::string(date_str)));
1019+
lines.push_back(format_line(5, ""));
1020+
} else {
1021+
// Split long path across multiple lines
1022+
lines.push_back(format_line(3, path_prefix));
1023+
1024+
// Find a good break point in the path
1025+
std::string remaining_path = mdio_path;
1026+
int current_line = 4;
1027+
while (!remaining_path.empty() && current_line <= 39) {
1028+
int max_path_chars = 74; // 76 - 2 for indentation
1029+
if (remaining_path.length() <= max_path_chars) {
1030+
lines.push_back(format_line(current_line++, " " + remaining_path));
1031+
break;
1032+
} else {
1033+
// Find break point (prefer slash or dash)
1034+
int break_point = max_path_chars;
1035+
for (int i = max_path_chars - 1; i >= max_path_chars - 20 && i >= 0; --i) {
1036+
if (remaining_path[i] == '/' || remaining_path[i] == '-') {
1037+
break_point = i + 1; // Include the separator
1038+
break;
1039+
}
1040+
}
1041+
1042+
std::string path_part = remaining_path.substr(0, break_point);
1043+
lines.push_back(format_line(current_line++, " " + path_part));
1044+
remaining_path = remaining_path.substr(break_point);
1045+
}
1046+
}
1047+
1048+
lines.push_back(format_line(current_line++, "Created: " + std::string(date_str)));
1049+
lines.push_back(format_line(current_line++, ""));
1050+
}
1051+
1052+
int line_num = lines.size() + 1;
1053+
if (!overrides.empty()) {
1054+
lines.push_back(format_line(line_num++, "Applied trace header overrides:"));
1055+
1056+
// Calculate how many overrides we can fit in remaining lines
1057+
int available_lines = 40 - line_num;
1058+
int overrides_to_show = std::min(static_cast<int>(overrides.size()), available_lines - 1); // -1 for potential "..." line
1059+
1060+
for (int i = 0; i < overrides_to_show && line_num <= 39; ++i) {
1061+
const auto& override = overrides[i];
1062+
std::string override_info = "- " + override.name + " at byte " + std::to_string(override.offset);
1063+
1064+
// Check if this override description fits in one line (76 chars max)
1065+
if (override_info.length() <= 76) {
1066+
lines.push_back(format_line(line_num++, override_info));
1067+
} else {
1068+
// Truncate long override descriptions
1069+
std::string truncated = override_info.substr(0, 73) + "..."; // 76 - 3 for "..."
1070+
lines.push_back(format_line(line_num++, truncated));
1071+
}
1072+
}
1073+
1074+
// Add indication if there are more overrides
1075+
if (overrides.size() > overrides_to_show && line_num <= 40) {
1076+
int remaining_overrides = overrides.size() - overrides_to_show;
1077+
std::string more_info = "... and " + std::to_string(remaining_overrides) + " more override(s)";
1078+
lines.push_back(format_line(line_num++, more_info));
1079+
}
1080+
}
1081+
1082+
// Fill remaining lines up to 40
1083+
for (int i = line_num; i <= 40; ++i) {
1084+
lines.push_back(format_line(i, ""));
1085+
}
1086+
1087+
// Combine all lines into final header
1088+
std::string header_content;
1089+
for (const auto& line : lines) {
1090+
header_content += line;
1091+
}
1092+
1093+
// Verify we have exactly 3200 bytes (40 lines * 80 chars)
1094+
if (header_content.size() != 3200) {
1095+
std::cerr << "Warning: Generated text header size is " << header_content.size()
1096+
<< " bytes, expected 3200. Adjusting..." << std::endl;
1097+
if (header_content.size() < 3200) {
1098+
header_content.resize(3200, ' ');
1099+
} else {
1100+
header_content = header_content.substr(0, 3200);
1101+
}
1102+
}
1103+
1104+
final_text_header = header_content;
1105+
}
1106+
}
1107+
1108+
// Write text header to file using TensorStore
1109+
std::string text_header_path = segy_path + "/text_header";
1110+
1111+
// Build TensorStore spec for text header
1112+
nlohmann::json spec;
1113+
spec["driver"] = "zarr";
1114+
1115+
std::string driver = "file";
1116+
if (absl::StartsWith(text_header_path, "gs://")) driver = "gcs";
1117+
else if (absl::StartsWith(text_header_path, "s3://")) driver = "s3";
1118+
1119+
spec["kvstore"] = {{"driver", driver}, {"path", text_header_path}};
1120+
1121+
if (driver != "file") {
1122+
size_t pos = text_header_path.find("://");
1123+
std::string tail = text_header_path.substr(pos + 3);
1124+
std::vector<std::string> parts;
1125+
for (auto& p : absl::StrSplit(tail, '/')) parts.emplace_back(p);
1126+
spec["kvstore"]["bucket"] = parts[0];
1127+
spec["kvstore"]["path"] = absl::StrJoin(parts.begin()+1, parts.end(), "/");
1128+
}
1129+
1130+
spec["metadata"] = {
1131+
{"dtype", "|S1"}, // Single byte string
1132+
{"shape", {3200}},
1133+
{"chunks", {3200}},
1134+
{"dimension_separator", "."},
1135+
{"compressor", nullptr},
1136+
{"fill_value", nullptr},
1137+
{"order", "C"},
1138+
{"zarr_format", 2}
1139+
};
1140+
1141+
spec["attributes"] = {
1142+
{"dimension_names", {"byte"}},
1143+
{"long_name", "SEG-Y Text Header"}
1144+
};
1145+
1146+
MDIO_ASSIGN_OR_RETURN(
1147+
auto text_header_var,
1148+
Variable<>::Open(spec, constants::kCreateClean, ctx).result());
1149+
1150+
// Create array from text header string
1151+
MDIO_ASSIGN_OR_RETURN(auto text_data, from_variable(text_header_var));
1152+
char* text_ptr = reinterpret_cast<char*>(text_data.get_data_accessor().data());
1153+
std::memcpy(text_ptr, final_text_header.data(), 3200);
1154+
1155+
// Write the text header
1156+
auto write_future = text_header_var.Write(text_data);
1157+
auto write_result = write_future.result();
1158+
if (!write_result.ok()) {
1159+
return write_result.status();
1160+
}
1161+
1162+
std::cout << "Text header written to: " << text_header_path << std::endl;
1163+
return absl::OkStatus();
1164+
}
1165+
8521166
} // namespace utils
8531167
} // namespace mdio
8541168

0 commit comments

Comments
 (0)