|
18 | 18 | #include <cstring> |
19 | 19 | #include <string> |
20 | 20 | #include <vector> |
| 21 | +#include <sstream> |
| 22 | +#include <iomanip> |
| 23 | +#include <chrono> |
| 24 | +#include <ctime> |
21 | 25 |
|
22 | 26 | #include <algorithm> // for std::erase_if |
23 | 27 | #include <thread> // for sleep_for |
@@ -208,8 +212,8 @@ inline TraceHeaderComposer::TraceHeaderComposer() { |
208 | 212 | {"impulse_signal_polarity", 149, "<i2"}, |
209 | 213 | {"vibratory_polarity_code", 151, "<i2"}, |
210 | 214 |
|
211 | | - // # **SEG‑Y Rev 1 additions (bytes 153–160ish)** |
212 | | - {"segy_format_revision", 153, "<i2"}, // # Format revision (always 1 for Rev 1) |
| 215 | + // # **SEG‑Y Rev 1 additions (bytes 153–160ish)** |
| 216 | + {"segy_format_revision", 153, "<i2"}, // # Format revision (always 1 for Rev 1) |
213 | 217 | {"fixed_length_trace_flag", 155, "<i2"}, // # 1 = fixed-length traces present :contentReference[oaicite:1]{index=1} |
214 | 218 | {"num_textual_hdr_ext", 157, "<i2"}, // # Number of 3200‑byte Extended Textual File Header records :contentReference[oaicite:2]{index=2} |
215 | 219 |
|
@@ -565,6 +569,15 @@ inline Result<void> TraceHeaderMapper::PopulateTraceHeader( |
565 | 569 | return absl::OkStatus(); |
566 | 570 | } |
567 | 571 |
|
| 572 | +// Forward declaration: CreateSegyTextHeader is called from MdioToSegy below but is defined after it. |
| 573 | +Result<void> CreateSegyTextHeader( |
| 574 | + const std::string& text_header, |
| 575 | + const Dataset& dataset, |
| 576 | + const std::string& mdio_path, |
| 577 | + const std::string& segy_path, |
| 578 | + const std::vector<TraceHeaderField>& overrides, |
| 579 | + const Context& ctx); |
| 580 | + |
568 | 581 | /** |
569 | 582 | * @brief Converts an MDIO dataset to a SEG-Y styled Variable. |
570 | 583 | * |
@@ -626,6 +639,21 @@ Result<void> MdioToSegy( |
626 | 639 | } |
627 | 640 | } |
628 | 641 |
|
| 642 | + // Create and write SEG-Y text header |
| 643 | + std::vector<TraceHeaderField> applied_overrides; |
| 644 | + for (const auto& field : trace_headers.fields()) { |
| 645 | + // Check if this field was overridden by comparing with default names |
| 646 | + if (field.name == "inline" || field.name == "crossline" || |
| 647 | + field.name == "cdp-x" || field.name == "cdp-y") { |
| 648 | + applied_overrides.push_back(field); |
| 649 | + } |
| 650 | + } |
| 651 | + |
| 652 | + auto text_header_result = CreateSegyTextHeader(text_header, ds, mdio_path, segy_path, applied_overrides, ctx); |
| 653 | + if (!text_header_result.ok()) { |
| 654 | + std::cerr << "Warning: Failed to create text header: " << text_header_result.status() << std::endl; |
| 655 | + } |
| 656 | + |
629 | 657 | // Pick the highest-rank (float-prefer) seismic variable |
630 | 658 | bool found = false; |
631 | 659 | Variable<> seismic_var; |
@@ -849,6 +877,292 @@ Result<void> MdioToSegy( |
849 | 877 | return absl::OkStatus(); |
850 | 878 | } |
851 | 879 |
|
| 880 | +/** |
| 881 | + * @brief Creates and writes a SEG-Y text header. |
| 882 | + * |
| 883 | + * This function handles the creation of a SEG-Y text header with fallback logic: |
| 884 | + * 1. Use provided text_header if it's valid (3200 bytes) |
| 885 | + * 2. Check dataset metadata for "text_header" field |
| 886 | + * 3. Generate default header with MDIO path, date, and override info |
| 887 | + * |
| 888 | + * @param text_header Input text header string |
| 889 | + * @param dataset MDIO dataset for metadata lookup |
| 890 | + * @param mdio_path Path to input MDIO dataset |
| 891 | + * @param segy_path Path where SEG-Y output is being written |
| 892 | + * @param overrides Applied trace header overrides |
| 893 | + * @param ctx TensorStore context |
| 894 | + * @return Status of the text header creation |
| 895 | + */ |
| 896 | +Result<void> CreateSegyTextHeader( |
| 897 | + const std::string& text_header, |
| 898 | + const Dataset& dataset, |
| 899 | + const std::string& mdio_path, |
| 900 | + const std::string& segy_path, |
| 901 | + const std::vector<TraceHeaderField>& overrides, |
| 902 | + const Context& ctx) { |
| 903 | + |
| 904 | + std::string final_text_header; |
| 905 | + |
| 906 | + // Helper function to validate and fix SEG-Y text header format |
| 907 | + auto validate_and_fix_header = [](const std::string& header) -> std::pair<bool, std::string> { |
| 908 | + if (header.size() != 3200) { |
| 909 | + return {false, ""}; |
| 910 | + } |
| 911 | + |
| 912 | + // Check if it's properly formatted as 40 lines of 80 characters |
| 913 | + std::string fixed_header; |
| 914 | + for (int line = 0; line < 40; ++line) { |
| 915 | + size_t start = line * 80; |
| 916 | + if (start >= header.size()) break; |
| 917 | + |
| 918 | + std::string line_content = header.substr(start, 80); |
| 919 | + |
| 920 | + // Ensure line is exactly 80 characters |
| 921 | + if (line_content.size() < 80) { |
| 922 | + line_content.resize(80, ' '); |
| 923 | + } else if (line_content.size() > 80) { |
| 924 | + line_content = line_content.substr(0, 80); |
| 925 | + } |
| 926 | + |
| 927 | + fixed_header += line_content; |
| 928 | + } |
| 929 | + |
| 930 | + // Ensure exactly 3200 bytes |
| 931 | + if (fixed_header.size() < 3200) { |
| 932 | + fixed_header.resize(3200, ' '); |
| 933 | + } else if (fixed_header.size() > 3200) { |
| 934 | + fixed_header = fixed_header.substr(0, 3200); |
| 935 | + } |
| 936 | + |
| 937 | + return {true, fixed_header}; |
| 938 | + }; |
| 939 | + |
| 940 | + // Check if provided text header is valid (3200 bytes) |
| 941 | + if (text_header.size() == 3200) { |
| 942 | + auto [is_valid, fixed_header] = validate_and_fix_header(text_header); |
| 943 | + if (is_valid) { |
| 944 | + std::cout << "Using provided text header (3200 bytes, validated format)" << std::endl; |
| 945 | + final_text_header = fixed_header; |
| 946 | + } else { |
| 947 | + std::cout << "Provided text header has invalid format, checking dataset metadata..." << std::endl; |
| 948 | + } |
| 949 | + } else if (!text_header.empty()) { |
| 950 | + std::cout << "Provided text header invalid size (" << text_header.size() |
| 951 | + << " bytes), checking dataset metadata..." << std::endl; |
| 952 | + } else { |
| 953 | + std::cout << "No text header provided, checking dataset metadata..." << std::endl; |
| 954 | + } |
| 955 | + |
| 956 | + // Try to get text header from dataset metadata if not already set |
| 957 | + if (final_text_header.empty()) { |
| 958 | + bool found_in_metadata = false; |
| 959 | + try { |
| 960 | + auto metadata = dataset.getMetadata(); |
| 961 | + if (metadata.contains("attributes") && |
| 962 | + metadata["attributes"].contains("text_header")) { |
| 963 | + std::string metadata_header = metadata["attributes"]["text_header"].get<std::string>(); |
| 964 | + if (metadata_header.size() == 3200) { |
| 965 | + auto [is_valid, fixed_header] = validate_and_fix_header(metadata_header); |
| 966 | + if (is_valid) { |
| 967 | + std::cout << "Using text header from dataset metadata (validated format)" << std::endl; |
| 968 | + final_text_header = fixed_header; |
| 969 | + found_in_metadata = true; |
| 970 | + } else { |
| 971 | + std::cout << "Text header in metadata has invalid format" << std::endl; |
| 972 | + } |
| 973 | + } else { |
| 974 | + std::cout << "Text header in metadata has invalid size (" |
| 975 | + << metadata_header.size() << " bytes)" << std::endl; |
| 976 | + } |
| 977 | + } |
| 978 | + } catch (const std::exception& e) { |
| 979 | + std::cout << "Could not read text header from metadata: " << e.what() << std::endl; |
| 980 | + } |
| 981 | + |
| 982 | + // Generate default text header if not found |
| 983 | + if (!found_in_metadata) { |
| 984 | + std::cout << "Generating default text header..." << std::endl; |
| 985 | + |
| 986 | + // Get current date |
| 987 | + auto now = std::chrono::system_clock::now(); |
| 988 | + auto time_t = std::chrono::system_clock::to_time_t(now); |
| 989 | + auto tm = *std::localtime(&time_t); |
| 990 | + char date_str[32]; |
| 991 | + std::strftime(date_str, sizeof(date_str), "%Y-%m-%d %H:%M:%S", &tm); |
| 992 | + |
| 993 | + // Helper function to format a line to exactly 80 characters |
| 994 | + auto format_line = [](int line_num, const std::string& content) -> std::string { |
| 995 | + std::string line = "C" + std::to_string(line_num); |
| 996 | + if (line_num < 10) line = "C " + std::to_string(line_num); // Add space for single digits |
| 997 | + line += " " + content; |
| 998 | + |
| 999 | + // Pad or truncate to exactly 80 characters |
| 1000 | + if (line.length() < 80) { |
| 1001 | + line.resize(80, ' '); |
| 1002 | + } else if (line.length() > 80) { |
| 1003 | + line = line.substr(0, 80); |
| 1004 | + } |
| 1005 | + return line; |
| 1006 | + }; |
| 1007 | + |
| 1008 | + // Create header lines |
| 1009 | + std::vector<std::string> lines; |
| 1010 | + lines.push_back(format_line(1, "SEG-Y file created from MDIO dataset")); |
| 1011 | + lines.push_back(format_line(2, "")); |
| 1012 | + |
| 1013 | + // Handle potentially long MDIO path - split if necessary |
| 1014 | + std::string path_prefix = "Source MDIO path: "; |
| 1015 | + std::string full_path_line = path_prefix + mdio_path; |
| 1016 | + if (full_path_line.length() <= 76) { // 80 - "C# " = 76 chars max |
| 1017 | + lines.push_back(format_line(3, full_path_line)); |
| 1018 | + lines.push_back(format_line(4, "Created: " + std::string(date_str))); |
| 1019 | + lines.push_back(format_line(5, "")); |
| 1020 | + } else { |
| 1021 | + // Split long path across multiple lines |
| 1022 | + lines.push_back(format_line(3, path_prefix)); |
| 1023 | + |
| 1024 | + // Find a good break point in the path |
| 1025 | + std::string remaining_path = mdio_path; |
| 1026 | + int current_line = 4; |
| 1027 | + while (!remaining_path.empty() && current_line <= 39) { |
| 1028 | + int max_path_chars = 74; // 76 - 2 for indentation |
| 1029 | + if (remaining_path.length() <= max_path_chars) { |
| 1030 | + lines.push_back(format_line(current_line++, " " + remaining_path)); |
| 1031 | + break; |
| 1032 | + } else { |
| 1033 | + // Find break point (prefer slash or dash) |
| 1034 | + int break_point = max_path_chars; |
| 1035 | + for (int i = max_path_chars - 1; i >= max_path_chars - 20 && i >= 0; --i) { |
| 1036 | + if (remaining_path[i] == '/' || remaining_path[i] == '-') { |
| 1037 | + break_point = i + 1; // Include the separator |
| 1038 | + break; |
| 1039 | + } |
| 1040 | + } |
| 1041 | + |
| 1042 | + std::string path_part = remaining_path.substr(0, break_point); |
| 1043 | + lines.push_back(format_line(current_line++, " " + path_part)); |
| 1044 | + remaining_path = remaining_path.substr(break_point); |
| 1045 | + } |
| 1046 | + } |
| 1047 | + |
| 1048 | + lines.push_back(format_line(current_line++, "Created: " + std::string(date_str))); |
| 1049 | + lines.push_back(format_line(current_line++, "")); |
| 1050 | + } |
| 1051 | + |
| 1052 | + int line_num = lines.size() + 1; |
| 1053 | + if (!overrides.empty()) { |
| 1054 | + lines.push_back(format_line(line_num++, "Applied trace header overrides:")); |
| 1055 | + |
| 1056 | + // Calculate how many overrides we can fit in remaining lines |
| 1057 | + int available_lines = 40 - line_num; |
| 1058 | + int overrides_to_show = std::min(static_cast<int>(overrides.size()), available_lines - 1); // -1 for potential "..." line |
| 1059 | + |
| 1060 | + for (int i = 0; i < overrides_to_show && line_num <= 39; ++i) { |
| 1061 | + const auto& override = overrides[i]; |
| 1062 | + std::string override_info = "- " + override.name + " at byte " + std::to_string(override.offset); |
| 1063 | + |
| 1064 | + // Check if this override description fits in one line (76 chars max) |
| 1065 | + if (override_info.length() <= 76) { |
| 1066 | + lines.push_back(format_line(line_num++, override_info)); |
| 1067 | + } else { |
| 1068 | + // Truncate long override descriptions |
| 1069 | + std::string truncated = override_info.substr(0, 73) + "..."; // 76 - 3 for "..." |
| 1070 | + lines.push_back(format_line(line_num++, truncated)); |
| 1071 | + } |
| 1072 | + } |
| 1073 | + |
| 1074 | + // Add indication if there are more overrides |
| 1075 | + if (overrides.size() > overrides_to_show && line_num <= 40) { |
| 1076 | + int remaining_overrides = overrides.size() - overrides_to_show; |
| 1077 | + std::string more_info = "... and " + std::to_string(remaining_overrides) + " more override(s)"; |
| 1078 | + lines.push_back(format_line(line_num++, more_info)); |
| 1079 | + } |
| 1080 | + } |
| 1081 | + |
| 1082 | + // Fill remaining lines up to 40 |
| 1083 | + for (int i = line_num; i <= 40; ++i) { |
| 1084 | + lines.push_back(format_line(i, "")); |
| 1085 | + } |
| 1086 | + |
| 1087 | + // Combine all lines into final header |
| 1088 | + std::string header_content; |
| 1089 | + for (const auto& line : lines) { |
| 1090 | + header_content += line; |
| 1091 | + } |
| 1092 | + |
| 1093 | + // Verify we have exactly 3200 bytes (40 lines * 80 chars) |
| 1094 | + if (header_content.size() != 3200) { |
| 1095 | + std::cerr << "Warning: Generated text header size is " << header_content.size() |
| 1096 | + << " bytes, expected 3200. Adjusting..." << std::endl; |
| 1097 | + if (header_content.size() < 3200) { |
| 1098 | + header_content.resize(3200, ' '); |
| 1099 | + } else { |
| 1100 | + header_content = header_content.substr(0, 3200); |
| 1101 | + } |
| 1102 | + } |
| 1103 | + |
| 1104 | + final_text_header = header_content; |
| 1105 | + } |
| 1106 | + } |
| 1107 | + |
| 1108 | + // Write text header to file using TensorStore |
| 1109 | + std::string text_header_path = segy_path + "/text_header"; |
| 1110 | + |
| 1111 | + // Build TensorStore spec for text header |
| 1112 | + nlohmann::json spec; |
| 1113 | + spec["driver"] = "zarr"; |
| 1114 | + |
| 1115 | + std::string driver = "file"; |
| 1116 | + if (absl::StartsWith(text_header_path, "gs://")) driver = "gcs"; |
| 1117 | + else if (absl::StartsWith(text_header_path, "s3://")) driver = "s3"; |
| 1118 | + |
| 1119 | + spec["kvstore"] = {{"driver", driver}, {"path", text_header_path}}; |
| 1120 | + |
| 1121 | + if (driver != "file") { |
| 1122 | + size_t pos = text_header_path.find("://"); |
| 1123 | + std::string tail = text_header_path.substr(pos + 3); |
| 1124 | + std::vector<std::string> parts; |
| 1125 | + for (auto& p : absl::StrSplit(tail, '/')) parts.emplace_back(p); |
| 1126 | + spec["kvstore"]["bucket"] = parts[0]; |
| 1127 | + spec["kvstore"]["path"] = absl::StrJoin(parts.begin()+1, parts.end(), "/"); |
| 1128 | + } |
| 1129 | + |
| 1130 | + spec["metadata"] = { |
| 1131 | + {"dtype", "|S1"}, // Single byte string |
| 1132 | + {"shape", {3200}}, |
| 1133 | + {"chunks", {3200}}, |
| 1134 | + {"dimension_separator", "."}, |
| 1135 | + {"compressor", nullptr}, |
| 1136 | + {"fill_value", nullptr}, |
| 1137 | + {"order", "C"}, |
| 1138 | + {"zarr_format", 2} |
| 1139 | + }; |
| 1140 | + |
| 1141 | + spec["attributes"] = { |
| 1142 | + {"dimension_names", {"byte"}}, |
| 1143 | + {"long_name", "SEG-Y Text Header"} |
| 1144 | + }; |
| 1145 | + |
| 1146 | + MDIO_ASSIGN_OR_RETURN( |
| 1147 | + auto text_header_var, |
| 1148 | + Variable<>::Open(spec, constants::kCreateClean, ctx).result()); |
| 1149 | + |
| 1150 | + // Create array from text header string |
| 1151 | + MDIO_ASSIGN_OR_RETURN(auto text_data, from_variable(text_header_var)); |
| 1152 | + char* text_ptr = reinterpret_cast<char*>(text_data.get_data_accessor().data()); |
| 1153 | + std::memcpy(text_ptr, final_text_header.data(), 3200); |
| 1154 | + |
| 1155 | + // Write the text header |
| 1156 | + auto write_future = text_header_var.Write(text_data); |
| 1157 | + auto write_result = write_future.result(); |
| 1158 | + if (!write_result.ok()) { |
| 1159 | + return write_result.status(); |
| 1160 | + } |
| 1161 | + |
| 1162 | + std::cout << "Text header written to: " << text_header_path << std::endl; |
| 1163 | + return absl::OkStatus(); |
| 1164 | +} |
| 1165 | + |
852 | 1166 | } // namespace utils |
853 | 1167 | } // namespace mdio |
854 | 1168 |
|
|
0 commit comments