Skip to content

Commit 05fe655

Browse files
committed
End of day checkpoint
1 parent a6b8101 commit 05fe655

3 files changed

Lines changed: 469 additions & 286 deletions

File tree

mdio/dataset.h

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -537,19 +537,25 @@ class Dataset {
537537
Result<Dataset> isel(Descriptors&... descriptors) {
538538
VariableCollection vars;
539539

540-
std::cout << "isel forwarded descriptors..." << std::endl;
541-
((std::cout << "Descriptor: " << descriptors.label.label() << " "
542-
<< descriptors.start << " " << descriptors.stop << " "
543-
<< descriptors.step << std::endl), ...);
544-
std::cout << "================================================" << std::endl;
540+
// std::cout << "isel forwarded descriptors..." << std::endl;
541+
// ((std::cout << "Descriptor: " << descriptors.label.label() << " "
542+
// << descriptors.start << " " << descriptors.stop << " "
543+
// << descriptors.step << std::endl), ...);
544+
// std::cout << "================================================" << std::endl;
545545

546546
// the shape of the new domain
547547
std::map<std::string, tensorstore::IndexDomainDimension<>> dims;
548548
std::vector<std::string> keys = variables.get_iterable_accessor();
549549

550+
// std::cout << "keys: " << std::endl;
551+
// for (const auto& key : keys) {
552+
// std::cout << key << std::endl;
553+
// }
554+
550555
for (const auto& name : keys) {
556+
MDIO_ASSIGN_OR_RETURN(auto retreivedVar, variables.at(name));
551557
MDIO_ASSIGN_OR_RETURN(auto variable,
552-
variables.at(name).value().slice(
558+
retreivedVar.slice(
553559
std::forward<Descriptors>(descriptors)...))
554560
// add to variable
555561
vars.add(name, variable);
@@ -726,15 +732,6 @@ class Dataset {
726732
*/
727733
Result<Dataset> isel(const std::vector<RangeDescriptor<Index>>& slices) {
728734

729-
/*
730-
What I need to do:
731-
If there is a disjoint dimension coordinate, I need to pass only those to the Variable slice method.
732-
I can use recursion to handle this.
733-
I will get all of the same labeled slices and perform that slice.
734-
I will then pass the remaining slices to the recursive isel call.
735-
I think this will fix my issues.
736-
*/
737-
738735
if (slices.empty()) {
739736
return absl::InvalidArgumentError("No slices provided.");
740737
}
@@ -743,32 +740,32 @@ class Dataset {
743740

744741
bool do_simple_slice = true;
745742

746-
std::set<std::string> labels;
747-
if (reducedSlices.size() < 1) {
748-
labels.insert(reducedSlices[0].label.label());
749-
for (auto i=1; i<reducedSlices.size(); i++) {
750-
if (labels.count(reducedSlices[i].label.label()) > 0) {
751-
do_simple_slice = false;
752-
break;
753-
}
754-
labels.insert(reducedSlices[i].label.label());
743+
// Build a set of just the labels
744+
std::set<std::string_view> labels;
745+
labels.insert(reducedSlices[0].label.label());
746+
for (auto i=1; i<reducedSlices.size(); i++) {
747+
if (labels.count(reducedSlices[i].label.label()) > 0) {
748+
do_simple_slice = false;
749+
// break; // Don't break here, we can check all the labels and see if we are left with a single dimension.
755750
}
756-
} else {
757-
return absl::InvalidArgumentError("No slices provided.");
751+
labels.insert(reducedSlices[i].label.label());
758752
}
759753

760-
if (!do_simple_slice) {
761-
std::cout << "Handling multi-dimensional slices..." << std::endl;
762-
754+
// If we are left with a single dimension, we can just do a simple slice.
755+
if (labels.size() == 1) {
756+
do_simple_slice = true;
763757
}
764758

765-
std::cout << "Reduced slices: " << std::endl;
766-
for (auto &slice : reducedSlices) {
767-
std::cout << "[" << slice.label.label() << ", " << slice.start << ", " << slice.stop << ", " << slice.step << "]" << std::endl;
768-
}
759+
// Debugging print for all RangeDescriptors pending
760+
// std::cout << "Reduced slices: " << std::endl;
761+
// for (auto &slice : reducedSlices) {
762+
// std::cout << "[" << slice.label.label() << ", " << slice.start << ", " << slice.stop << ", " << slice.step << "]" << std::endl;
763+
// }
769764

765+
// Pre-emptively split the RangeDescriptors if there are too many.
766+
// This is not the final logic that we want.
770767
if (reducedSlices.size() > internal::kMaxNumSlices) {
771-
std::cout << "Recursively slicing the dataset..." << std::endl;
768+
// std::cout << "Recursively slicing the dataset..." << std::endl;
772769
std::size_t halfElements = reducedSlices.size() / 2;
773770
if (halfElements % 2 != 0) {
774771
halfElements += 1;
@@ -790,8 +787,18 @@ class Dataset {
790787
return call_isel_with_vector_impl(
791788
slicesCopy, std::make_index_sequence<internal::kMaxNumSlices>{});
792789
} else {
793-
std::vector<RangeDescriptor<Index>> slicesCopy;
794-
for (int i=0; i<)
790+
std::vector<RangeDescriptor<Index>> simpleSlices;
791+
std::vector<RangeDescriptor<Index>> complexSlices;
792+
auto simpleLabel = reducedSlices[0].label.label();
793+
for (auto &slice : reducedSlices) {
794+
if (slice.label.label() != simpleLabel) {
795+
complexSlices.push_back(slice);
796+
} else {
797+
simpleSlices.push_back(slice);
798+
}
799+
}
800+
MDIO_ASSIGN_OR_RETURN(auto ds, isel(static_cast<const std::vector<RangeDescriptor<Index>>&>(simpleSlices)));
801+
return ds.isel(static_cast<const std::vector<RangeDescriptor<Index>>&>(complexSlices));
795802
}
796803
}
797804

@@ -840,7 +847,7 @@ class Dataset {
840847
}
841848
values.insert(val);
842849
bool found = false;
843-
for (Index i = offset; i < var.num_samples() + offset; ++i) {
850+
for (Index i = offset; i < varDat.num_samples() + offset; ++i) {
844851
if (varAccessor({i}) == val) {
845852
if (found) {
846853
trueStatus = absl::InvalidArgumentError(
@@ -859,7 +866,7 @@ class Dataset {
859866
}
860867
} else {
861868
// We must check for every occurrence of the value
862-
for (Index i = offset; i < var.num_samples() + offset; ++i) {
869+
for (Index i = offset; i < varDat.num_samples() + offset; ++i) {
863870
if (varAccessor({i}) == descriptor.value) {
864871
label_to_indices[descriptor.label.label()].push_back(i);
865872
}
@@ -1068,7 +1075,7 @@ class Dataset {
10681075
std::pair<bool, Index> stop = {false, 0};
10691076
auto offset = varDat.get_flattened_offset();
10701077

1071-
for (Index i = offset; i < var.num_samples() + offset; i++) {
1078+
for (Index i = offset; i < varDat.num_samples() + offset; i++) {
10721079
if (varAccessor({i}) == descriptor.start) {
10731080
if (start.first) {
10741081
trueStatus = absl::InvalidArgumentError("Repeated start value.");
@@ -1173,7 +1180,7 @@ class Dataset {
11731180
// **Use the flattened data pointer + offset for N‑D arrays:**
11741181
auto* data_ptr = varDat.get_data_accessor().data();
11751182
Index offset = varDat.get_flattened_offset();
1176-
Index nSamples = var.num_samples();
1183+
Index nSamples = varDat.num_samples();
11771184

11781185
// 3) Collect all flat indices where coord == target value
11791186
std::vector<Index> indices;
@@ -1198,11 +1205,11 @@ class Dataset {
11981205

11991206
// TODO(BrianMichell): Coalesce the slices into fewer descriptors.
12001207

1201-
std::cout << "All RangeDescriptors: " << std::endl;
1202-
for (const auto& slice : elementwiseSlices) {
1203-
// std::cout << slice << std::endl;
1204-
std::cout << "[" << slice.label << ", " << slice.start << ", " << slice.stop << ", " << slice.step << "]" << std::endl;
1205-
}
1208+
// std::cout << "All RangeDescriptors: " << std::endl;
1209+
// for (const auto& slice : elementwiseSlices) {
1210+
// // std::cout << slice << std::endl;
1211+
// std::cout << "[" << slice.label << ", " << slice.start << ", " << slice.stop << ", " << slice.step << "]" << std::endl;
1212+
// }
12061213

12071214
MDIO_ASSIGN_OR_RETURN(auto ds, isel(static_cast<const std::vector<RangeDescriptor<Index>>&>(elementwiseSlices)));
12081215
// TODO(BrianMichell): Make this method more async friendly.

0 commit comments

Comments
 (0)