Skip to content

Commit 48c8240

Browse files
committed
End of day checkpoint
1 parent 05fe655 commit 48c8240

2 files changed

Lines changed: 289 additions & 29 deletions

File tree

mdio/dataset_test.cc

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,18 @@ TEST(Dataset, where4) {
796796
std::cout << cdps.get_spec().value().dump(4) << std::endl;
797797

798798
std::cout << cdps.num_samples() << std::endl;
799+
800+
auto true_domain_res = cdps.get_true_domain();
801+
ASSERT_TRUE(true_domain_res.status().ok()) << true_domain_res.status();
802+
auto true_domain = true_domain_res.value();
803+
for (auto i = 0; i < true_domain.first.size(); i++) {
804+
std::cout << "True internal domain: " << true_domain.first[i] << " " << true_domain.second[i] << " resulting in number of samples: " << (true_domain.second[i] - true_domain.first[i]) << std::endl;
805+
}
806+
auto true_offset_res = cdps.get_true_offset();
807+
ASSERT_TRUE(true_offset_res.status().ok()) << true_offset_res.status();
808+
auto true_offset = true_offset_res.value();
809+
std::cout << "True offset: " << true_offset << std::endl;
810+
// std::cout << "True internal domain: " << true_domain.first << " " << true_domain.second << " resulting in number of samples: " << (true_domain.second - true_domain.first) << std::endl;
799811
}
800812

801813
auto vRes = slicedDs.variables.get<mdio::dtypes::float32_t>("cdp-x");
@@ -804,8 +816,16 @@ TEST(Dataset, where4) {
804816
auto vD = v.Read();
805817
ASSERT_TRUE(vD.status().ok()) << vD.status();
806818
auto vda = vD.value().get_data_accessor().data();
819+
auto vOrigin = vD.value().get_data_accessor().layout().origin();
820+
std::cout << "=======Origin span of cdp-x=======" << std::endl;
821+
for (const auto& span : vOrigin) {
822+
std::cout << span << std::endl;
823+
}
824+
std::cout << "=======End of origin span of cdp-x=======" << std::endl;
825+
auto offset = vD.value().get_flattened_offset();
826+
std::cout << "vD.value().get_flattened_offset(): " << offset << std::endl;
807827
for (auto i = 0; i < vD.value().num_samples(); i++) {
808-
std::cout << "[" << i << "]: " << vda[i+vD.value().get_flattened_offset()] << std::endl;
828+
std::cout << "[" << i << "]: " << vda[i+offset] << std::endl;
809829
}
810830

811831
auto imgVarRes = slicedDs.variables.get<mdio::dtypes::float32_t>("image");
@@ -815,10 +835,21 @@ TEST(Dataset, where4) {
815835
ASSERT_TRUE(imgDataFut.status().ok()) << imgDataFut.status();
816836
auto imgData = imgDataFut.value();
817837
auto imgDataAccessor = imgData.get_data_accessor();
838+
auto imgOrigin = imgDataAccessor.layout().origin();
839+
std::cout << "=======Origin span of image=======" << std::endl;
840+
for (const auto& span : imgOrigin) {
841+
std::cout << span << std::endl;
842+
}
843+
std::cout << "=======End of origin span of image=======" << std::endl;
844+
auto offset2 = imgData.get_flattened_offset();
845+
std::cout << "imgData.get_flattened_offset(): " << offset2 << std::endl;
818846
for (auto i = 0; i < imgData.num_samples(); i++) {
819-
std::cout << "[" << i << "]: " << imgDataAccessor.data()[i+imgData.get_flattened_offset()] << std::endl;
847+
std::cout << "[" << i << "]: " << imgDataAccessor.data()[i+offset2] << std::endl;
820848
}
821849

850+
std::cout << "Performing a slow lookup of of the origin for cdp-x: " << vD.value().get_data_accessor()({15,5}) << std::endl;
851+
std::cout << "Performing a slow lookup of of the origin for image: " << imgDataAccessor({15,5,0}) << std::endl;
852+
822853
}
823854

824855
TEST(Dataset, selValue) {

mdio/variable.h

Lines changed: 256 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,244 @@ Result<Variable> slice(const Descriptors&... descriptors) const {
16441644
return intervals;
16451645
}
16461646

1647+
Result<std::pair<std::vector<Index>,std::vector<Index>>> get_true_domain() {
1648+
// 1) grab the full TensorStore spec
1649+
MDIO_ASSIGN_OR_RETURN(auto spec, get_spec());
1650+
1651+
// 2) parse the *final* global domain
1652+
auto global_min = spec["transform"]["input_inclusive_min"].template get<std::vector<Index>>();
1653+
auto global_max = spec["transform"]["input_exclusive_max"].template get<std::vector<Index>>();
1654+
const size_t rank = global_min.size();
1655+
1656+
// 3) initialize our “true” domain to extreme values
1657+
std::vector<Index> true_min(rank, std::numeric_limits<Index>::max());
1658+
std::vector<Index> true_max(rank, std::numeric_limits<Index>::min());
1659+
1660+
// 4) for each top‐level layer in the stack…
1661+
for (auto const& layer : spec["layers"]) {
1662+
auto const& transform = layer["transform"];
1663+
1664+
// 4a) its local domain
1665+
auto layer_min = transform["input_inclusive_min"].template get<std::vector<Index>>();
1666+
auto layer_max = transform["input_exclusive_max"].template get<std::vector<Index>>();
1667+
1668+
// 4b) pull out any per‐dim offsets (default = 0)
1669+
std::vector<Index> offsets(rank, 0);
1670+
if (transform.contains("output")) {
1671+
for (size_t i = 0; i < transform["output"].size() && i < rank; ++i) {
1672+
auto const& out_i = transform["output"][i];
1673+
if (out_i.contains("offset")) {
1674+
offsets[i] = out_i["offset"].template get<Index>();
1675+
}
1676+
}
1677+
}
1678+
1679+
// 4c) compute the *global* slice‐domain for this layer,
1680+
// intersect with the full domain, and see if non‐empty
1681+
bool contributes = true;
1682+
std::vector<Index> intersect_min(rank), intersect_max(rank);
1683+
for (size_t i = 0; i < rank; ++i) {
1684+
Index leaf_min = layer_min[i] + offsets[i];
1685+
Index leaf_max = layer_max[i] + offsets[i];
1686+
intersect_min[i] = std::max(global_min[i], leaf_min);
1687+
intersect_max[i] = std::min(global_max[i], leaf_max);
1688+
if (intersect_min[i] >= intersect_max[i]) {
1689+
contributes = false;
1690+
break;
1691+
}
1692+
}
1693+
if (!contributes) continue;
1694+
1695+
// 4d) merge into our running true‐domain bbox
1696+
for (size_t i = 0; i < rank; ++i) {
1697+
true_min[i] = std::min(true_min[i], intersect_min[i]);
1698+
true_max[i] = std::max(true_max[i], intersect_max[i]);
1699+
}
1700+
}
1701+
1702+
// 5) if nothing ever contributed, error out
1703+
if (true_min[0] == std::numeric_limits<Index>::max()) {
1704+
return absl::NotFoundError("No layers intersect the final domain");
1705+
}
1706+
1707+
// 6) done!
1708+
return std::make_pair(std::move(true_min), std::move(true_max));
1709+
}
1710+
1711+
1712+
static Index parse_index(const nlohmann::json &j) {
1713+
if (j.is_array()) return parse_index(j.at(0));
1714+
return j.template get<Index>();
1715+
}
1716+
static std::vector<Index> parse_vector(const nlohmann::json &j) {
1717+
std::vector<Index> out;
1718+
out.reserve(j.size());
1719+
for (auto &e : j) out.push_back(parse_index(e));
1720+
return out;
1721+
}
1722+
1723+
Result<Index> compute_volume(
1724+
nlohmann::json const &node,
1725+
std::vector<Index> const &dom_min_out,
1726+
std::vector<Index> const &dom_max_out)
1727+
{
1728+
auto driver = node["driver"].get<std::string>();
1729+
// --- invert this node’s transform to find its local domain slice ---
1730+
auto const &transform = node["transform"];
1731+
auto input_min = parse_vector(transform["input_inclusive_min"]);
1732+
auto input_max = parse_vector(transform["input_exclusive_max"]);
1733+
size_t rank = input_min.size();
1734+
1735+
// build inv_map[input_dim] = { output_dim, offset }
1736+
std::vector<std::pair<size_t,Index>> inv_map(rank);
1737+
if (transform.contains("output")) {
1738+
auto out = transform["output"];
1739+
for (size_t j = 0; j < out.size() && j < rank; ++j) {
1740+
size_t idim = out[j]["input_dimension"].get<size_t>();
1741+
Index off = out[j].contains("offset")
1742+
? parse_index(out[j]["offset"])
1743+
: 0;
1744+
inv_map[idim] = {j, off};
1745+
}
1746+
} else {
1747+
for (size_t i = 0; i < rank; ++i) inv_map[i] = {i, 0};
1748+
}
1749+
1750+
// compute this node’s *input* slice by intersecting
1751+
// input_[inclusive|exclusive] with (dom_out - offset)
1752+
std::vector<Index> local_min(rank), local_max(rank);
1753+
for (size_t i = 0; i < rank; ++i) {
1754+
auto [j, off] = inv_map[i];
1755+
Index lo = dom_min_out[j] - off;
1756+
Index hi = dom_max_out[j] - off;
1757+
local_min[i] = std::max(input_min[i], lo);
1758+
local_max[i] = std::min(input_max[i], hi);
1759+
if (local_min[i] >= local_max[i]) {
1760+
// no overlap → zero volume
1761+
return 0;
1762+
}
1763+
}
1764+
1765+
if (driver == "zarr") {
1766+
// leaf: just multiply extents
1767+
Index vol = 1;
1768+
for (size_t i = 0; i < rank; ++i) {
1769+
vol *= (local_max[i] - local_min[i]);
1770+
}
1771+
return vol;
1772+
}
1773+
1774+
// stack: sum child volumes
1775+
Index sum = 0;
1776+
for (auto const &child : node["layers"]) {
1777+
MDIO_ASSIGN_OR_RETURN(
1778+
Index child_vol,
1779+
compute_volume(child, local_min, local_max));
1780+
sum += child_vol;
1781+
}
1782+
return sum;
1783+
}
1784+
1785+
// ——— flatten an output‐coord `coord_out` into a flat offset ———
1786+
Result<Index> flatten_offset(
1787+
nlohmann::json const &node,
1788+
std::vector<Index> const &coord_out,
1789+
std::vector<Index> const &dom_min_out,
1790+
std::vector<Index> const &dom_max_out)
1791+
{
1792+
auto driver = node["driver"].get<std::string>();
1793+
auto const &transform = node["transform"];
1794+
auto input_min = parse_vector(transform["input_inclusive_min"]);
1795+
auto input_max = parse_vector(transform["input_exclusive_max"]);
1796+
size_t rank = input_min.size();
1797+
1798+
// build inv_map[input_dim] = { output_dim, offset }
1799+
std::vector<std::pair<size_t,Index>> inv_map(rank);
1800+
if (transform.contains("output")) {
1801+
auto out = transform["output"];
1802+
for (size_t j = 0; j < out.size() && j < rank; ++j) {
1803+
size_t idim = out[j]["input_dimension"].get<size_t>();
1804+
Index off = out[j].contains("offset")
1805+
? parse_index(out[j]["offset"])
1806+
: 0;
1807+
inv_map[idim] = {j, off};
1808+
}
1809+
} else {
1810+
for (size_t i = 0; i < rank; ++i) inv_map[i] = {i, 0};
1811+
}
1812+
1813+
// compute this node’s *input* slice
1814+
std::vector<Index> local_min(rank), local_max(rank);
1815+
for (size_t i = 0; i < rank; ++i) {
1816+
auto [j, off] = inv_map[i];
1817+
Index lo = dom_min_out[j] - off;
1818+
Index hi = dom_max_out[j] - off;
1819+
local_min[i] = std::max(input_min[i], lo);
1820+
local_max[i] = std::min(input_max[i], hi);
1821+
if (local_min[i] >= local_max[i]) {
1822+
return absl::NotFoundError("No overlap in flatten_offset");
1823+
}
1824+
}
1825+
1826+
if (driver == "zarr") {
1827+
// leaf: map coord_out → local input coords, then dot with strides
1828+
std::vector<Index> idx(rank);
1829+
for (size_t i = 0; i < rank; ++i) {
1830+
auto [j, off] = inv_map[i];
1831+
idx[i] = coord_out[j] - off - input_min[i];
1832+
}
1833+
// compute C-order strides from the *full* metadata.shape
1834+
auto shape = node["metadata"]["shape"]
1835+
.template get<std::vector<Index>>();
1836+
std::vector<Index> stride(rank,1);
1837+
for (int d = int(rank)-2; d >= 0; --d) {
1838+
stride[d] = stride[d+1] * shape[d+1];
1839+
}
1840+
Index off = 0;
1841+
for (size_t i = 0; i < rank; ++i) {
1842+
off += idx[i] * stride[i];
1843+
}
1844+
return off;
1845+
}
1846+
1847+
// stack: find which child holds coord_out, summing skips
1848+
Index base = 0;
1849+
for (auto const &child : node["layers"]) {
1850+
// compute this child’s volume IN THIS NODE’S slice:
1851+
MDIO_ASSIGN_OR_RETURN(
1852+
Index child_vol,
1853+
compute_volume(child, local_min, local_max));
1854+
1855+
// but also test whether coord_out lives in that child:
1856+
// we can re‑use flatten_offset(child, coord_out, local_min, local_max)
1857+
// but guard against NotFound to decide skip vs descend:
1858+
auto try_descend = flatten_offset(child, coord_out, local_min, local_max);
1859+
if (try_descend.ok()) {
1860+
return base + try_descend.value();
1861+
}
1862+
// else: coord not here ⇒ skip its volume
1863+
base += child_vol;
1864+
}
1865+
1866+
return absl::NotFoundError("Coordinate fell into no stack child");
1867+
}
1868+
1869+
mdio::Result<Index> get_true_offset() {
1870+
// 1) pull the full spec
1871+
MDIO_ASSIGN_OR_RETURN(nlohmann::json spec, get_spec());
1872+
1873+
// 2) compute the true N‑D domain [inclusive_min,exclusive_max)
1874+
MDIO_ASSIGN_OR_RETURN(auto true_dom, get_true_domain());
1875+
auto const &true_min = true_dom.first;
1876+
1877+
// 3) global domain = root transform
1878+
auto global_min = parse_vector(spec["transform"]["input_inclusive_min"]);
1879+
auto global_max = parse_vector(spec["transform"]["input_exclusive_max"]);
1880+
1881+
// 4) flatten!
1882+
return flatten_offset(spec, true_min, global_min, global_max);
1883+
}
1884+
16471885
// ===========================Member data getters===========================
16481886
/**
16491887
* @brief Gets the name of the variable.
@@ -1932,35 +2170,26 @@ struct VariableData {
19322170
* }
19332171
* @endcode
19342172
*/
1935-
// ptrdiff_t get_flattened_offset() {
1936-
// auto accessor = get_data_accessor();
1937-
// // The raw pointer to the data. May not start at 0.
1938-
// auto origin_ptr = accessor.data();
1939-
// // The raw pointer to the first element of the data.
1940-
// auto offset_ptr = accessor.byte_strided_origin_pointer();
1941-
// char* origin_addr = reinterpret_cast<char*>(origin_ptr);
1942-
// // We have to get the raw pointer
1943-
// char* offset_addr = reinterpret_cast<char*>(offset_ptr.get());
1944-
// ptrdiff_t byte_diff = offset_addr - origin_addr;
1945-
// return byte_diff / dtype().size();
1946-
// }
19472173
ptrdiff_t get_flattened_offset() {
19482174
auto accessor = get_data_accessor();
1949-
1950-
// 1) get the domain origin (inclusive min) for each dim
1951-
auto origin = dimensions().origin(); // vector<Index>
1952-
1953-
// 2) get the byte‑stride for advancing 1 element in each dim
1954-
auto byte_strides = accessor.byte_strides(); // vector<ptrdiff_t>
1955-
1956-
// 3) dot(origin, byte_strides)
1957-
ptrdiff_t byte_offset = 0;
1958-
for (size_t i = 0; i < origin.size(); ++i) {
1959-
byte_offset += origin[i] * byte_strides[i];
1960-
}
1961-
1962-
// 4) convert from bytes → element‑counts
1963-
return byte_offset / dtype().size();
2175+
// The raw pointer to the data. May not start at 0.
2176+
auto origin_ptr = accessor.data();
2177+
// The raw pointer to the first element of the data.
2178+
auto offset_ptr = accessor.byte_strided_origin_pointer();
2179+
char* origin_addr = reinterpret_cast<char*>(origin_ptr);
2180+
// We have to get the raw pointer
2181+
char* offset_addr = reinterpret_cast<char*>(offset_ptr.get());
2182+
ptrdiff_t byte_diff = offset_addr - origin_addr;
2183+
2184+
// auto true_domain_res = get_true_domain();
2185+
// if (!true_domain_res.status().ok()) {
2186+
// std::cerr << "Error getting true domain: " << true_domain_res.status() << std::endl;
2187+
// } else {
2188+
// auto true_domain = true_domain_res.value();
2189+
// std::cout << "To access the true domain, you must offset the returned offset by " << true_domain.first << std::endl;
2190+
// }
2191+
2192+
return byte_diff / dtype().size();
19642193
}
19652194

19662195
// An identifier for the variable.

0 commit comments

Comments
 (0)