@@ -148,23 +148,14 @@ make sure you have restarted the kernel since doing `pip install`. Then re-run t
### 3.1 AWS S3 paths

```{code-cell}
-s3_bucket = "irsa-fornax-testdata"
-euclid_prefix = "EUCLID/q1/catalogues"
+s3_bucket = "nasa-irsa-euclid-q1"
+euclid_prefix = "contributed/q1/merged_objects/hats"

euclid_hats_collection_uri = f"s3://{s3_bucket}/{euclid_prefix}"  # for lsdb
-euclid_parquet_metadata_path = f"{s3_bucket}/{euclid_prefix}/hats/dataset/_metadata"  # for pyarrow
-euclid_parquet_schema_path = f"{s3_bucket}/{euclid_prefix}/hats/dataset/_common_metadata"  # for pyarrow
-
-# # Temporary try/except to handle credentials in different environments before public release.
-# try:
-#     # If running from within IPAC's network, your IP address acts as your credentials so this should work.
-#     lsdb.read_hats(euclid_hats_collection_uri)
-# except PermissionError:
-#     # If running from Fornax, credentials are provided automatically under the hood but
-#     # lsdb ignores them in the call above. Construct a UPath which will pick up the credentials.
-#     from upath import UPath
-
-#     euclid_hats_collection_uri = UPath(euclid_hats_collection_uri)
+euclid_parquet_metadata_path = f"{s3_bucket}/{euclid_prefix}/euclid_q1_merged_objects-hats/dataset/_metadata"  # for pyarrow
+euclid_parquet_schema_path = f"{s3_bucket}/{euclid_prefix}/euclid_q1_merged_objects-hats/dataset/_common_metadata"  # for pyarrow
+
+s3_filesystem = pyarrow.fs.S3FileSystem(anonymous=True)
```

### 3.2 Helper functions
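The commented-out try/except above is dropped because the new `nasa-irsa-euclid-q1` bucket is public and can be read anonymously. A minimal sketch of reading the collection with lsdb under that assumption (not part of the diff; `anon=True` is forwarded by UPath to s3fs and may be unnecessary in environments that already provide AWS credentials):

```python
# Sketch only, assuming the bucket allows anonymous reads and lsdb/universal_pathlib are installed.
import lsdb
from upath import UPath

# UPath forwards anon=True to s3fs for unauthenticated access to the public bucket.
euclid_catalog = lsdb.read_hats(UPath(euclid_hats_collection_uri, anon=True))
euclid_catalog
```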
@@ -209,7 +200,7 @@ def flux_to_magnitude(flux_col_name: str, color_col_names: tuple[str, str] | Non

```{code-cell}
# Load the catalog as a PyArrow dataset. This is used in many examples below.
-dataset = pyarrow.dataset.parquet_dataset(euclid_parquet_metadata_path, partitioning="hive", filesystem=pyarrow.fs.S3FileSystem())
+dataset = pyarrow.dataset.parquet_dataset(euclid_parquet_metadata_path, partitioning="hive", filesystem=s3_filesystem)
```

### 3.4 Frequently used columns
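`pyarrow.dataset.parquet_dataset` builds the dataset from the HATS `_metadata` sidecar, so the file list and the unified schema are known up front without scanning any data files. A small sanity-check sketch (not part of the diff):

```python
# Sketch only: quick checks on the dataset built from the _metadata sidecar.
print(len(dataset.files), "parquet files listed in _metadata")
print(len(dataset.schema.names), "columns in the unified schema")
```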
@@ -434,7 +425,7 @@ spe_filter = (
# Execute the filter and load.
spe_df = dataset.to_table(columns=spe_columns, filter=spe_filter).to_pandas()
spe_df = spe_df.set_index(OBJECT_ID).sort_index()
-# 27s
+# 1m 2s
```

Plot redshift distributions
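For context on the updated timing note: `Dataset.to_table` pushes `spe_filter` down to the parquet row groups, so only matching rows are transferred before conversion to pandas. A sketch of the same pattern with placeholder column names (not the tutorial's actual filter):

```python
import pyarrow.compute as pc

# Sketch only: `spe_rank` and `spe_z` are placeholder column names, not the real spe_filter.
example_filter = (pc.field("spe_rank") == 0) & (pc.field("spe_z") > 0)
# example_df = dataset.to_table(columns=spe_columns, filter=example_filter).to_pandas()
```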
@@ -1061,7 +1052,6 @@ Here, we follow IRSA's
to inspect the parquet schema.

```{code-cell}
-s3_filesystem = pyarrow.fs.S3FileSystem()
schema = pyarrow.parquet.read_schema(euclid_parquet_schema_path, filesystem=s3_filesystem)
```

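With `s3_filesystem` now defined once in section 3.1, this cell only needs the `read_schema` call. A short sketch of inspecting the result (not part of the diff; column descriptions, where present, live in each field's metadata):

```python
# Sketch only: inspect the schema returned by pyarrow.parquet.read_schema.
print(len(schema.names), "columns")
first_field = schema.field(schema.names[0])
print(first_field.name, first_field.type, first_field.metadata)
```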