@@ -3,7 +3,7 @@ source("2a_model/src/write_model_config_files.R")
33
44p2a_targets_list <- list (
55
6- # # PREPARE (RENAME, JOIN) INPUT AND OUTPUT FILES ##
6+ # # 1) COMBINE AND FORMAT MODEL-READY INPUTS AND OUTPUTS ##
77 # join met data with light input data
88 tar_target(
99 p2a_met_light_data ,
@@ -19,26 +19,30 @@ p2a_targets_list <- list(
1919 relocate(date , .after = COMID )
2020 ),
2121
22- # match site_ids to seg_ids
22+ # join met and light data with site_ids (resulting data frame will have
23+ # 16 unique COMIDs, which matches the number of well-observed reaches).
2324 tar_target(
2425 p2a_met_data_w_sites ,
2526 match_site_ids_to_segs(p2a_met_light_data , p2_sites_w_segs )
2627 ),
2728
28- # match seg attributes with site_ids
29+ # join segment attributes with site_ids (resulting data frame will have one
30+ # row for each unique COMID x site_id in the lower DRB; n = 10,111).
2931 tar_target(
3032 p2a_seg_attr_w_sites ,
3133 match_site_ids_to_segs(p2_seg_attr_data , p2_sites_w_segs )
3234 ),
3335
34- # join the metab data with the DO observations
36+ # join the metabolism data with the DO observations (use full_join to include
37+ # all rows in both the DO data and the metab data).
3538 tar_target(
3639 p2a_do_and_metab ,
3740 p2_daily_with_seg_ids %> %
3841 full_join(p2_metab_filtered , by = c(" site_id" , " date" ))
3942 ),
4043
41- # # SPLIT SITES INTO (train) and (train and validation) ##
44+
45+ # # 2) SPLIT SITES INTO (train) and (train and validation) ##
4246 # char vector of well-observed train sites
4347 tar_target(
4448 p2a_trn_sites ,
@@ -87,7 +91,8 @@ p2a_targets_list <- list(
8791 sf :: st_as_sf(. , coords = c(" lon" ," lat" ), crs = unique(. $ epsg ))
8892 ),
8993
90- # # WRITE MODEL CONFIGURATION FILES ##
94+
95+ # # 3) WRITE MODEL CONFIGURATION FILES ##
9196 # Write base config file using inputs and parameters defined in _targets.R
9297 tar_target(
9398 p2a_config_base_yml ,
@@ -141,35 +146,40 @@ p2a_targets_list <- list(
141146 format = " file"
142147 ),
143148
144- # # WRITE OUT PARTITION INPUT AND OUTPUT DATA ##
145- # write met and seg attribute data for trn/val sites to zarr
146- # note - I have to subset inputs to only include the train/val sites before
147- # passing to subset_and_write_zarr or else I get a memory error on the join
148149
149- # # CHANGING X VARIABLES ##
150- # To change x variables for the model, they have to be added to the
151- # model specific config.yml file which can be found in
152- # 2a_model/src/model/{model ID}/config.yml
153-
154- # write trn and val input and output data to zarr
150+ # # 4) WRITE OUT PARTITION INPUT AND OUTPUT DATA ##
151+ # Subset trn/val input and output data to well-observed sites and format
152+ # for export. [Jeff]: note - I have to subset inputs to only include the
153+ # train/val sites before passing to subset_and_write_zarr or else I get a
154+ # memory error on the join.
155155 tar_target(
156156 p2a_well_obs_data ,
157157 {
158+ # use inner_join to keep sites that are within the set of trn/val sites
159+ # and are represented in both the met data and the seg attr data.
158160 inputs <- p2a_met_data_w_sites %> %
159161 filter(site_id %in% p2a_trn_val_sites ) %> %
160162 inner_join(p2a_seg_attr_w_sites , by = c(" site_id" , " COMID" ))
161163
162164 inputs_and_outputs <- inputs %> %
163- left_join(p2a_do_and_metab , by = c(" site_id" , " date" ))
165+ left_join(p2a_do_and_metab , by = c(" site_id" , " COMID " , " date" ))
164166
165- # note that if the name of well_obs_io.zarr is changed below, this change must
166- # also be made in 2a_model/src/Snakefile_base.smk (lines 32, 103, and 177) and
167- # in 2a_model/src/visualize_models.smk (line 6).
168- write_df_to_zarr(inputs_and_outputs , c(" site_id" , " date" ), " 2a_model/out/well_obs_io.zarr" )
169- },
170- format = " file"
167+ inputs_and_outputs
168+ }
171169 ),
172170
171+ # Write trn and val input and output data to zarr. Note that if the name of
172+ # well_obs_io.zarr is changed below, this change must also be made in
173+ # 2a_model/src/Snakefile_base.smk (lines 32, 103, and 177) and in
174+ # 2a_model/src/visualize_models.smk (line 6).
175+ tar_target(
176+ p2a_well_obs_data_zarr ,
177+ write_df_to_zarr(p2a_well_obs_data , c(" site_id" , " date" ), " 2a_model/out/well_obs_io.zarr" ),
178+ format = " file"
179+ ),
180+
181+
182+ # # 5) GATHER MODEL IDS AND KICK OFF SNAKEMAKE WORKFLOW TO MAKE MODEL PREDICTIONS ##
173183 # gather model ids - add to this list when you want to reproduce
174184 # outputs from a new model
175185 tar_target(
@@ -199,7 +209,8 @@ p2a_targets_list <- list(
199209 tar_target(
200210 p2a_metrics_files ,
201211 {
202- # we need these to make the prepped data file
212+ # we need these to make the prepped data file, so force a dependency of this
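213+ # target on p2a_well_obs_data. NOTE(review): well_obs_io.zarr is now written by p2a_well_obs_data_zarr, not p2a_well_obs_data; confirm which target must be referenced here.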
203214 p2a_well_obs_data
204215
205216 base_dir <- " 2a_model/src/models"
0 commit comments