@@ -17,7 +17,6 @@ log_info("Building betydata package data objects...")
1717
# Create output directories
# showWarnings = FALSE keeps reruns quiet when the directory already exists.
dir.create("data", showWarnings = FALSE)
20- dir.create(" inst/extdata/parquet" , showWarnings = FALSE , recursive = TRUE )
2120
2221# Column type specifications for stable parsing
2322traitsview_cols <- cols(
@@ -70,12 +69,28 @@ traitsview <- read_csv(
7069 na = c(" " , " NA" )
7170)
7271
73- # Filter out checked = -1
74- traitsview <- traitsview [is.na(traitsview $ checked ) | traitsview $ checked != - 1 , ]
# Summarize access_level before filtering so non-public records show up in
# the build log. useNA = "ifany" adds a bucket for missing values.
access_summary <- table(traitsview$access_level, useNA = "ifany")
log_info(" access_level distribution:")
# Iterate by position: the NA bucket has a missing name, so looking a count
# up by name with `[[` would fail for it.
for (i in seq_along(access_summary)) {
  log_info(sprintf(
    " access_level = %s: %d records",
    names(access_summary)[i], access_summary[[i]]
  ))
}

# Keep only public records (access_level == 4). A missing access_level cannot
# be confirmed public, so it is treated as non-public. `%in%` never returns
# NA, which avoids the all-NA rows that indexing with an NA mask would add.
is_public <- traitsview$access_level %in% 4
non_public <- sum(!is_public)
if (non_public > 0) {
  log_info(sprintf(
    " Removing %d non-public records (access_level != 4)",
    non_public
  ))
  traitsview <- traitsview[is_public, ]
}

# Drop access_level column (all remaining records are public)
traitsview$access_level <- NULL
7888
# Convert checked = NA to checked = 0, then remove failed QC records.
# After the replacement no NA remains, so `checked >= 0` keeps values of 0
# and above and drops negative ones.
traitsview <- traitsview |>
  dplyr::mutate(checked = ifelse(is.na(checked), 0L, checked)) |>
  dplyr::filter(checked >= 0)
93+
7994# Reorder columns: key analytical columns first, IDs and metadata last
8095col_order <- c(
8196 " trait" , " mean" , " units" , " scientificname" , " genus" ,
@@ -141,30 +156,6 @@ if (!is.null(pfts_priors)) usethis::use_data(pfts_priors, overwrite = TRUE, comp
# Save each table with usethis::use_data() using xz compression; a table
# that is NULL is skipped.
if (!is.null(managements_treatments)) {
  usethis::use_data(managements_treatments, overwrite = TRUE, compress = "xz")
}
if (!is.null(cultivars_pfts)) {
  usethis::use_data(cultivars_pfts, overwrite = TRUE, compress = "xz")
}
143158
144-
145- log_info(" Saving Parquet files to inst/extdata/parquet/..." )
146- if (requireNamespace(" arrow" , quietly = TRUE )) {
147- arrow :: write_parquet(traitsview , " inst/extdata/parquet/traitsview.parquet" )
148- if (! is.null(species )) arrow :: write_parquet(species , " inst/extdata/parquet/species.parquet" )
149- if (! is.null(sites )) arrow :: write_parquet(sites , " inst/extdata/parquet/sites.parquet" )
150- if (! is.null(variables )) arrow :: write_parquet(variables , " inst/extdata/parquet/variables.parquet" )
151- if (! is.null(citations )) arrow :: write_parquet(citations , " inst/extdata/parquet/citations.parquet" )
152- if (! is.null(cultivars )) arrow :: write_parquet(cultivars , " inst/extdata/parquet/cultivars.parquet" )
153- if (! is.null(methods )) arrow :: write_parquet(methods , " inst/extdata/parquet/methods.parquet" )
154- if (! is.null(treatments )) arrow :: write_parquet(treatments , " inst/extdata/parquet/treatments.parquet" )
155- if (! is.null(pfts )) arrow :: write_parquet(pfts , " inst/extdata/parquet/pfts.parquet" )
156- if (! is.null(priors )) arrow :: write_parquet(priors , " inst/extdata/parquet/priors.parquet" )
157- if (! is.null(managements )) arrow :: write_parquet(managements , " inst/extdata/parquet/managements.parquet" )
158- if (! is.null(entities )) arrow :: write_parquet(entities , " inst/extdata/parquet/entities.parquet" )
159- if (! is.null(pfts_species )) arrow :: write_parquet(pfts_species , " inst/extdata/parquet/pfts_species.parquet" )
160- if (! is.null(pfts_priors )) arrow :: write_parquet(pfts_priors , " inst/extdata/parquet/pfts_priors.parquet" )
161- if (! is.null(managements_treatments )) arrow :: write_parquet(managements_treatments , " inst/extdata/parquet/managements_treatments.parquet" )
162- if (! is.null(cultivars_pfts )) arrow :: write_parquet(cultivars_pfts , " inst/extdata/parquet/cultivars_pfts.parquet" )
163- } else {
164- log_info(" arrow package not available, skipping Parquet export" )
165- }
166-
167-
# --- Generate datapackage.json ---
log_info(" Generating inst/metadata/datapackage.json...")
# recursive = TRUE also creates inst/ when it is missing; showWarnings = FALSE
# keeps reruns quiet when the directory already exists.
dir.create("inst/metadata", showWarnings = FALSE, recursive = TRUE)
@@ -197,8 +188,9 @@ resources <- lapply(datasets, function(nm) {
197188 df <- get(nm )
198189 base <- list (
199190 name = nm ,
200- path = paste0(" data/" , nm , " .rda" ),
201- format = " rda"
191+ path = paste0(" data-raw/csv/" , nm , " .csv" ),
192+ format = " csv" ,
193+ mediatype = " text/csv"
202194 )
203195 if (nm == " traitsview" ) {
204196 base $ title <- " Traits and Yields View"
0 commit comments