Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 49 additions & 20 deletions data/cosmos_chronic/ingest.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ if (!identical(process$raw_state, current_state)) {
mutate(
yr_num = as.integer(sub("yr", "", yr_idx)),
time = year_dates[yr_num],
suppressed_flag = as.integer(
trimws(as.character(.data[[measure_names[3]]])) == "10 or fewer"
)
# suppressed_flag = as.integer(
# trimws(as.character(.data[[measure_names[3]]])) == "10 or fewer"
# )
) %>%
mutate(across(
all_of(measure_names),
Expand All @@ -266,7 +266,7 @@ if (!identical(process$raw_state, current_state)) {
as.numeric(gsub("%", "", v))
}
)) %>%
select(age, geography, time, all_of(measure_names), suppressed_flag)
select(age, geography, time, all_of(measure_names))
}

# ---------------------------------------------------------------------------
Expand All @@ -287,20 +287,38 @@ if (!identical(process$raw_state, current_state)) {
ob_csv,
skip_rows = 14,
measure_names = c("obesity_dx_ccw", "obesity_bmi", "n_patients_ob")
) %>%
select(-n_patients_ob)
) # %>%
# rename(suppressed_flag_ob = suppressed_flag)

# ---------------------------------------------------------------------------
# 5. Combine state data and write
# ---------------------------------------------------------------------------
combined_state <- full_join(dm, ob, by = c("age", "geography", "time")) %>%
select(age, geography, time, diabetes_a1c_6_5, diabetes_dx_ccw,
obesity_bmi, obesity_dx_ccw, n_patients_chronic,
suppressed_flag) %>%
obesity_bmi, obesity_dx_ccw, n_patients_chronic, n_patients_ob) %>%
mutate(
suppressed_diabetes_a1c_6_5 = as.integer(is.na(diabetes_a1c_6_5)),
suppressed_diabetes_dx_ccw = as.integer(is.na(diabetes_dx_ccw)),
suppressed_obesity_bmi = as.integer(is.na(obesity_bmi)),
suppressed_obesity_dx_ccw = as.integer(is.na(obesity_dx_ccw)),
suppressed_n_patients_chronic = as.integer(is.na(n_patients_chronic)),
suppressed_n_patients_ob = as.integer(is.na(n_patients_ob)),
across(c(diabetes_a1c_6_5, diabetes_dx_ccw, obesity_bmi, obesity_dx_ccw,
n_patients_chronic, n_patients_ob),
~ replace(.x, is.na(.x), 0))
) %>%
arrange(geography, age, time)

vroom::vroom_write(combined_state, "standard/state_year.csv.gz", ",")

tmp <- combined_state %>% select(diabetes_a1c_6_5, suppressed_diabetes_a1c_6_5,
diabetes_dx_ccw, suppressed_diabetes_dx_ccw,
obesity_bmi, suppressed_obesity_bmi,
obesity_dx_ccw, suppressed_obesity_dx_ccw,
n_patients_chronic, suppressed_n_patients_chronic,
n_patients_ob, suppressed_n_patients_ob
)

# ---------------------------------------------------------------------------
# 6. County-level import function (CSV files from staging_chronic)
# ---------------------------------------------------------------------------
Expand All @@ -325,9 +343,9 @@ if (!identical(process$raw_state, current_state)) {
age = gsub("65 Years or more", "65+ Years", age),
age = if_else(grepl("Total", age), "Total", age),

suppressed_flag = as.integer(
trimws(n_patients_chronic) == "10 or fewer"
),
# suppressed_flag = as.integer(
# trimws(n_patients_chronic) == "10 or fewer"
#),
ccw = as.numeric(na_if(gsub("%", "", ccw), "-")),
lab = as.numeric(na_if(gsub("%", "", lab), "-")),
n_patients_chronic = as.numeric(if_else(
Expand All @@ -348,7 +366,7 @@ if (!identical(process$raw_state, current_state)) {
time = paste0(yearset, "-01-01")
) %>%
dplyr::select(age, geography, time, ccw, lab,
n_patients_chronic, suppressed_flag) %>%
n_patients_chronic) %>% #, suppressed_flag
filter(!is.na(geography))
}

Expand All @@ -360,18 +378,29 @@ if (!identical(process$raw_state, current_state)) {
rename(diabetes_a1c_6_5 = lab,
diabetes_dx_ccw = ccw)

all_obesity_county <- lapply(stage_obesity_county, chronic_import_county) %>%
bind_rows() %>%
rename(obesity_bmi = lab,
obesity_dx_ccw = ccw) %>%
all_obesity_county <- lapply(stage_obesity_county, chronic_import_county) |>
bind_rows() |>
rename(obesity_bmi = lab,
obesity_dx_ccw = ccw,
n_patients_ob_county = n_patients_chronic) |> #, suppressed_flag_ob = suppressed_flag
unique()

combined_county <- all_obesity_county %>%
dplyr::select(-n_patients_chronic, -suppressed_flag) %>%
full_join(all_diabetes_county, by = c("geography", "time", "age")) %>%
combined_county <- all_obesity_county |>
full_join(all_diabetes_county, by = c("geography", "time", "age")) |>
select(age, geography, time, diabetes_a1c_6_5, diabetes_dx_ccw,
obesity_bmi, obesity_dx_ccw, n_patients_chronic,
suppressed_flag) %>%
n_patients_ob_county) |>
mutate(
suppressed_diabetes_a1c_6_5 = as.integer(is.na(diabetes_a1c_6_5)),
suppressed_diabetes_dx_ccw = as.integer(is.na(diabetes_dx_ccw)),
suppressed_obesity_bmi = as.integer(is.na(obesity_bmi)),
suppressed_obesity_dx_ccw = as.integer(is.na(obesity_dx_ccw)),
suppressed_n_patients_chronic = as.integer(is.na(n_patients_chronic)),
suppressed_n_patients_ob_county = as.integer(is.na(n_patients_ob_county)),
across(c(diabetes_a1c_6_5, diabetes_dx_ccw, obesity_bmi, obesity_dx_ccw,
n_patients_chronic, n_patients_ob_county),
~ replace(.x, is.na(.x), 0))
) |>
arrange(geography, age, time)

vroom::vroom_write(combined_county, "standard/county_year.csv.gz", ",")
Expand Down
124 changes: 104 additions & 20 deletions data/cosmos_chronic/measure_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,34 @@
"id": "n_patients_chronic",
"short_name": "Number of patients (chronic)",
"long_name": "Number of patients in the chronic disease sample",
"short_description": "Total number of patients in the state-level chronic disease sample.",
"long_description": "Total number of patients in the state-level chronic disease sample (diabetes denominator). Data accessed via Epic Cosmos SlicerDicer. Counts fewer than 10 are suppressed and imputed as 5.",
"short_description": "Total number of patients in the chronic disease sample.",
"long_description": "Total number of patients in the chronic disease sample (diabetes denominator). Data accessed via Epic Cosmos SlicerDicer. Counts fewer than 10 are suppressed and imputed as 5.",
"measure_type": "Count",
"unit": "patients",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"n_obesity_county": {
"id": "n_obesity_county",
"short_name": "Number of patients (chronic, county)",
"n_patients_ob": {
"id": "n_patients_ob",
"short_name": "Number of patients (obesity, state)",
"long_name": "Number of patients in the state-level obesity sample",
"short_description": "Total number of patients in the state-level obesity sample.",
"long_description": "Total number of patients in the state-level obesity sample. Data accessed via Epic Cosmos SlicerDicer. Counts fewer than 10 are suppressed and imputed as 5.",
"measure_type": "Count",
"unit": "patients",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"n_patients_ob_county": {
"id": "n_patients_ob_county",
"short_name": "Number of patients (obesity, county)",
"long_name": "Number of patients in the county-level obesity sample",
"short_description": "Total number of patients in the county-level obesity sample.",
"long_description": "Total number of patients within the county-level obesity sample. Data from Epic Cosmos. County-level data may have high missingness in some states.",
"measure_type": "integer",
"unit": "patient",
"long_description": "Total number of patients in the county-level obesity sample. Data accessed via Epic Cosmos SlicerDicer. County-level data may have high missingness in some states. Counts fewer than 10 are suppressed and imputed as 5.",
"measure_type": "Count",
"unit": "patients",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},
Expand Down Expand Up @@ -71,18 +83,6 @@
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_flag": {
"id": "suppressed_flag",
"short_name": "Suppressed flag",
"long_name": "Suppression flag for imputed counts",
"short_description": "Indicates whether the value was suppressed and imputed.",
"long_description": "Flag indicating whether the original count was suppressed due to privacy protections (counts fewer than 10) and imputed as 5. A value of 1 indicates the count was suppressed and imputed; 0 indicates the reported value is exact.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"diabetes_dx_ccw": {
"id": "diabetes_dx_ccw",
"short_name": "Diabetes CCW definition",
Expand All @@ -95,6 +95,90 @@
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_diabetes_a1c_6_5": {
"id": "suppressed_diabetes_a1c_6_5",
"short_name": "Suppressed flag (diabetes A1c)",
"long_name": "Suppression flag for diabetes A1c >= 6.5% percent",
"short_description": "Indicates whether the diabetes_a1c_6_5 value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_diabetes_dx_ccw": {
"id": "suppressed_diabetes_dx_ccw",
"short_name": "Suppressed flag (diabetes CCW)",
"long_name": "Suppression flag for diabetes CCW definition percent",
"short_description": "Indicates whether the diabetes_dx_ccw value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_obesity_bmi": {
"id": "suppressed_obesity_bmi",
"short_name": "Suppressed flag (BMI)",
"long_name": "Suppression flag for BMI >= 30 percent",
"short_description": "Indicates whether the obesity_bmi value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_obesity_dx_ccw": {
"id": "suppressed_obesity_dx_ccw",
"short_name": "Suppressed flag (obesity CCW)",
"long_name": "Suppression flag for obesity CCW definition percent",
"short_description": "Indicates whether the obesity_dx_ccw value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_n_patients_chronic": {
"id": "suppressed_n_patients_chronic",
"short_name": "Suppressed flag (chronic patients)",
"long_name": "Suppression flag for number of patients in the chronic disease sample",
"short_description": "Indicates whether the n_patients_chronic value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_n_patients_ob": {
"id": "suppressed_n_patients_ob",
"short_name": "Suppressed flag (obesity patients, state)",
"long_name": "Suppression flag for number of patients in the state-level obesity sample",
"short_description": "Indicates whether the n_patients_ob value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"suppressed_n_patients_ob_county": {
"id": "suppressed_n_patients_ob_county",
"short_name": "Suppressed flag (obesity patients, county)",
"long_name": "Suppression flag for number of patients in the county-level obesity sample",
"short_description": "Indicates whether the n_patients_ob_county value was suppressed.",
"long_description": "Due to privacy protections, percents with fewer than 5 events are suppressed and coded as 0.",
"measure_type": "Binary",
"unit": "0/1",
"time_resolution": "Year",
"sources": [{ "id": "epic_cosmos" }]
},

"_sources": {
"epic_cosmos": {
"name": "Epic Cosmos",
Expand Down
Loading