diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index d935b2a0..871aa21d 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -49,3 +49,4 @@ jobs: with: upload-snapshots: true build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' + error-on: '"error"' diff --git a/DESCRIPTION b/DESCRIPTION index 3d617bd9..ab11f694 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sirfunctions Title: Key Functions to Analyze Global Polio Surveillance Data -Version: 2.1.6 +Version: 2.1.7 Authors@R: c( person("Nishant", "Kishore", , "ynm2@cdc.gov", role = c("aut"), comment = c(ORCID = "0000-0003-0408-2747")), @@ -47,7 +47,8 @@ Imports: stats, rappdirs, rmarkdown, - grDevices + grDevices, + pins Suggests: arrow, qs2, diff --git a/NAMESPACE b/NAMESPACE index 8dff4417..50753d80 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,6 +104,7 @@ export(get_cdc_childvaxview_data) export(get_constant) export(get_ctry_abbrev) export(get_diff_cols) +export(get_edav_pins_board) export(get_lab_date_col_missingness) export(get_lab_locs) export(get_ppt_template) @@ -171,6 +172,7 @@ importFrom(lifecycle,badge) importFrom(lifecycle,deprecate_warn) importFrom(lifecycle,deprecated) importFrom(lifecycle,is_present) +importFrom(pins,board_azure) importFrom(purrr,map) importFrom(rappdirs,user_data_dir) importFrom(readr,read_csv) diff --git a/R/dal.R b/R/dal.R index 2ec10532..54d62998 100644 --- a/R/dal.R +++ b/R/dal.R @@ -3695,7 +3695,7 @@ force_load_polio_data_cache <- function(attach.spatial.data, output_format = ".r # Check if spatial data exists in the cache if (sirfunctions_io("exists.file", NULL, spatial_data_path, edav = FALSE)) { cli::cli_alert_info("Loading spatial data from cache") - spatial_data <- sirfunctions_io("read", NULL, spatial_data_path, edav = FALSE) + spatial.data <- sirfunctions_io("read", NULL, spatial_data_path, edav = FALSE) raw.data$global.ctry <- spatial.data$global.ctry raw.data$global.prov <- spatial.data$global.prov diff --git a/R/dr.lab.functions.R b/R/dr.lab.functions.R index 14d6bf9d..36df2c88 100644 --- a/R/dr.lab.functions.R +++ b/R/dr.lab.functions.R @@ -1232,7 +1232,7 @@ clean_lab_data <- function(lab_data, start_date, end_date, )) |> mutate(seq.capacity = if_else(country %in% c("NIGERIA", "UGANDA") & DateStoolCollected >= as_date("2025-02-01"), "Sequencing capacity", seq.capacity)) - return(lab_data) + return(lab_data_man) } diff --git a/R/dr.main.functions.R b/R/dr.main.functions.R index a29f74ca..3be9875a 100644 --- a/R/dr.main.functions.R +++ b/R/dr.main.functions.R @@ -251,9 +251,8 @@ update_data <- polis_folder = .polis_folder, use_edav = .use_edav ) - country_name <- stringr::str_replace_all(country_name, ", ", ",") |> - stringr::str_split(",") |> - unlist() + + country_data <- sirfunctions::extract_country_data(country_name, raw_data) readr::write_rds(country_data, path_to_save) message(paste0("Data saved at:\n", dr_data_path)) @@ -479,7 +478,7 @@ fetch_dr_data <- function(country, year, local_dr_repo) { #' environmental variables (i.e., [Sys.getenv()], where values for `x` related to #' the desk review is prefixed with `"DR"`) . The function only supports running one country at a time. #' -#' @param country_name `str` Name of the country. +#' @param country_name `str` Name of the country. You may pass multiple countries as a vector of strings. #' @param start_date `str` Start date of the desk review. If `NULL`, defaults to four years #' from when the function was ran on January 1st. #' @param end_date `str` End date of the desk review. If `NULL`, defaults to six weeks from when @@ -500,6 +499,7 @@ fetch_dr_data <- function(country, year, local_dr_repo) { #' \dontrun{ #' ctry.data <- init_dr("algeria", source = F) # Sets up folder in the current working directory #' ctry.data <- init_dr("algeria", branch = "dev") # Use functions from the dev branch +#' ctry.data <- init_dr(c("algeria", "nigeria"), source = F) # Multiple countries. #' } #' #' @export @@ -540,13 +540,14 @@ init_dr <- # Set up local directory for storing for data and metadata df_name <- - stringr::str_c(stringr::str_to_lower(country_name)) + stringr::str_c(stringr::str_to_lower(country_name)) |> + paste(collapse = ", ") # Relative path of where data and metadata is stored dr_path <- file.path( local_dr_folder, - stringr::str_to_lower(country_name), + stringr::str_to_lower(df_name), year ) diff --git a/R/get_edav_pins_board.R b/R/get_edav_pins_board.R new file mode 100644 index 00000000..bc0b476f --- /dev/null +++ b/R/get_edav_pins_board.R @@ -0,0 +1,24 @@ +#' Obtain a pins board from EDAV +#' +#' @description +#' This is a convenience function to obtain a pins board in EDAV. After the pins +#' board is loaded, several functions from the pins package are available to explore +#' the board. Please see the pins package [get started](https://pins.rstudio.com/articles/pins.html) page. +#' +#' @param board_loc `str` Location of the pins board in Azure. +#' @param azcontainer `azcontainer` An Azure container. +#' +#' @returns `pins board` An Azure pins board. +#' @export +#' +#' @examples +#' \dontrun{ +#' edav_board <- get_edav_pins_board() +#' } +get_edav_pins_board <- function(board_loc = "GID/PEB/SIR/pins_board", + azcontainer = get_azure_storage_connection()) { + + azboard <- pins::board_azure(azcontainer, board_loc) + + return(azboard) +} diff --git a/R/kpi.figure.functions.R b/R/kpi.figure.functions.R index 4e82d4e3..23212f87 100644 --- a/R/kpi.figure.functions.R +++ b/R/kpi.figure.functions.R @@ -529,9 +529,11 @@ generate_kpi_npafp_bar <- function(c1, afp_data, ctry_abbrev <- get_ctry_abbrev(afp_data) priority_ctry <- c1 |> dplyr::left_join(ctry_abbrev, - by = c("ctry" = "place.admin.0", "Region" = "whoregion")) |> + by = c("ctry" = "place.admin.0")) |> dplyr::filter(.data$`SG Priority Level` == "HIGH") |> - dplyr::mutate(prop_met_npafp = .data$prop_met_npafp) + # Remove whoregion from ctry_abbrev and use Region since + # c1 already accounts for Indonesia change and it's not used here + dplyr::select(-whoregion) bar_plot <- generate_kpi_barchart(priority_ctry, "prop_met_npafp", @@ -587,8 +589,11 @@ generate_kpi_evdetect_bar <- function(c1, afp_data, ctry_abbrev <- get_ctry_abbrev(afp_data) priority_ctry <- c1 |> dplyr::left_join(ctry_abbrev, - by = c("ctry" = "place.admin.0", "Region" = "whoregion")) |> - dplyr::mutate(prop_met_ev = .data$prop_met_ev) + by = c("ctry" = "place.admin.0")) |> + # Remove whoregion from ctry_abbrev and use Region since + # c1 already accounts for Indonesia change and it's not used here + dplyr::select(-whoregion) |> + dplyr::filter(`SG Priority Level` == "HIGH") if (!is.null(who_region)) { priority_ctry <- priority_ctry |> dplyr::filter(Region %in% who_region) @@ -635,9 +640,11 @@ generate_kpi_stoolad_bar <- function(c1, afp_data, ctry_abbrev <- get_ctry_abbrev(afp_data) priority_ctry <- c1 |> dplyr::left_join(ctry_abbrev, - by = c("ctry" = "place.admin.0", "Region" = "whoregion")) |> - dplyr::filter(.data$`SG Priority Level` == "HIGH") |> - dplyr::mutate(prop_met_stool = .data$prop_met_stool) + by = c("ctry" = "place.admin.0")) |> + # Remove whoregion from ctry_abbrev and use Region since + # c1 already accounts for Indonesia change and it's not used here + dplyr::select(-whoregion) |> + dplyr::filter(.data$`SG Priority Level` == "HIGH") bar_plot <- generate_kpi_barchart(priority_ctry, "prop_met_stool", @@ -736,8 +743,14 @@ generate_kpi_violin <- function( #' @param rolling `logical` Using rolling periods or year-to-year? Defaults to `TRUE`. #' @param who_region `list` Regions to display. Defaults to `NULL`, which shows #' all of the regions. +#' @param risk_table `tibble` Priority level of each country. Defaults to `NULL`, +#' which will download the information directly from EDAV. +#' @param lab_locs `tibble` Summary of the sequencing capacities of labs. +#' Output of [get_lab_locs()]. Defaults to `NULL`, which will download the information +#' directly from EDAV. #' @param output_path `str` Where to output the figure to. -#' @param y_max `num` The maximum y-axis value. +#' @param y_max `num` The maximum y-axis value. Defaults to `NULL`, which will compute the +#' y_max as the max value of days to notify hq. #' #' @returns `ggplot` A violin plot showing timeliness of detection. #' @export @@ -752,12 +765,14 @@ generate_timely_det_violin <- function(raw_data, priority_level = c("HIGH", "MEDIUM", "LOW (WATCHLIST)", "LOW"), who_region = NULL, rolling = TRUE, + risk_table = NULL, + lab_locs = NULL, output_path = Sys.getenv("KPI_FIGURES"), - y_max = 250) { + y_max = NULL) { start_date <- lubridate::as_date(start_date) end_date <- lubridate::as_date(end_date) - pos <- generate_pos_timeliness(raw_data, start_date, end_date) + pos <- generate_pos_timeliness(raw_data, start_date, end_date, risk_table, lab_locs) ctry_abbrev <- get_ctry_abbrev(raw_data$afp) color.risk.cat <- c( "HIGH" = "#d73027", @@ -774,27 +789,40 @@ generate_timely_det_violin <- function(raw_data, } pos_filtered <- pos |> + dplyr::mutate(whoregion = get_region(place.admin.0)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> dplyr::filter(.data$`SG Priority Level` %in% priority_level, .data$whoregion %in% who_region) |> dplyr::left_join(ctry_abbrev, by = c("place.admin.0", "whoregion")) |> - dplyr::mutate(seq_lab = case_when( - .data$seq.capacity == "no" ~ "No sequencing capacity", - .data$seq.capacity == "yes" ~ "Sequencing capacity" - )) |> + # Manually change the region of Indonesia based on change + # since get_region() defaults to WPRO now, we have to do the opposite + # to assign Indonesia samples to SEARO prior to May 23, 2025 + # dplyr::mutate(whoregion = ifelse(place.admin.0 == "INDONESIA" & + # analysis_year_start < lubridate::as_date("2025-05-23"), "SEARO", whoregion)) |> + dplyr::mutate(seq_lab = stringr::str_to_lower(seq.capacity), + seq_lab = ifelse(seq_lab == "yes", + "Sequencing capacity", + "No sequencing capacity" + )) |> dplyr::filter(!is.na(.data$seq_lab)) |> dplyr::mutate( sg_priority_level = factor(.data$`SG Priority Level`, levels = c( "LOW", "LOW (WATCHLIST)", "MEDIUM", "HIGH")) ) + if (is.null(y_max)) { + y_max <- max(pos_filtered$ontonothq, na.rm = TRUE) + } + if (rolling) { - facets <- ggh4x::facet_nested(rolling_period~seq_lab+who.region, + facets <- ggh4x::facet_nested(rolling_period~seq_lab+whoregion, scales = "free", space = "free", labeller = ggplot2::label_wrap_gen(13), switch = "y") } else { - facets <- ggh4x::facet_nested(year~seq_lab+who.region, + facets <- ggh4x::facet_nested(year~seq_lab+whoregion, scales = "free", space = "free", labeller = ggplot2::label_wrap_gen(13), switch = "y") @@ -811,7 +839,7 @@ generate_timely_det_violin <- function(raw_data, plot_mock_legend <- ggpubr::get_legend(plot_mock) plot_1 <- generate_kpi_violin(pos_filtered |> - filter(seq.capacity == "no"), "ctry.short", "ontonothq", + filter(seq_lab == "No sequencing capacity"), "ctry.short", "ontonothq", "sg_priority_level", facets, 46, y.max = y_max) @@ -820,7 +848,7 @@ generate_timely_det_violin <- function(raw_data, name = "Priority Level", na.value = "white") - plot_2 <- generate_kpi_violin(pos_filtered |> filter(seq.capacity == "yes"), "ctry.short", "ontonothq", + plot_2 <- generate_kpi_violin(pos_filtered |> filter(seq_lab == "Sequencing capacity"), "ctry.short", "ontonothq", "sg_priority_level", facets, 35, y.max = y_max) @@ -917,6 +945,9 @@ generate_timely_ship_violin <- function(afp_data, afp_filtered <- afp_data |> dplyr::filter(.data$`SG Priority Level` %in% priority_level, .data$whoregion %in% who_region) |> + dplyr::mutate(whoregion = get_region(place.admin.0)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> dplyr::left_join(ctry_abbrev, by = c("ctry" = "place.admin.0", "whoregion")) |> dplyr::mutate( @@ -1038,10 +1069,16 @@ generate_lab_culture_violin <- function(lab_data, afp_data, lab_filtered <- lab_data |> dplyr::left_join(ctry_abbrev, by = c("country" = "place.admin.0", "whoregion")) |> + dplyr::mutate(whoregion = get_region(country)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> add_risk_category(ctry_col = "country") |> dplyr::mutate( year = lubridate::year(.data$DateStoolCollected) ) |> + # Filter out "incorrect" lab data where the date onset doesn't match + # the stool collection date + dplyr::filter(dplyr::between(DateStoolCollected, analysis_year_start, analysis_year_end)) |> dplyr::filter(.data$`SG Priority Level` %in% priority_level, !is.na(.data$culture.itd.cat), dplyr::between(DateStoolCollected, start_date, end_date)) |> @@ -1124,7 +1161,13 @@ generate_lab_itd_violin <- function(lab_data, afp_data, lab_filtered <- lab_data |> dplyr::left_join(ctry_abbrev, by = c("country" = "place.admin.0", "whoregion")) |> + dplyr::mutate(whoregion = get_region(country)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> add_risk_category(ctry_col = "country") |> + # Filter out "incorrect" lab data where the date onset doesn't match + # the stool collection date + dplyr::filter(dplyr::between(DateStoolCollected, analysis_year_start, analysis_year_end)) |> dplyr::mutate( year = lubridate::year(.data$DateStoolCollected) ) |> @@ -1210,7 +1253,13 @@ generate_lab_seqship_violin <- function(lab_data, afp_data, lab_filtered <- lab_data |> dplyr::left_join(ctry_abbrev, by = c("country" = "place.admin.0", "whoregion")) |> + dplyr::mutate(whoregion = get_region(country)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> add_risk_category(ctry_col = "country") |> + # Filter out "incorrect" lab data where the date onset doesn't match + # the stool collection date + dplyr::filter(dplyr::between(DateStoolCollected, analysis_year_start, analysis_year_end)) |> dplyr::mutate( year = lubridate::year(.data$DateStoolCollected) ) |> @@ -1280,7 +1329,7 @@ generate_lab_seqres_violin <- function(lab_data, afp_data, who_region = NULL, rolling = TRUE, output_path = Sys.getenv("KPI_FIGURES"), - y_max = 60) { + y_max = NULL) { start_date <- lubridate::as_date(start_date) end_date <- lubridate::as_date(end_date) @@ -1297,7 +1346,12 @@ generate_lab_seqres_violin <- function(lab_data, afp_data, lab_filtered <- lab_data |> dplyr::left_join(ctry_abbrev, by = c("country" = "place.admin.0", "whoregion")) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> add_risk_category(ctry_col = "country") |> + # Filter out "incorrect" lab data where the date onset doesn't match + # the stool collection date + dplyr::filter(dplyr::between(DateStoolCollected, analysis_year_start, analysis_year_end)) |> dplyr::mutate( year = lubridate::year(.data$DateStoolCollected) ) |> @@ -1309,11 +1363,16 @@ generate_lab_seqres_violin <- function(lab_data, afp_data, "LOW", "LOW (WATCHLIST)", "MEDIUM", "HIGH")) ) + if (!is.null(who_region)) { lab_filtered <- lab_filtered |> dplyr::filter(.data$whoregion %in% who_region) } + if (is.null(y_max)) { + y_max <- as.numeric(max(lab_filtered$days.seq.rec.res, na.rm = TRUE)) + } + if (rolling) { facets <- ggh4x::facet_nested(rolling_period ~ seq.cat + seq.lab, scales = "free", space = "free", @@ -1410,7 +1469,13 @@ generate_lab_itdres_seqres_violin <- function(lab_data, afp_data, lab_filtered <- lab_data |> dplyr::left_join(ctry_abbrev, by = c("country" = "place.admin.0", "whoregion")) |> + dplyr::mutate(whoregion = get_region(country)) |> + # Remove other who region columns because it's confusing + dplyr::select(-dplyr::any_of(c("who.region", "Region"))) |> add_risk_category(ctry_col = "country") |> + # Filter out "incorrect" lab data where the date onset doesn't match + # the stool collection date + dplyr::filter(dplyr::between(DateStoolCollected, analysis_year_start, analysis_year_end)) |> dplyr::mutate( year = lubridate::year(.data$DateStoolCollected) ) |> diff --git a/R/kpi.main.functions.R b/R/kpi.main.functions.R index 63b638e3..4bbd522e 100644 --- a/R/kpi.main.functions.R +++ b/R/kpi.main.functions.R @@ -196,7 +196,7 @@ get_ctry_abbrev <- function(afp_data) { .data$whoregion != "AFRO" ~ .data$country.iso3 ) ) |> - dplyr::select("ctry.short", "place.admin.0", "whoregion") |> + dplyr::select("ctry.short", "place.admin.0") |> # fixing bad abbreviation in Gabon and turning all to upper case to eliminate dupes dplyr::mutate( ctry.short = ifelse(.data$place.admin.0 == "GABON" & .data$ctry.short == "BUU", @@ -218,13 +218,17 @@ get_ctry_abbrev <- function(afp_data) { !(place.admin.0 == "GHANA" & ctry.short == "TOG"), !(place.admin.0 == "INDIA" & ctry.short == "PAK"), !(place.admin.0 == "PAKISTAN" & ctry.short == "IND"), - !(place.admin.0 == "ZAMBIA" & ctry.short == "TAN") + !(place.admin.0 == "ZAMBIA" & ctry.short == "TAN"), + !(place.admin.0 == "PARAGUAY" & ctry.short == "TTO") ) |> dplyr::mutate(ctry.short = dplyr::case_when( ctry.short == "IND" & place.admin.0 == "INDONESIA" ~ "IDN", .default = ctry.short )) + ctry_abbrev <- ctry_abbrev |> + dplyr::mutate(whoregion = get_region(place.admin.0)) + summarize_ctry_abbrev <- ctry_abbrev |> dplyr::group_by(place.admin.0) |> dplyr::summarize(n = n()) |> @@ -232,7 +236,7 @@ get_ctry_abbrev <- function(afp_data) { dplyr::ungroup() if (nrow(summarize_ctry_abbrev) > 0) { - cli::cli_alert_warning("Some values in the country lookup table are not unique for the following countries: ") + cli::cli_alert_warning("Some values in the country lookup table are not unique for the following countries, please updated code in get_ctry_abbrev: ") non_unique <- ctry_abbrev |> dplyr::filter(place.admin.0 %in% summarize_ctry_abbrev$place.admin.0) @@ -301,11 +305,12 @@ generate_kpi_template <- function(output_path, name, edav) { stool_bar <- "generate_kpi_stoolad_bar(c1, raw_data$afp)" ev_bar <- "generate_kpi_evdetect_bar(c1, raw_data$afp)" - timely_violin <- 'generate_timely_det_violin(raw_data, start_date, end_date)' + culture_violin <- 'generate_lab_culture_violin(lab_data, raw_data$afp, start_date, end_date)' itd_violin <- 'generate_lab_itd_violin(lab_data, raw_data$afp, start_date, end_date)' seqship_violin <- 'generate_lab_seqship_violin(lab_data, raw_data$afp, start_date, end_date)' seqres_violin <- 'generate_lab_seqres_violin(lab_data, raw_data$afp, start_date, end_date)' + timely_violin <- 'generate_timely_det_violin(raw_data, start_date, end_date)' kpi_tile <- 'generate_kpi_tile(c1)' @@ -337,9 +342,46 @@ generate_kpi_template <- function(output_path, name, edav) { "# and may take a while to complete.", sg_priority_map, npafp_kpi_loop, stool_kpi_loop, ev_kpi_loop, "\n", npafp_bar, stool_bar, ev_bar, "\n", - "# Adjust the y_max as needed via the 'y_max' parameter", - timely_violin, culture_violin, itd_violin, seqship_violin, - seqres_violin, "\n", + "# Violin plots ----", + "", + "# Before running plots, check the indicator ranges below to determine if you need to change y_max.", + "# Note these data are approximations; ranges reflect the full dataset before additional filtering in plots.", + "# Note: some range checks use internal helper functions from sirfunctions.", + "# Examine plots carefully to finalize y_max.", + "# Example to change y_max: generate_lab_seqres_violin(lab_data, raw_data$afp, start_date, end_date, y_max = 250)", + "", + "# Build base dataset for lab violin range checks", + 'lab_kpi_check <- sirfunctions:::generate_kpi_lab_timeliness(lab_data, start_date, end_date, raw_data$afp)', + "", + "# For generate_lab_culture_violin()", + "# Default y_max is 60", + 'range(lab_kpi_check$days.lab.culture, na.rm = TRUE)', + 'summary(lab_kpi_check$days.lab.culture)', + "", + "# For generate_lab_itd_violin()", + "# Default y_max is 30", + 'range(lab_kpi_check$days.culture.itd, na.rm = TRUE)', + 'summary(lab_kpi_check$days.culture.itd)', + "", + "# For generate_lab_seqship_violin()", + "# Default y_max is 50", + 'range(lab_kpi_check$days.seq.ship, na.rm = TRUE)', + 'summary(lab_kpi_check$days.seq.ship)', + "", + "# For generate_lab_seqres_violin()", + "# Default y_max is dynamic to the max of days.seq.rec.res", + 'range(as.numeric(lab_kpi_check$days.seq.rec.res), na.rm = TRUE)', + 'summary(as.numeric(lab_kpi_check$days.seq.rec.res))', + "", + "# For generate_timely_det_violin()", + "# Default y_max is dynamic to the max of ontonothq", + 'pos_timeliness_check <- sirfunctions:::generate_pos_timeliness(raw_data, start_date, end_date)', + 'range(pos_timeliness_check$ontonothq, na.rm = TRUE)', + 'summary(pos_timeliness_check$ontonothq)', + "", + "", + "# Build plots ----", + culture_violin, itd_violin, seqship_violin, seqres_violin, timely_violin, "\n", "# Generate C1 KPI tile ----", kpi_tile, "\n", "# Export GPSAP tables ----", @@ -347,4 +389,5 @@ generate_kpi_template <- function(output_path, name, edav) { ) writeLines(output_string, conn) + close(conn) } diff --git a/R/kpi.table.functions.R b/R/kpi.table.functions.R index 4c1b311c..5c4a882c 100644 --- a/R/kpi.table.functions.R +++ b/R/kpi.table.functions.R @@ -158,6 +158,26 @@ generate_pos_timeliness <- function(raw_data, start_date, end_date, pos <- add_rolling_years(pos, start_date, end_date, "dateonset") pos <- add_seq_capacity(pos, ctry_col = "place.admin.0", lab_locs) + # Manual edit based on changes to the sequencing lab list in Feb 2025 + # There is no collection date, so will use dateonset to classify + pos <- pos |> + mutate(seq.lab = case_when( + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab == "Cameroon" ~ "NICD-South Africa", + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab == "ETHIOPIA/ KEMRI-Kenya" ~ "UVRI-Uganda", + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab %in% c("Ibadan-Nigeria, Maiduguri-Nigeria", "Nigeria") ~ "Ibadan-Nigeria", + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab == "KEMRI-Kenya" ~ "UVRI-Uganda", + place.admin.0 == "UGANDA" & dateonset >= as_date("2025-02-01") ~ "UVRI-Uganda", + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab == "Senegal" ~ "NICD-South Africa", + seq.lab == "CDC-Atlanta" & dateonset >= as_date("2025-02-01") & culture.itd.lab == "Varied (KEMRI-Kenya/ Oman/ Jordan)" ~ "Varied (UVRI/ Oman/ Jordan)", + .default = seq.lab + )) |> + mutate(seq.cat = case_when( + dateonset >= as_date("2025-02-01") & culture.itd.lab %in% c("Ibadan-Nigeria, Maiduguri-Nigeria", "Nigeria") & seq.lab == "Ibadan-Nigeria" ~ "Not shipped for sequencing", + place.admin.0 == "UGANDA" & dateonset >= as_date("2025-02-01") ~ "Not shipped for sequencing", + .default = seq.cat + )) |> + mutate(seq.capacity = if_else(place.admin.0 %in% c("NIGERIA", "UGANDA") & dateonset >= as_date("2025-02-01"), "yes", seq.capacity)) + pos_summary <- pos |> dplyr::mutate( ontonothq = as.numeric(lubridate::as_date(.data$datenotificationtohq) - @@ -718,7 +738,7 @@ generate_c1_table <- function(raw_data, start_date, end_date, # Calculate meeting indicators dist_lookup_table <- raw_data$dist.pop |> - dplyr::select(ctry = ADM0_NAME, prov = ADM1_NAME, dist = ADM2_NAME, adm2guid) |> + dplyr::select(ctry, prov, dist, adm2guid) |> dplyr::distinct() # Flag any inconsistent GUIDs to say any calculations are invalid @@ -886,6 +906,11 @@ generate_c1_table <- function(raw_data, start_date, end_date, dplyr::mutate(dplyr::across(dplyr::ends_with("label"), \(x) tidyr::replace_na(x, "0/0"))) |> dplyr::rename(Region = whoregion) + # Manual edit to reflect Indonesia WPRO change in May 23, 2025 + combine <- combine |> + dplyr::mutate(Region = ifelse(ctry == "INDONESIA" & + analysis_year_start >= lubridate::as_date("2025-05-23"), "WPRO", Region)) + cli::cli_progress_update() cli::cli_progress_done() @@ -1011,14 +1036,7 @@ generate_c2_table <- function(afp_data, pop_data, start_date, end_date, dateinvest = "date.invest", cdc.classification.all = "cdc.class" ) - pop_data <- dplyr::rename_with(pop_data, recode, - ADM0_NAME = "ctry", - ADM1_NAME = "prov", - ADM2_NAME = "dist", - ADM0_GUID = "adm0guid", - u15pop.prov = "u15pop", - WHO_REGION = "who_region" - ) + cli::cli_progress_update() # Add required columns @@ -1376,17 +1394,17 @@ generate_c2_table <- function(afp_data, pop_data, start_date, end_date, # NPAFP is only NA if it's missing population dplyr::mutate(npafp_rate = dplyr::if_else(npafp_cat != "Missing Pop" & is.nan(npafp_rate), 0, npafp_rate)) - # Add region and risk levels - region_lookup_table <- pop_data |> - dplyr::select(dplyr::any_of(c("ctry", "prov", "dist", "who_region"))) |> - dplyr::distinct() - results <- add_risk_category(results, risk_table) |> - dplyr::left_join(region_lookup_table) |> + dplyr::mutate(whoregion = get_region(ctry)) |> dplyr::select(-Region) |> - dplyr::rename(Region = who_region) |> + dplyr::rename(Region = whoregion) |> tidyr::replace_na(list(`SG Priority Level` = "LOW")) + # Manual edit to reflect Indonesia WPRO change in May 23, 2025 + results <- results |> + dplyr::mutate(Region = ifelse(ctry == "INDONESIA" & + analysis_year_start >= lubridate::as_date("2025-05-23"), "WPRO", Region)) + cli::cli_progress_done() return(results) diff --git a/R/sirfunctions-package.R b/R/sirfunctions-package.R index 41c0aa3b..f44b942c 100644 --- a/R/sirfunctions-package.R +++ b/R/sirfunctions-package.R @@ -44,6 +44,7 @@ #' @importFrom lifecycle deprecate_warn #' @importFrom lifecycle deprecated #' @importFrom lifecycle is_present +#' @importFrom pins board_azure #' @importFrom purrr map #' @importFrom rappdirs user_data_dir #' @importFrom readr read_csv diff --git a/man/generate_lab_seqres_violin.Rd b/man/generate_lab_seqres_violin.Rd index 3f0d5d40..b17efe67 100644 --- a/man/generate_lab_seqres_violin.Rd +++ b/man/generate_lab_seqres_violin.Rd @@ -13,7 +13,7 @@ generate_lab_seqres_violin( who_region = NULL, rolling = TRUE, output_path = Sys.getenv("KPI_FIGURES"), - y_max = 60 + y_max = NULL ) } \arguments{ diff --git a/man/generate_timely_det_violin.Rd b/man/generate_timely_det_violin.Rd index 69eba8ba..72ae188a 100644 --- a/man/generate_timely_det_violin.Rd +++ b/man/generate_timely_det_violin.Rd @@ -11,8 +11,10 @@ generate_timely_det_violin( priority_level = c("HIGH", "MEDIUM", "LOW (WATCHLIST)", "LOW"), who_region = NULL, rolling = TRUE, + risk_table = NULL, + lab_locs = NULL, output_path = Sys.getenv("KPI_FIGURES"), - y_max = 250 + y_max = NULL ) } \arguments{ @@ -30,9 +32,17 @@ all of the regions.} \item{rolling}{\code{logical} Using rolling periods or year-to-year? Defaults to \code{TRUE}.} +\item{risk_table}{\code{tibble} Priority level of each country. Defaults to \code{NULL}, +which will download the information directly from EDAV.} + +\item{lab_locs}{\code{tibble} Summary of the sequencing capacities of labs. +Output of \code{\link[=get_lab_locs]{get_lab_locs()}}. Defaults to \code{NULL}, which will download the information +directly from EDAV.} + \item{output_path}{\code{str} Where to output the figure to.} -\item{y_max}{\code{num} The maximum y-axis value.} +\item{y_max}{\code{num} The maximum y-axis value. Defaults to \code{NULL}, which will compute the +y_max as the max value of days to notify hq.} } \value{ \code{ggplot} A violin plot showing timeliness of detection. diff --git a/man/get_edav_pins_board.Rd b/man/get_edav_pins_board.Rd new file mode 100644 index 00000000..f41c080e --- /dev/null +++ b/man/get_edav_pins_board.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_edav_pins_board.R +\name{get_edav_pins_board} +\alias{get_edav_pins_board} +\title{Obtain a pins board from EDAV} +\usage{ +get_edav_pins_board( + board_loc = "GID/PEB/SIR/pins_board", + azcontainer = get_azure_storage_connection() +) +} +\arguments{ +\item{board_loc}{\code{str} Location of the pins board in Azure.} + +\item{azcontainer}{\code{azcontainer} An Azure container.} +} +\value{ +\verb{pins board} An Azure pins board. +} +\description{ +This is a convenience function to obtain a pins board in EDAV. After the pins +board is loaded, several functions from the pins package are available to explore +the board. Please see the pins package \href{https://pins.rstudio.com/articles/pins.html}{get started} page. +} +\examples{ +\dontrun{ +edav_board <- get_edav_pins_board() +} +} diff --git a/man/init_dr.Rd b/man/init_dr.Rd index 14ebb26e..b411f3a7 100644 --- a/man/init_dr.Rd +++ b/man/init_dr.Rd @@ -20,7 +20,7 @@ init_dr( ) } \arguments{ -\item{country_name}{\code{str} Name of the country.} +\item{country_name}{\code{str} Name of the country. You may pass multiple countries as a vector of strings.} \item{start_date}{\code{str} Start date of the desk review. If \code{NULL}, defaults to four years from when the function was ran on January 1st.} @@ -63,6 +63,7 @@ the desk review is prefixed with \code{"DR"}) . The function only supports runni \dontrun{ ctry.data <- init_dr("algeria", source = F) # Sets up folder in the current working directory ctry.data <- init_dr("algeria", branch = "dev") # Use functions from the dev branch +ctry.data <- init_dr(c("algeria", "nigeria"), source = F) # Multiple countries. } }