From 03267543a402e4d205be1fa764b8208e2970663f Mon Sep 17 00:00:00 2001 From: GShotwell Date: Tue, 31 Oct 2017 16:24:34 -0300 Subject: [PATCH 01/13] Initial implementation of mrFromDelim --- NAMESPACE | 1 + R/make-array.R | 64 ++++++++++++++++++++++++++++++++ man/mrFromDelim.Rd | 31 ++++++++++++++++ tests/testthat/test-make-array.R | 60 ++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+) create mode 100644 man/mrFromDelim.Rd diff --git a/NAMESPACE b/NAMESPACE index 628a65541..4a307ce37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -164,6 +164,7 @@ export(mergeFork) export(mkdir) export(modifyWeightVariables) export(moveToGroup) +export(mrFromDelim) export(mv) export(newDataset) export(newDatasetByColumn) diff --git a/R/make-array.R b/R/make-array.R index a2ba874d5..8b272ddbe 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -80,6 +80,69 @@ makeMR <- function (subvariables, name, selections, ...) { return(vardef) } + +#' Create Multiple Response Variable from Delimited +#' +#' Surveys often record multiple response questions in delimited lists where +#' each respondent's selections are separated by a delimiter like `;` or `|`. +#' This function breaks the delimited responses into subvariables, uploads those +#' subvariables to crunch, and finally creates a multiple response variable from +#' them. +#' +#' @param var The variable containing the delimited responses +#' @param delim The delimiter separating the repsonses +#' @param name The name of the resulting MR variable +#' @param selected A character string used to indicate a selection +#' @param not_selected Character string identifying non-selection +#' @param unanswered Character string indicating non-response +#' @param ... Other arguments to be passed on to [makeMR()] +#' +#' @return +#' @export +mrFromDelim <- function(var, + delim, + name, + selected = "selected", + not_selected = "not_selected", + unanswered = NA, + ...) { + if (missing(name)) { + halt("Must supply a name for the new variable") + } + if (is.Categorical(var) || is.Text(var)) { + v <- as.vector(var) + } else { + halt(dQuote("var"), " must be a Categorical or Text Crunch Variable.") + } + uniques <- unique(v[!is.na(v)]) + cats <- unique(unlist(strsplit(uniques, delim))) + vardefs <- lapply(cats, function(x) createSubvarDef(v, x, delim, + selected, not_selected, unanswered_val = unanswered, missing = is.na(v))) + ds <- loadDataset(crunch:::datasetReference(var)) + addVariables(ds, vardefs) + ds <- refresh(ds) + return(makeMR(ds[, cats], name = name, selections = selected)) +} + +createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered_val, missing) { + out <- values <- grepl(crunch:::buildRegex(str, delim), var) + out[values] <- selected + out[!values] <- not_selected + out[missing] <- unanswered_val + return(toVariable(factor(out), name = str)) +} + +buildRegex <- function(str, delim){ + delim <- paste0('\\', delim) + regex <- paste0( + "^", str, delim, "|", + delim, str, delim, "|", + delim, str, "$", "|", + "^", str, "$") + return(regex) +} + + #' @rdname makeArray #' @export deriveArray <- function (subvariables, name, selections, ...) { @@ -105,6 +168,7 @@ deriveArray <- function (subvariables, name, selections, ...) { return(VariableDefinition(derivation=derivation, name=name, ...)) } + #' Rearrange array subvariables #' #' Sometimes it is useful to group subvariables across arrays in order to diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd new file mode 100644 index 000000000..5ab140bbf --- /dev/null +++ b/man/mrFromDelim.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make-array.R +\name{mrFromDelim} +\alias{mrFromDelim} +\title{Create Multiple Response Variable from Delimited} +\usage{ +mrFromDelim(var, delim, name, selected = "selected", + not_selected = "not_selected", unanswered = NA, ...) +} +\arguments{ +\item{var}{The variable containing the delimited responses} + +\item{delim}{The delimiter separating the repsonses} + +\item{name}{The name of the resulting MR variable} + +\item{selected}{A character string used to indicate a selection} + +\item{not_selected}{Character string identifying non-selection} + +\item{unanswered}{Character string indicating non-response} + +\item{...}{Other arguments to be passed on to \code{\link[=makeMR]{makeMR()}}} +} +\description{ +Surveys often record multiple response questions in delimited lists where +each respondent's selections are separated by a delimiter like \code{;} or \code{|}. +This function breaks the delimited responses into subvariables, uploads those +subvariables to crunch, and finally creates a multiple response variable from +them. +} diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index ed08a48d6..fcde61a79 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -57,6 +57,51 @@ with_mock_crunch({ name="Gen"), "Undefined columns selected: NOTAVARIABLE") }) + + test_that("mrFromDelim errors correctly", { + expect_error(mrFromDelim(ds$var, "; ",), + "Must supply a name for the new variable") + expect_error(mrFromDelim(mtcars$cyl, name = "name"), + paste0(dQuote("var"), " must be a Categorical or Text Crunch Variable.")) + }) + + test_that("createSubvarDef generates the correct variable definition", { + v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak", NA) + expected <- structure(list( + values = c(-1L, -1L, -1L, -1L, -1L), + type = "categorical", + categories = list( + structure(list( + id = -1L, + name = "No Data", + numeric_value = NULL, + missing = TRUE + ), + .Names = c("id", "name", "numeric_value", "missing") + ) + ), + name = "oak"), + .Names = c("values", "type", "categories", "name"), + class = "VariableDefinition") + varDef <- crunch:::createSubvarDef(v, str = "oak", + delim = "; ", + selected = "Yes", + not_selected = "No", + unanswered_val = v[is.na(v)]) + expect_equivalent(varDef, expected) + }) + c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + test_that("buildRegex generates the expected regular expression", { + rx <- buildRegex("maple", "; ") + expect_true(grepl(rx, "maple")) + expect_true(grepl(rx, "maple; birch")) + expect_true(grepl(rx, "oak; maple; birch")) + expect_true(grepl(rx, "birch; maple")) + expect_false(grepl(rx, "birch; sugar maple")) + expect_false(grepl(rx, "maple butter; oak")) + #test delimiters that are regex characters + expect_true(grepl(buildRegex("maple", "| "), "oak| maple| birch")) + }) }) with_test_authentication({ @@ -136,4 +181,19 @@ with_test_authentication({ expect_true(setequal(names(ds), names(mrdf))) expect_identical(ncol(ds), 4L) }) + whereas("mrFromDelim functions as expected", { + ds <- newDataset(mrdf) + v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + ds$delim <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + test_that("mrFromDelim creates a variable", { + ds$mr_5 <- mrFromDelim(ds$delim, delim = "; ", name = "myMR") + expect_identical(names(subvariables(ds$mr_5)), + c("maple", "birch", "oak", "sugar maple", "maple butter")) + expect_identical(names(categories(ds$mr_5)), + c("not_selected", "selected", "No Data")) + expect_identical(as.vector(ds$mr_5$maple), + structure(c(2L, 2L, 1L, 1L), .Label = c("not_selected", "selected" + ), class = "factor")) + }) + }) }) From 80c987d53d77dfc1fc7db5671f93f05ea0e19607 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Tue, 31 Oct 2017 16:24:34 -0300 Subject: [PATCH 02/13] Initial implementation of mrFromDelim --- NAMESPACE | 1 + R/make-array.R | 64 ++++++++++++++++++++++++++++++++ man/mrFromDelim.Rd | 31 ++++++++++++++++ tests/testthat/test-make-array.R | 60 ++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+) create mode 100644 man/mrFromDelim.Rd diff --git a/NAMESPACE b/NAMESPACE index 628a65541..4a307ce37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -164,6 +164,7 @@ export(mergeFork) export(mkdir) export(modifyWeightVariables) export(moveToGroup) +export(mrFromDelim) export(mv) export(newDataset) export(newDatasetByColumn) diff --git a/R/make-array.R b/R/make-array.R index a2ba874d5..3bbab3a4e 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -80,6 +80,69 @@ makeMR <- function (subvariables, name, selections, ...) { return(vardef) } + +#' Create Multiple Response Variable from Delimited +#' +#' Surveys often record multiple response questions in delimited lists where +#' each respondent's selections are separated by a delimiter like `;` or `|`. +#' This function breaks the delimited responses into subvariables, uploads those +#' subvariables to crunch, and finally creates a multiple response variable from +#' them. +#' +#' @param var The variable containing the delimited responses +#' @param delim The delimiter separating the repsonses +#' @param name The name of the resulting MR variable +#' @param selected A character string used to indicate a selection +#' @param not_selected Character string identifying non-selection +#' @param unanswered Character string indicating non-response +#' @param ... Other arguments to be passed on to [makeMR()] +#' +#' @return +#' @export +mrFromDelim <- function(var, + delim, + name, + selected = "selected", + not_selected = "not_selected", + unanswered = NA, + ...) { + if (missing(name)) { + halt("Must supply a name for the new variable") + } + if (is.Categorical(var) || is.Text(var)) { + v <- as.vector(var) + } else { + halt(dQuote("var"), " must be a Categorical or Text Crunch Variable.") + } + uniques <- unique(v[!is.na(v)]) + cats <- unique(unlist(strsplit(uniques, delim))) + vardefs <- lapply(cats, function(x) createSubvarDef(v, x, delim, + selected, not_selected, unanswered_val = unanswered, missing = is.na(v))) + ds <- loadDataset(datasetReference(var)) + addVariables(ds, vardefs) + ds <- refresh(ds) + return(makeMR(ds[, cats], name = name, selections = selected)) +} + +createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered_val, missing) { + out <- values <- grepl(buildRegex(str, delim), var) + out[values] <- selected + out[!values] <- not_selected + out[missing] <- unanswered_val + return(toVariable(factor(out), name = str)) +} + +buildRegex <- function(str, delim){ + delim <- paste0('\\', delim) + regex <- paste0( + "^", str, delim, "|", + delim, str, delim, "|", + delim, str, "$", "|", + "^", str, "$") + return(regex) +} + + #' @rdname makeArray #' @export deriveArray <- function (subvariables, name, selections, ...) { @@ -105,6 +168,7 @@ deriveArray <- function (subvariables, name, selections, ...) { return(VariableDefinition(derivation=derivation, name=name, ...)) } + #' Rearrange array subvariables #' #' Sometimes it is useful to group subvariables across arrays in order to diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd new file mode 100644 index 000000000..5ab140bbf --- /dev/null +++ b/man/mrFromDelim.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make-array.R +\name{mrFromDelim} +\alias{mrFromDelim} +\title{Create Multiple Response Variable from Delimited} +\usage{ +mrFromDelim(var, delim, name, selected = "selected", + not_selected = "not_selected", unanswered = NA, ...) +} +\arguments{ +\item{var}{The variable containing the delimited responses} + +\item{delim}{The delimiter separating the repsonses} + +\item{name}{The name of the resulting MR variable} + +\item{selected}{A character string used to indicate a selection} + +\item{not_selected}{Character string identifying non-selection} + +\item{unanswered}{Character string indicating non-response} + +\item{...}{Other arguments to be passed on to \code{\link[=makeMR]{makeMR()}}} +} +\description{ +Surveys often record multiple response questions in delimited lists where +each respondent's selections are separated by a delimiter like \code{;} or \code{|}. +This function breaks the delimited responses into subvariables, uploads those +subvariables to crunch, and finally creates a multiple response variable from +them. +} diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index ed08a48d6..fcde61a79 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -57,6 +57,51 @@ with_mock_crunch({ name="Gen"), "Undefined columns selected: NOTAVARIABLE") }) + + test_that("mrFromDelim errors correctly", { + expect_error(mrFromDelim(ds$var, "; ",), + "Must supply a name for the new variable") + expect_error(mrFromDelim(mtcars$cyl, name = "name"), + paste0(dQuote("var"), " must be a Categorical or Text Crunch Variable.")) + }) + + test_that("createSubvarDef generates the correct variable definition", { + v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak", NA) + expected <- structure(list( + values = c(-1L, -1L, -1L, -1L, -1L), + type = "categorical", + categories = list( + structure(list( + id = -1L, + name = "No Data", + numeric_value = NULL, + missing = TRUE + ), + .Names = c("id", "name", "numeric_value", "missing") + ) + ), + name = "oak"), + .Names = c("values", "type", "categories", "name"), + class = "VariableDefinition") + varDef <- crunch:::createSubvarDef(v, str = "oak", + delim = "; ", + selected = "Yes", + not_selected = "No", + unanswered_val = v[is.na(v)]) + expect_equivalent(varDef, expected) + }) + c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + test_that("buildRegex generates the expected regular expression", { + rx <- buildRegex("maple", "; ") + expect_true(grepl(rx, "maple")) + expect_true(grepl(rx, "maple; birch")) + expect_true(grepl(rx, "oak; maple; birch")) + expect_true(grepl(rx, "birch; maple")) + expect_false(grepl(rx, "birch; sugar maple")) + expect_false(grepl(rx, "maple butter; oak")) + #test delimiters that are regex characters + expect_true(grepl(buildRegex("maple", "| "), "oak| maple| birch")) + }) }) with_test_authentication({ @@ -136,4 +181,19 @@ with_test_authentication({ expect_true(setequal(names(ds), names(mrdf))) expect_identical(ncol(ds), 4L) }) + whereas("mrFromDelim functions as expected", { + ds <- newDataset(mrdf) + v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + ds$delim <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + test_that("mrFromDelim creates a variable", { + ds$mr_5 <- mrFromDelim(ds$delim, delim = "; ", name = "myMR") + expect_identical(names(subvariables(ds$mr_5)), + c("maple", "birch", "oak", "sugar maple", "maple butter")) + expect_identical(names(categories(ds$mr_5)), + c("not_selected", "selected", "No Data")) + expect_identical(as.vector(ds$mr_5$maple), + structure(c(2L, 2L, 1L, 1L), .Label = c("not_selected", "selected" + ), class = "factor")) + }) + }) }) From 943ff7cf2e4270accd56eed44ab15dceb907e179 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Wed, 1 Nov 2017 14:22:12 -0300 Subject: [PATCH 03/13] Hide variables after upload --- R/make-array.R | 3 ++- man/mrFromDelim.Rd | 2 +- tests/testthat/test-make-array.R | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index 3bbab3a4e..9cd30deb2 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -90,7 +90,7 @@ makeMR <- function (subvariables, name, selections, ...) { #' them. #' #' @param var The variable containing the delimited responses -#' @param delim The delimiter separating the repsonses +#' @param delim The delimiter separating the responses #' @param name The name of the resulting MR variable #' @param selected A character string used to indicate a selection #' @param not_selected Character string identifying non-selection @@ -120,6 +120,7 @@ mrFromDelim <- function(var, selected, not_selected, unanswered_val = unanswered, missing = is.na(v))) ds <- loadDataset(datasetReference(var)) addVariables(ds, vardefs) + hide(var) ds <- refresh(ds) return(makeMR(ds[, cats], name = name, selections = selected)) } diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd index 5ab140bbf..b7b7e1bbf 100644 --- a/man/mrFromDelim.Rd +++ b/man/mrFromDelim.Rd @@ -10,7 +10,7 @@ mrFromDelim(var, delim, name, selected = "selected", \arguments{ \item{var}{The variable containing the delimited responses} -\item{delim}{The delimiter separating the repsonses} +\item{delim}{The delimiter separating the responses} \item{name}{The name of the resulting MR variable} diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index fcde61a79..4cf231f78 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -194,6 +194,7 @@ with_test_authentication({ expect_identical(as.vector(ds$mr_5$maple), structure(c(2L, 2L, 1L, 1L), .Label = c("not_selected", "selected" ), class = "factor")) + expect_identical(hiddenVariables(ds), "delim") }) }) }) From 81f2797cadbc5209d5ceb0ca3727c0b5ca81fb98 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Wed, 1 Nov 2017 14:48:45 -0300 Subject: [PATCH 04/13] Removed whitespace --- R/make-array.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/make-array.R b/R/make-array.R index 9cd30deb2..96d5bc976 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -169,7 +169,6 @@ deriveArray <- function (subvariables, name, selections, ...) { return(VariableDefinition(derivation=derivation, name=name, ...)) } - #' Rearrange array subvariables #' #' Sometimes it is useful to group subvariables across arrays in order to From efed473075b1bfe03b1208aeb1b9acf06413212c Mon Sep 17 00:00:00 2001 From: GShotwell Date: Thu, 2 Nov 2017 10:49:01 -0300 Subject: [PATCH 05/13] Expanded documentation --- R/make-array.R | 45 +++++++++++++++++++++++++++----- man/buildDelimRegex.Rd | 30 +++++++++++++++++++++ man/createSubvarDef.Rd | 37 ++++++++++++++++++++++++++ man/mrFromDelim.Rd | 2 +- tests/testthat/test-make-array.R | 9 ++++--- 5 files changed, 111 insertions(+), 12 deletions(-) create mode 100644 man/buildDelimRegex.Rd create mode 100644 man/createSubvarDef.Rd diff --git a/R/make-array.R b/R/make-array.R index 96d5bc976..93f376947 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -86,7 +86,7 @@ makeMR <- function (subvariables, name, selections, ...) { #' Surveys often record multiple response questions in delimited lists where #' each respondent's selections are separated by a delimiter like `;` or `|`. #' This function breaks the delimited responses into subvariables, uploads those -#' subvariables to crunch, and finally creates a multiple response variable from +#' subvariables to Crunch, and finally creates a multiple response variable from #' them. #' #' @param var The variable containing the delimited responses @@ -117,7 +117,7 @@ mrFromDelim <- function(var, uniques <- unique(v[!is.na(v)]) cats <- unique(unlist(strsplit(uniques, delim))) vardefs <- lapply(cats, function(x) createSubvarDef(v, x, delim, - selected, not_selected, unanswered_val = unanswered, missing = is.na(v))) + selected, not_selected, unanswered, missing = is.na(v))) ds <- loadDataset(datasetReference(var)) addVariables(ds, vardefs) hide(var) @@ -125,16 +125,47 @@ mrFromDelim <- function(var, return(makeMR(ds[, cats], name = name, selections = selected)) } -createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered_val, missing) { - out <- values <- grepl(buildRegex(str, delim), var) +#' createSubvarDef +#' +#' This function creates a single subvariable definition based on a character string +#' to search for and an originating variable. It uses regex to determine whether +#' a string is present in a delimited list, then substitutes the user supplied values +#' to indicate selection, non-selection, and missingness. +#' +#' TODO: When Crunch allows variable derivation to be created via regex, this should +#' create a derivation instead of a definition with values. +#' +#' @inheritParams mrFromDelim +#' @param str A string whose presence indicates a selection +#' @param missing A logical vector indicating which variable entries are missing +#' @keywords internal +#' +#' @return A VariableDefinition +createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered, missing) { + out <- values <- grepl(buildDelimRegex(str, delim), var) out[values] <- selected out[!values] <- not_selected - out[missing] <- unanswered_val + out[missing] <- unanswered return(toVariable(factor(out), name = str)) } -buildRegex <- function(str, delim){ - delim <- paste0('\\', delim) +#' Build Regex to find delimited items. +#' +#' A delimited item can appear in a list in four ways +#' 1. At the start of a list `maple; oak` +#' 1. In the middle of a list `oak; maple; birch` +#' 1. At the end of a list `oak; maple` +#' 1. Alone with no delimiters `maple` +#' +#' This function builds a regex expression which captures those four values. It +#' is mostly broken out of [createSubvarDef()] for testing purposes. +#' +#' @inheritParams createSubvarDef +#' +#' @return A character string +#' @keywords internal +buildDelimRegex <- function(str, delim){ + delim <- paste0('\\', delim) # the delimeter needs to be escaped in case it's a regex character regex <- paste0( "^", str, delim, "|", delim, str, delim, "|", diff --git a/man/buildDelimRegex.Rd b/man/buildDelimRegex.Rd new file mode 100644 index 000000000..3dda1751c --- /dev/null +++ b/man/buildDelimRegex.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make-array.R +\name{buildDelimRegex} +\alias{buildDelimRegex} +\title{Build Regex to find delimited items.} +\usage{ +buildDelimRegex(str, delim) +} +\arguments{ +\item{str}{A string whose presence indicates a selection} + +\item{delim}{The delimiter separating the responses} +} +\value{ +A character string +} +\description{ +A delimited item can appear in a list in four ways +\enumerate{ +\item At the start of a list \code{maple; oak} +\item In the middle of a list \code{oak; maple; birch} +\item At the end of a list \code{oak; maple} +\item Alone with no delimiters \code{maple} +} +} +\details{ +This function builds a regex expression which captures those four values. It +is mostly broken out of \code{\link[=createSubvarDef]{createSubvarDef()}} for testing purposes. +} +\keyword{internal} diff --git a/man/createSubvarDef.Rd b/man/createSubvarDef.Rd new file mode 100644 index 000000000..7ef279bfb --- /dev/null +++ b/man/createSubvarDef.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make-array.R +\name{createSubvarDef} +\alias{createSubvarDef} +\title{createSubvarDef} +\usage{ +createSubvarDef(var, str, delim, selected, not_selected, unanswered, missing) +} +\arguments{ +\item{var}{The variable containing the delimited responses} + +\item{str}{A string whose presence indicates a selection} + +\item{delim}{The delimiter separating the responses} + +\item{selected}{A character string used to indicate a selection} + +\item{not_selected}{Character string identifying non-selection} + +\item{unanswered}{Character string indicating non-response} + +\item{missing}{A logical vector indicating which variable entries are missing} +} +\value{ +A VariableDefinition +} +\description{ +This function creates a single subvariable definition based on a character string +to search for and an originating variable. It uses regex to determine whether +a string is present in a delimited list, then substitutes the user supplied values +to indicate selection, non-selection, and missingness. +} +\details{ +TODO: When Crunch allows variable derivation to be created via regex, this should +create a derivation instead of a definition with values. +} +\keyword{internal} diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd index b7b7e1bbf..fc9593781 100644 --- a/man/mrFromDelim.Rd +++ b/man/mrFromDelim.Rd @@ -26,6 +26,6 @@ mrFromDelim(var, delim, name, selected = "selected", Surveys often record multiple response questions in delimited lists where each respondent's selections are separated by a delimiter like \code{;} or \code{|}. This function breaks the delimited responses into subvariables, uploads those -subvariables to crunch, and finally creates a multiple response variable from +subvariables to Crunch, and finally creates a multiple response variable from them. } diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index 4cf231f78..3b50d348a 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -87,12 +87,12 @@ with_mock_crunch({ delim = "; ", selected = "Yes", not_selected = "No", - unanswered_val = v[is.na(v)]) + unanswered = v[is.na(v)]) expect_equivalent(varDef, expected) }) c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") - test_that("buildRegex generates the expected regular expression", { - rx <- buildRegex("maple", "; ") + test_that("buildDelimRegex generates the expected regular expression", { + rx <- buildDelimRegex("maple", "; ") expect_true(grepl(rx, "maple")) expect_true(grepl(rx, "maple; birch")) expect_true(grepl(rx, "oak; maple; birch")) @@ -100,7 +100,8 @@ with_mock_crunch({ expect_false(grepl(rx, "birch; sugar maple")) expect_false(grepl(rx, "maple butter; oak")) #test delimiters that are regex characters - expect_true(grepl(buildRegex("maple", "| "), "oak| maple| birch")) + expect_true(grepl(buildDelimRegex("maple", "| "), "oak| maple| birch")) + expect_false(grepl(buildDelimRegex("maple", "| "), "oak| sugar maple| birch")) }) }) From 42ff9c943de6e4caee45c6bd37547b7adf19efc6 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Thu, 16 Nov 2017 10:01:00 -0400 Subject: [PATCH 06/13] Switched to Variable Definition --- R/make-array.R | 68 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index 93f376947..c7e9b1e6e 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -81,7 +81,7 @@ makeMR <- function (subvariables, name, selections, ...) { } -#' Create Multiple Response Variable from Delimited +#' Create Multiple Response Variable from Delimited lists #' #' Surveys often record multiple response questions in delimited lists where #' each respondent's selections are separated by a delimiter like `;` or `|`. @@ -92,14 +92,14 @@ makeMR <- function (subvariables, name, selections, ...) { #' @param var The variable containing the delimited responses #' @param delim The delimiter separating the responses #' @param name The name of the resulting MR variable -#' @param selected A character string used to indicate a selection -#' @param not_selected Character string identifying non-selection -#' @param unanswered Character string indicating non-response +#' @param selected A character string used to indicate a selection, defaults to "selected +#' @param not_selected Character string identifying non-selection, defaults to "not_selected" +#' @param unanswered Character string indicating non-response, defaults to NA. #' @param ... Other arguments to be passed on to [makeMR()] #' -#' @return +#' @return a Multiple response variable definition #' @export -mrFromDelim <- function(var, +mrFromDelim <- function (var, delim, name, selected = "selected", @@ -110,18 +110,16 @@ mrFromDelim <- function(var, halt("Must supply a name for the new variable") } if (is.Categorical(var) || is.Text(var)) { - v <- as.vector(var) + uniques <- names(table(var)) } else { - halt(dQuote("var"), " must be a Categorical or Text Crunch Variable.") + halt(dQuote(substitute(var)), " must be a Categorical or Text Crunch Variable.") } - uniques <- unique(v[!is.na(v)]) cats <- unique(unlist(strsplit(uniques, delim))) - vardefs <- lapply(cats, function(x) createSubvarDef(v, x, delim, - selected, not_selected, unanswered, missing = is.na(v))) + vardefs <- lapply(cats, function(x) createSubvarDef(var, x, delim, + selected, not_selected, unanswered)) ds <- loadDataset(datasetReference(var)) - addVariables(ds, vardefs) - hide(var) - ds <- refresh(ds) + ds <- addVariables(ds, vardefs) + var <- hide(var) return(makeMR(ds[, cats], name = name, selections = selected)) } @@ -132,8 +130,6 @@ mrFromDelim <- function(var, #' a string is present in a delimited list, then substitutes the user supplied values #' to indicate selection, non-selection, and missingness. #' -#' TODO: When Crunch allows variable derivation to be created via regex, this should -#' create a derivation instead of a definition with values. #' #' @inheritParams mrFromDelim #' @param str A string whose presence indicates a selection @@ -141,12 +137,38 @@ mrFromDelim <- function(var, #' @keywords internal #' #' @return A VariableDefinition -createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered, missing) { - out <- values <- grepl(buildDelimRegex(str, delim), var) - out[values] <- selected - out[!values] <- not_selected - out[missing] <- unanswered - return(toVariable(factor(out), name = str)) +createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered) { + if (is.na(unanswered)) { + unanswered <- "No Data" + } + browser() + new_cat_type <- list( + value = list( + class = "categorical", + categories = list( + # list("id" = 1, + # "name" = unanswered, + # "numeric_value" = NA, + # "missing" = TRUE), + list("id" = 1, + "name" = selected, + "numeric_value" = NA, + "missing" = FALSE), + list("id" = 2, + "name" = not_selected, + "numeric_value" = NA, + "missing" = FALSE) + ) + ) + ) + new_cat <- list(column = I(1:2), type = new_cat_type) + deriv <- zfunc("case", new_cat) + #deriv$args[[2]] <- zfunc("is_missing", var) + deriv$args[[2]] <- zfunc("~=", var, buildDelimRegex(str, delim)) + out <- VariableDefinition(name = str, derivation = deriv) + ds$test3 <- out + ds$test2 <- NULL + return(out) } #' Build Regex to find delimited items. @@ -164,7 +186,7 @@ createSubvarDef <- function(var, str, delim, selected, not_selected, unanswered, #' #' @return A character string #' @keywords internal -buildDelimRegex <- function(str, delim){ +buildDelimRegex <- function (str, delim){ delim <- paste0('\\', delim) # the delimeter needs to be escaped in case it's a regex character regex <- paste0( "^", str, delim, "|", From 6a035ea34a2429da329965a288293d5562915f96 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Thu, 16 Nov 2017 11:39:17 -0400 Subject: [PATCH 07/13] Variable definition accomodates missing values --- R/make-array.R | 24 ++++++------ tests/testthat/test-make-array.R | 67 +++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index c7e9b1e6e..17a7cad09 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -120,7 +120,7 @@ mrFromDelim <- function (var, ds <- loadDataset(datasetReference(var)) ds <- addVariables(ds, vardefs) var <- hide(var) - return(makeMR(ds[, cats], name = name, selections = selected)) + return(makeMR(ds[, cats], name = name, selections = selected, ...)) } #' createSubvarDef @@ -137,37 +137,35 @@ mrFromDelim <- function (var, #' @keywords internal #' #' @return A VariableDefinition -createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered) { +createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered, debug = FALSE) { + if(debug) browser() if (is.na(unanswered)) { unanswered <- "No Data" } - browser() new_cat_type <- list( value = list( class = "categorical", categories = list( - # list("id" = 1, - # "name" = unanswered, - # "numeric_value" = NA, - # "missing" = TRUE), list("id" = 1, + "name" = unanswered, + "numeric_value" = NA, + "missing" = TRUE), + list("id" = 2, "name" = selected, "numeric_value" = NA, "missing" = FALSE), - list("id" = 2, + list("id" = 3, "name" = not_selected, "numeric_value" = NA, "missing" = FALSE) ) ) ) - new_cat <- list(column = I(1:2), type = new_cat_type) + new_cat <- list(column = I(1:3), type = new_cat_type) deriv <- zfunc("case", new_cat) - #deriv$args[[2]] <- zfunc("is_missing", var) - deriv$args[[2]] <- zfunc("~=", var, buildDelimRegex(str, delim)) + deriv$args[[2]] <- zfunc("is_missing", var) + deriv$args[[3]] <- zfunc("~=", var, buildDelimRegex(str, delim)) out <- VariableDefinition(name = str, derivation = deriv) - ds$test3 <- out - ds$test2 <- NULL return(out) } diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index 3b50d348a..26f5c84e0 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -61,36 +61,65 @@ with_mock_crunch({ test_that("mrFromDelim errors correctly", { expect_error(mrFromDelim(ds$var, "; ",), "Must supply a name for the new variable") - expect_error(mrFromDelim(mtcars$cyl, name = "name"), - paste0(dQuote("var"), " must be a Categorical or Text Crunch Variable.")) + expect_error(mrFromDelim("string", name = "name"), + paste0(dQuote("string"), " must be a Categorical or Text Crunch Variable.")) }) test_that("createSubvarDef generates the correct variable definition", { v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak", NA) - expected <- structure(list( - values = c(-1L, -1L, -1L, -1L, -1L), - type = "categorical", - categories = list( - structure(list( - id = -1L, - name = "No Data", - numeric_value = NULL, - missing = TRUE - ), - .Names = c("id", "name", "numeric_value", "missing") + expected <- list( + name = "oak", + derivation = list( + `function` = "case", + args = list( + list( + column = I(1:3), + type = list( + value = list( + class = "categorical", + categories = list( + list(id = 1, + name = "No Data", + numeric_value = NA, + missing = TRUE), + list(id = 2, + name = "Yes", + numeric_value = NA, + missing = FALSE), + list(id = 3, + name = "No", + numeric_value = NA, + missing = FALSE) + ) + ) + ) + ), + list(`function` = "is_missing", + args = list( + list(column = c("maple; birch", "oak; maple; birch", + "birch; sugar maple", "maple butter; oak", NA) + ) + ) + ), list( + `function` = "~=", + args = list(list(column = c("maple; birch", + "oak; maple; birch", "birch; sugar maple", "maple butter; oak", + NA) + ), list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$") + ) + ) ) - ), - name = "oak"), - .Names = c("values", "type", "categories", "name"), - class = "VariableDefinition") + ) + ) + varDef <- crunch:::createSubvarDef(v, str = "oak", delim = "; ", selected = "Yes", not_selected = "No", - unanswered = v[is.na(v)]) + unanswered = NA) expect_equivalent(varDef, expected) }) - c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + test_that("buildDelimRegex generates the expected regular expression", { rx <- buildDelimRegex("maple", "; ") expect_true(grepl(rx, "maple")) From 52479c2064a8825f424c38258f2c821cd3121b14 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Thu, 16 Nov 2017 13:26:27 -0400 Subject: [PATCH 08/13] Updated tests --- R/make-array.R | 5 +- tests/testthat/test-make-array.R | 85 ++++++++++++++------------------ 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index 17a7cad09..89afdc5f0 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -137,8 +137,7 @@ mrFromDelim <- function (var, #' @keywords internal #' #' @return A VariableDefinition -createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered, debug = FALSE) { - if(debug) browser() +createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered) { if (is.na(unanswered)) { unanswered <- "No Data" } @@ -165,7 +164,7 @@ createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered deriv <- zfunc("case", new_cat) deriv$args[[2]] <- zfunc("is_missing", var) deriv$args[[3]] <- zfunc("~=", var, buildDelimRegex(str, delim)) - out <- VariableDefinition(name = str, derivation = deriv) + out <- VariableDefinition(name = paste0(alias(var), "_", str), derivation = deriv) return(out) } diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index 26f5c84e0..d21d6b2bc 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -66,53 +66,44 @@ with_mock_crunch({ }) test_that("createSubvarDef generates the correct variable definition", { - v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak", NA) - expected <- list( - name = "oak", - derivation = list( - `function` = "case", - args = list( - list( - column = I(1:3), - type = list( - value = list( - class = "categorical", - categories = list( - list(id = 1, - name = "No Data", - numeric_value = NA, - missing = TRUE), - list(id = 2, - name = "Yes", - numeric_value = NA, - missing = FALSE), - list(id = 3, - name = "No", - numeric_value = NA, - missing = FALSE) - ) - ) - ) - ), - list(`function` = "is_missing", - args = list( - list(column = c("maple; birch", "oak; maple; birch", - "birch; sugar maple", "maple butter; oak", NA) - ) - ) - ), list( - `function` = "~=", - args = list(list(column = c("maple; birch", - "oak; maple; birch", "birch; sugar maple", "maple butter; oak", - NA) - ), list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$") - ) - ) - ) - ) - ) - - varDef <- crunch:::createSubvarDef(v, str = "oak", + expected <- list(name = "textVar_oak", + derivation = list( + `function` = "case", + args = list( + list(column = I(1:3), + type = list( + value = list(class = "categorical", + categories = list( + list(id = 1, + name = "No Data", + numeric_value = NA, + missing = TRUE), + list(id = 2, + name = "Yes", + numeric_value = NA, + missing = FALSE), + list(id = 3, + name = "No", + numeric_value = NA, + missing = FALSE) + ) + ) + ) + ), + list(`function` = "is_missing", + args = list( + list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/") + ) + ), + list(`function` = "~=", + args = list( + list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/"), + list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$")) + ) + ) + ) + ) + varDef <- createSubvarDef(ds$textVar, str = "oak", delim = "; ", selected = "Yes", not_selected = "No", From 8aeb3c73caff0a741bbe5540f670d29feadba70d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 2 Feb 2018 15:13:11 -0600 Subject: [PATCH 09/13] Proper ZCL for deriving MRs directly from other variables. --- R/make-array.R | 59 +++++---- .../api/datasets/mr_from_delim.json | 70 +++++++++++ .../datasets/mr_from_delim/cube-0e8c31.json | 98 +++++++++++++++ .../variables-d118fa-4b6023-PATCH.json | 5 + .../mr_from_delim/variables-d118fa.json | 26 ++++ .../mr_from_delim/variables/hier-d118fa.json | 6 + tests/testthat/test-make-array.R | 114 +++++++++++------- 7 files changed, 311 insertions(+), 67 deletions(-) create mode 100644 tests/testthat/app.crunch.io/api/datasets/mr_from_delim.json create mode 100644 tests/testthat/app.crunch.io/api/datasets/mr_from_delim/cube-0e8c31.json create mode 100644 tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa-4b6023-PATCH.json create mode 100644 tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa.json create mode 100644 tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables/hier-d118fa.json diff --git a/R/make-array.R b/R/make-array.R index 89afdc5f0..bde248c60 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -92,38 +92,53 @@ makeMR <- function (subvariables, name, selections, ...) { #' @param var The variable containing the delimited responses #' @param delim The delimiter separating the responses #' @param name The name of the resulting MR variable -#' @param selected A character string used to indicate a selection, defaults to "selected -#' @param not_selected Character string identifying non-selection, defaults to "not_selected" +#' @param selected A character string used to indicate a selection, defaults to +#' "selected" +#' @param not_selected Character string identifying non-selection, defaults to +#' "not_selected" #' @param unanswered Character string indicating non-response, defaults to NA. #' @param ... Other arguments to be passed on to [makeMR()] #' #' @return a Multiple response variable definition #' @export mrFromDelim <- function (var, - delim, - name, - selected = "selected", - not_selected = "not_selected", - unanswered = NA, - ...) { + delim, + name, + selected = "selected", + not_selected = "not_selected", + unanswered = NA, + ...) { if (missing(name)) { halt("Must supply a name for the new variable") } if (is.Categorical(var) || is.Text(var)) { uniques <- names(table(var)) } else { - halt(dQuote(substitute(var)), " must be a Categorical or Text Crunch Variable.") + halt(dQuote(substitute(var)), + " must be a Categorical or Text Crunch Variable.") } - cats <- unique(unlist(strsplit(uniques, delim))) - vardefs <- lapply(cats, function(x) createSubvarDef(var, x, delim, + items <- unique(unlist(strsplit(uniques, delim))) + + # make a derivation expression for each unique item + subvarderivs <- lapply(items, function(x) createSubvarDeriv(var, x, delim, selected, not_selected, unanswered)) - ds <- loadDataset(datasetReference(var)) - ds <- addVariables(ds, vardefs) + names(subvarderivs) <- items + + # generate the ZCL to make an array from the subvariable derivations, and + # then do selection magic to make an MR + derivation <- zfunc("select_categories", + zfunc("array", + zfunc("select", list(map=subvarderivs), + list(value=I(c(1, 2, 3, 4, 5))))), + list(value=I("selected"))) + + # hide the original variable var <- hide(var) - return(makeMR(ds[, cats], name = name, selections = selected, ...)) + + return(VariableDefinition(derivation=derivation, name=name, ...)) } -#' createSubvarDef +#' createSubvarDeriv #' #' This function creates a single subvariable definition based on a character string #' to search for and an originating variable. It uses regex to determine whether @@ -137,7 +152,8 @@ mrFromDelim <- function (var, #' @keywords internal #' #' @return A VariableDefinition -createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered) { +createSubvarDeriv <- function (var, str, delim, selected, not_selected, + unanswered) { if (is.na(unanswered)) { unanswered <- "No Data" } @@ -164,8 +180,8 @@ createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered deriv <- zfunc("case", new_cat) deriv$args[[2]] <- zfunc("is_missing", var) deriv$args[[3]] <- zfunc("~=", var, buildDelimRegex(str, delim)) - out <- VariableDefinition(name = paste0(alias(var), "_", str), derivation = deriv) - return(out) + deriv$references <- list(name = str, alias = paste0(alias(var), "_", str)) + return(deriv) } #' Build Regex to find delimited items. @@ -177,14 +193,15 @@ createSubvarDef <- function (var, str, delim, selected, not_selected, unanswered #' 1. Alone with no delimiters `maple` #' #' This function builds a regex expression which captures those four values. It -#' is mostly broken out of [createSubvarDef()] for testing purposes. +#' is mostly broken out of [createSubvarDeriv()] for testing purposes. #' -#' @inheritParams createSubvarDef +#' @inheritParams createSubvarDeriv #' #' @return A character string #' @keywords internal buildDelimRegex <- function (str, delim){ - delim <- paste0('\\', delim) # the delimeter needs to be escaped in case it's a regex character + # the delimeter needs to be escaped in case it's a regex character + delim <- paste0('\\', delim) regex <- paste0( "^", str, delim, "|", delim, str, delim, "|", diff --git a/tests/testthat/app.crunch.io/api/datasets/mr_from_delim.json b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim.json new file mode 100644 index 000000000..de7eb696f --- /dev/null +++ b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim.json @@ -0,0 +1,70 @@ +{ + "element": "shoji:entity", + "self": "https://app.crunch.io/api/datasets/mr_from_delim/", + "catalogs": { + "batches": "https://app.crunch.io/api/datasets/mr_from_delim/batches/", + "users": "https://app.crunch.io/api/datasets/mr_from_delim/users/", + "variables": "https://app.crunch.io/api/datasets/mr_from_delim/variables/", + "actions": "https://app.crunch.io/api/datasets/mr_from_delim/actions/", + "savepoints": "https://app.crunch.io/api/datasets/mr_from_delim/savepoints/", + "boxdata": "https://app.crunch.io/api/datasets/mr_from_delim/boxdata/", + "filters": "https://app.crunch.io/api/datasets/mr_from_delim/filters/", + "multitables": "https://app.crunch.io/api/datasets/mr_from_delim/multitables/", + "comparisons": "https://app.crunch.io/api/datasets/mr_from_delim/comparisons/", + "forks": "https://app.crunch.io/api/datasets/mr_from_delim/forks/", + "permissions": "https://app.crunch.io/api/datasets/mr_from_delim/permissions/", + "joins": "https://app.crunch.io/api/datasets/mr_from_delim/joins/", + "decks": "https://app.crunch.io/api/datasets/mr_from_delim/decks/", + "parent": "https://app.crunch.io/api/datasets/", + "weight_variables": "https://app.crunch.io/api/datasets/mr_from_delim/weight_variables/" + }, + "fragments": { + "preferences": "https://app.crunch.io/api/datasets/mr_from_delim/preferences/", + "stream": "https://app.crunch.io/api/datasets/mr_from_delim/stream/", + "settings": "https://app.crunch.io/api/datasets/mr_from_delim/settings/", + "visit": "https://app.crunch.io/api/datasets/mr_from_delim/visit/", + "state": "https://app.crunch.io/api/datasets/mr_from_delim/state/", + "table": "https://app.crunch.io/api/datasets/mr_from_delim/table/", + "pk": "https://app.crunch.io/api/datasets/mr_from_delim/pk/", + "exclusion": "https://app.crunch.io/api/datasets/mr_from_delim/exclusion/" + }, + "views": { + "cube": "https://app.crunch.io/api/datasets/mr_from_delim/cube/", + "export": "https://app.crunch.io/api/datasets/mr_from_delim/export/", + "summary": "https://app.crunch.io/api/datasets/mr_from_delim/summary/", + "applied_filters": "https://app.crunch.io/api/datasets/mr_from_delim/filters/applied/" + }, + "specification": "https://app.crunch.io/api/specifications/datasets/", + "description": "Detail for a given dataset", + "body": { + "size": { + "rows": 4, + "columns": 1 + }, + "current_editor_name": "Me", + "owner_name": "Me", + "name": "for testing functionality of MR from delimited text", + "end_date": null, + "access_time": "2017-04-12T19:07:06.351000", + "notes": "", + "current_editor": "https://app.crunch.io/api/users/me/", + "creation_time": "2017-04-12T14:34:00.015000", + "archived": false, + "start_date": null, + "modification_time": "2017-04-12T14:34:03.239000", + "app_settings": { + "crunch": { + "deleted_rogue_vp": true + } + }, + "owner": "https://app.crunch.io/api/users/me/", + "permissions": { + "edit": true, + "change_permissions": true, + "view": true + }, + "is_published": true, + "id": "mr_from_delim", + "description": "" + } +} diff --git a/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/cube-0e8c31.json b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/cube-0e8c31.json new file mode 100644 index 000000000..fadc89da9 --- /dev/null +++ b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/cube-0e8c31.json @@ -0,0 +1,98 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/mr_from_delim/?query=%7B%22dimensions%22%3A%5B%7B%22variable%22%3A%22http%3A%2F%2Flocal.crunch.io%3A8080%2Fapi%2Fdatasets%2F85ca8c595bc94a5f8e15bd94bfb8c5e7%2Fvariables%2F000001%2F%22%7D%5D%2C%22measures%22%3A%7B%22count%22%3A%7B%22function%22%3A%22cube_count%22%2C%22args%22%3A%5B%5D%7D%7D%7D&filter=%7B%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/mr_from_delim/variables/000001/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "delim", + "name": "delim" + }, + "derived": true, + "type": { + "subtype": { + "class": "text", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "elements": [ + { + "id": 0, + "value": "birch; sugar maple", + "missing": false + }, + { + "id": 1, + "value": "maple butter; oak", + "missing": false + }, + { + "id": 2, + "value": "maple; birch", + "missing": false + }, + { + "id": 3, + "value": "oak; maple; birch", + "missing": false + } + ], + "class": "enum" + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 1, + 1, + 1, + 1 + ], + "n_missing": 0, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 1, + 1, + 1, + 1 + ], + "n": 4 + } + } +} diff --git a/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa-4b6023-PATCH.json b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa-4b6023-PATCH.json new file mode 100644 index 000000000..6434a5946 --- /dev/null +++ b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa-4b6023-PATCH.json @@ -0,0 +1,5 @@ +{ + "https://app.crunch.io/api/datasets/mr_from_delim/variables/000003/": { + "discarded": true + } +} diff --git a/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa.json b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa.json new file mode 100644 index 000000000..b2e2fe2c2 --- /dev/null +++ b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables-d118fa.json @@ -0,0 +1,26 @@ +{ + "element": "shoji:catalog", + "self": "https://app.crunch.io/api/datasets/mr_from_delim/variables/?relative=on", + "catalogs": { + "private": "https://app.crunch.io/api/datasets/mr_from_delim/variables/private/" + }, + "orders": { + "personal": "https://app.crunch.io/api/datasets/mr_from_delim/variables/personal/", + "weights": "https://app.crunch.io/api/datasets/mr_from_delim/variables/weights/", + "hier": "https://app.crunch.io/api/datasets/mr_from_delim/variables/hier/" + }, + "specification": "https://app.crunch.io/api/specifications/variables/", + "description": "List of Variables of this dataset", + "index": { + "000003/": { + "discarded": false, + "alias": "delimed_text", + "name": "delimed_text", + "type": "text", + "notes": "", + "derived": false, + "id": "000001", + "description": "" + } + } +} diff --git a/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables/hier-d118fa.json b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables/hier-d118fa.json new file mode 100644 index 000000000..40336c70d --- /dev/null +++ b/tests/testthat/app.crunch.io/api/datasets/mr_from_delim/variables/hier-d118fa.json @@ -0,0 +1,6 @@ +{ + "element": "shoji:order", + "self": "https://app.crunch.io/api/datasets/four/variables/hier/?relative=on", + "description": "Hierarchical order of dataset variables", + "graph": ["../000001/", "../000010/", "../000000/", "../000006/", "../000003/", "../000008/", "../000004/", "../000007/", "../000005/", "../000002/", "../000009/"] +} diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index d21d6b2bc..bf8ff113e 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -59,51 +59,50 @@ with_mock_crunch({ }) test_that("mrFromDelim errors correctly", { - expect_error(mrFromDelim(ds$var, "; ",), + expect_error(mrFromDelim(ds$var, "; "), "Must supply a name for the new variable") expect_error(mrFromDelim("string", name = "name"), paste0(dQuote("string"), " must be a Categorical or Text Crunch Variable.")) }) - test_that("createSubvarDef generates the correct variable definition", { - expected <- list(name = "textVar_oak", - derivation = list( - `function` = "case", - args = list( - list(column = I(1:3), - type = list( - value = list(class = "categorical", - categories = list( - list(id = 1, - name = "No Data", - numeric_value = NA, - missing = TRUE), - list(id = 2, - name = "Yes", - numeric_value = NA, - missing = FALSE), - list(id = 3, - name = "No", - numeric_value = NA, - missing = FALSE) - ) - ) - ) - ), - list(`function` = "is_missing", - args = list( - list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/") - ) - ), - list(`function` = "~=", - args = list( - list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/"), - list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$")) - ) + test_that("createSubvarDeriv generates the correct variable definition", { + expected <- list( + `function` = "case", + args = list( + list(column = I(1:3), + type = list( + value = list(class = "categorical", + categories = list( + list(id = 1, + name = "No Data", + numeric_value = NA, + missing = TRUE), + list(id = 2, + name = "Yes", + numeric_value = NA, + missing = FALSE), + list(id = 3, + name = "No", + numeric_value = NA, + missing = FALSE) + ) + ) + ) + ), + list(`function` = "is_missing", + args = list( + list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/") + ) + ), + list(`function` = "~=", + args = list( + list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/"), + list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$")) ) - ) + ), + references = list(name = "oak", alias = "textVar_oak") ) - varDef <- createSubvarDef(ds$textVar, str = "oak", + varDef <- createSubvarDeriv(ds$textVar, str = "oak", delim = "; ", selected = "Yes", not_selected = "No", @@ -123,6 +122,35 @@ with_mock_crunch({ expect_true(grepl(buildDelimRegex("maple", "| "), "oak| maple| birch")) expect_false(grepl(buildDelimRegex("maple", "| "), "oak| sugar maple| birch")) }) + + test_that("mrFromDelim sends the correct variable derivation", { + ds2 <- loadDataset("https://app.crunch.io/api/datasets/mr_from_delim/") + trees <- c("birch", "sugar maple", "maple butter", "oak", "maple") + expected <- VariableDefinition( + derivation=zfunc( + "select_categories", zfunc( + "array", zfunc( + "select", + list(map=lapply(trees, function (tree) { + return(createSubvarDeriv(ds2$delimed_text, + str = tree, + delim = "; ", + selected = "Yes", + not_selected = "No", + unanswered = NA)) + })), + list(value=I(c(1, 2, 3, 4, 5))) + ) + ), + list(value=I("selected"))), + name="New Mr") + varDef <- mrFromDelim(ds2$delimed_text, delim = "; ", + name = "New Mr", + selected = "Yes", + not_selected = "No", + unanswered = NA) + expect_equivalent(varDef, expected) + }) }) with_test_authentication({ @@ -208,14 +236,8 @@ with_test_authentication({ ds$delim <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") test_that("mrFromDelim creates a variable", { ds$mr_5 <- mrFromDelim(ds$delim, delim = "; ", name = "myMR") - expect_identical(names(subvariables(ds$mr_5)), - c("maple", "birch", "oak", "sugar maple", "maple butter")) - expect_identical(names(categories(ds$mr_5)), - c("not_selected", "selected", "No Data")) - expect_identical(as.vector(ds$mr_5$maple), - structure(c(2L, 2L, 1L, 1L), .Label = c("not_selected", "selected" - ), class = "factor")) - expect_identical(hiddenVariables(ds), "delim") + expect_true(is.derived(ds$mr_5)) + # TODO: assert shape of as.vector, etc. }) }) }) From e57b227c08db0ec6dd1e0f30f7a2d27f53d74e10 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 2 Feb 2018 15:16:05 -0600 Subject: [PATCH 10/13] Updated docs --- man/buildDelimRegex.Rd | 2 +- ...reateSubvarDef.Rd => createSubvarDeriv.Rd} | 20 +++++++++---------- man/mrFromDelim.Rd | 13 ++++++++---- 3 files changed, 19 insertions(+), 16 deletions(-) rename man/{createSubvarDef.Rd => createSubvarDeriv.Rd} (60%) diff --git a/man/buildDelimRegex.Rd b/man/buildDelimRegex.Rd index 3dda1751c..540bf2455 100644 --- a/man/buildDelimRegex.Rd +++ b/man/buildDelimRegex.Rd @@ -25,6 +25,6 @@ A delimited item can appear in a list in four ways } \details{ This function builds a regex expression which captures those four values. It -is mostly broken out of \code{\link[=createSubvarDef]{createSubvarDef()}} for testing purposes. +is mostly broken out of \code{\link[=createSubvarDeriv]{createSubvarDeriv()}} for testing purposes. } \keyword{internal} diff --git a/man/createSubvarDef.Rd b/man/createSubvarDeriv.Rd similarity index 60% rename from man/createSubvarDef.Rd rename to man/createSubvarDeriv.Rd index 7ef279bfb..b74d00610 100644 --- a/man/createSubvarDef.Rd +++ b/man/createSubvarDeriv.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/make-array.R -\name{createSubvarDef} -\alias{createSubvarDef} -\title{createSubvarDef} +\name{createSubvarDeriv} +\alias{createSubvarDeriv} +\title{createSubvarDeriv} \usage{ -createSubvarDef(var, str, delim, selected, not_selected, unanswered, missing) +createSubvarDeriv(var, str, delim, selected, not_selected, unanswered) } \arguments{ \item{var}{The variable containing the delimited responses} @@ -13,11 +13,13 @@ createSubvarDef(var, str, delim, selected, not_selected, unanswered, missing) \item{delim}{The delimiter separating the responses} -\item{selected}{A character string used to indicate a selection} +\item{selected}{A character string used to indicate a selection, defaults to +"selected"} -\item{not_selected}{Character string identifying non-selection} +\item{not_selected}{Character string identifying non-selection, defaults to +"not_selected"} -\item{unanswered}{Character string indicating non-response} +\item{unanswered}{Character string indicating non-response, defaults to NA.} \item{missing}{A logical vector indicating which variable entries are missing} } @@ -30,8 +32,4 @@ to search for and an originating variable. It uses regex to determine whether a string is present in a delimited list, then substitutes the user supplied values to indicate selection, non-selection, and missingness. } -\details{ -TODO: When Crunch allows variable derivation to be created via regex, this should -create a derivation instead of a definition with values. -} \keyword{internal} diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd index fc9593781..a3322ff05 100644 --- a/man/mrFromDelim.Rd +++ b/man/mrFromDelim.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/make-array.R \name{mrFromDelim} \alias{mrFromDelim} -\title{Create Multiple Response Variable from Delimited} +\title{Create Multiple Response Variable from Delimited lists} \usage{ mrFromDelim(var, delim, name, selected = "selected", not_selected = "not_selected", unanswered = NA, ...) @@ -14,14 +14,19 @@ mrFromDelim(var, delim, name, selected = "selected", \item{name}{The name of the resulting MR variable} -\item{selected}{A character string used to indicate a selection} +\item{selected}{A character string used to indicate a selection, defaults to +"selected"} -\item{not_selected}{Character string identifying non-selection} +\item{not_selected}{Character string identifying non-selection, defaults to +"not_selected"} -\item{unanswered}{Character string indicating non-response} +\item{unanswered}{Character string indicating non-response, defaults to NA.} \item{...}{Other arguments to be passed on to \code{\link[=makeMR]{makeMR()}}} } +\value{ +a Multiple response variable definition +} \description{ Surveys often record multiple response questions in delimited lists where each respondent's selections are separated by a delimiter like \code{;} or \code{|}. From 36c5bf4afa0532e00926b0b11b90a7fe14116fae Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 2 Feb 2018 15:29:33 -0600 Subject: [PATCH 11/13] Man page optimization --- R/make-array.R | 2 +- man/createSubvarDeriv.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index bde248c60..80157d67c 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -138,7 +138,7 @@ mrFromDelim <- function (var, return(VariableDefinition(derivation=derivation, name=name, ...)) } -#' createSubvarDeriv +#' Create subvariable derivation expressions #' #' This function creates a single subvariable definition based on a character string #' to search for and an originating variable. It uses regex to determine whether diff --git a/man/createSubvarDeriv.Rd b/man/createSubvarDeriv.Rd index b74d00610..9865af5a8 100644 --- a/man/createSubvarDeriv.Rd +++ b/man/createSubvarDeriv.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/make-array.R \name{createSubvarDeriv} \alias{createSubvarDeriv} -\title{createSubvarDeriv} +\title{Create subvariable derivation expressions} \usage{ createSubvarDeriv(var, str, delim, selected, not_selected, unanswered) } From 6b7fcb34f8bfa462d55ce87ee18214defd92fc99 Mon Sep 17 00:00:00 2001 From: GShotwell Date: Wed, 14 Feb 2018 12:42:44 -0400 Subject: [PATCH 12/13] Regex handling - Changed map names to accomodate no periods in Mongo - Added escape functionality for regex metacharacters --- R/make-array.R | 28 ++++++++++++++-------------- R/misc.R | 14 ++++++++++++++ tests/testthat/test-make-array.R | 17 ++++++++++++----- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index 80157d67c..acdae8369 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -92,9 +92,9 @@ makeMR <- function (subvariables, name, selections, ...) { #' @param var The variable containing the delimited responses #' @param delim The delimiter separating the responses #' @param name The name of the resulting MR variable -#' @param selected A character string used to indicate a selection, defaults to +#' @param selected A character string used to indicate a selection, defaults to #' "selected" -#' @param not_selected Character string identifying non-selection, defaults to +#' @param not_selected Character string identifying non-selection, defaults to #' "not_selected" #' @param unanswered Character string indicating non-response, defaults to NA. #' @param ... Other arguments to be passed on to [makeMR()] @@ -118,23 +118,21 @@ mrFromDelim <- function (var, " must be a Categorical or Text Crunch Variable.") } items <- unique(unlist(strsplit(uniques, delim))) - - # make a derivation expression for each unique item + # make a derivation expression for each unique item subvarderivs <- lapply(items, function(x) createSubvarDeriv(var, x, delim, selected, not_selected, unanswered)) - names(subvarderivs) <- items - - # generate the ZCL to make an array from the subvariable derivations, and + names(subvarderivs) <- gsub("\\.", "_", items) # mongo errors if there are dots in the names + + # generate the ZCL to make an array from the subvariable derivations, and # then do selection magic to make an MR derivation <- zfunc("select_categories", - zfunc("array", + zfunc("array", zfunc("select", list(map=subvarderivs), list(value=I(c(1, 2, 3, 4, 5))))), list(value=I("selected"))) - + # hide the original variable var <- hide(var) - return(VariableDefinition(derivation=derivation, name=name, ...)) } @@ -180,19 +178,20 @@ createSubvarDeriv <- function (var, str, delim, selected, not_selected, deriv <- zfunc("case", new_cat) deriv$args[[2]] <- zfunc("is_missing", var) deriv$args[[3]] <- zfunc("~=", var, buildDelimRegex(str, delim)) - deriv$references <- list(name = str, alias = paste0(alias(var), "_", str)) + new_alias <- paste0(alias(var), "_", gsub("\\.", "_", str)) # Mongo doesn't allow aliases with dots + deriv$references <- list(name = str, alias = new_alias) return(deriv) } #' Build Regex to find delimited items. #' -#' A delimited item can appear in a list in four ways +#' A delimited item `maple` can appear in a list in four ways #' 1. At the start of a list `maple; oak` #' 1. In the middle of a list `oak; maple; birch` #' 1. At the end of a list `oak; maple` #' 1. Alone with no delimiters `maple` #' -#' This function builds a regex expression which captures those four values. It +#' This function builds a regex expression which captures those four cases It #' is mostly broken out of [createSubvarDeriv()] for testing purposes. #' #' @inheritParams createSubvarDeriv @@ -201,7 +200,8 @@ createSubvarDeriv <- function (var, str, delim, selected, not_selected, #' @keywords internal buildDelimRegex <- function (str, delim){ # the delimeter needs to be escaped in case it's a regex character - delim <- paste0('\\', delim) + delim <- escapeRegex(delim) + str <- escapeRegex(str) regex <- paste0( "^", str, delim, "|", delim, str, delim, "|", diff --git a/R/misc.R b/R/misc.R index 0e9c1b6df..f2196c7b9 100644 --- a/R/misc.R +++ b/R/misc.R @@ -205,3 +205,17 @@ setCrunchAPI <- function (subdomain, port=NULL) { return(invisible()) } +#' escape Regex +#' +#'This function takes a string and escapes all of the special characters in the string. +#'So VB.NET becomes VB\.NET. Note that R will print this as VB\\.NET, but `cat` reveals +#'that there's only one `\` +#' @param string +#' +#' @kerwords internal +#' escapeRegex("Tom&Jerry") +#' escapeRegex(".Net) +escapeRegex <- function(string) { + out <- gsub("([.|()\\^{}+$*?])", "\\\\\\1", string) + return(gsub("(\\[|\\])", "\\\\\\1", out)) +} diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index bf8ff113e..1b21ba44c 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -97,7 +97,7 @@ with_mock_crunch({ list(`function` = "~=", args = list( list(variable = "https://app.crunch.io/api/datasets/1/variables/textVar/"), - list(value = "^oak\\; |\\; oak\\; |\\; oak$|^oak$")) + list(value = "^oak; |; oak; |; oak$|^oak$")) ) ), references = list(name = "oak", alias = "textVar_oak") @@ -110,6 +110,13 @@ with_mock_crunch({ expect_equivalent(varDef, expected) }) + test_that("escapeRegex escapes all metacharacters", { + metachars <- c(".", "^", "$", "*", "+", "?", "{", "}", "[", "]", "\\", "|", "(", ")") + str <- paste0("vb", metachars, "net") + expect_identical(escapeRegex(str), paste0("vb\\", metachars, "net")) + expect_identical(escapeRegex("vb.a|net"), "vb\\.a\\|net") + }) + test_that("buildDelimRegex generates the expected regular expression", { rx <- buildDelimRegex("maple", "; ") expect_true(grepl(rx, "maple")) @@ -122,7 +129,7 @@ with_mock_crunch({ expect_true(grepl(buildDelimRegex("maple", "| "), "oak| maple| birch")) expect_false(grepl(buildDelimRegex("maple", "| "), "oak| sugar maple| birch")) }) - + test_that("mrFromDelim sends the correct variable derivation", { ds2 <- loadDataset("https://app.crunch.io/api/datasets/mr_from_delim/") trees <- c("birch", "sugar maple", "maple butter", "oak", "maple") @@ -232,12 +239,12 @@ with_test_authentication({ }) whereas("mrFromDelim functions as expected", { ds <- newDataset(mrdf) - v <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") - ds$delim <- c("maple; birch", "oak; maple; birch", "birch; sugar maple", "maple butter; oak") + v <- c("ma.ple; birch", "oak; ma.ple; birch", "birch; sugar maple", "maple butter; oak") + ds$delim <- c("ma.ple; birch", "oak; ma.ple; birch", "birch; sugar maple", "maple butter; oak") test_that("mrFromDelim creates a variable", { ds$mr_5 <- mrFromDelim(ds$delim, delim = "; ", name = "myMR") expect_true(is.derived(ds$mr_5)) - # TODO: assert shape of as.vector, etc. + expect_identical(dim(as.vector(ds$mr_5)), c(nrow(ds), 5)) }) }) }) From 3144e1263151985392debcb54d87db72cc5fb9d4 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Mar 2018 14:18:27 -0700 Subject: [PATCH 13/13] Rename function to makeMRFromText(); add NEWS --- NAMESPACE | 2 +- NEWS.md | 1 + R/make-array.R | 4 ++-- man/mrFromDelim.Rd | 6 +++--- tests/testthat/test-make-array.R | 16 ++++++++-------- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a37e5ce51..60d0b00bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -181,7 +181,7 @@ export(mergeFork) export(mkdir) export(modifyWeightVariables) export(moveToGroup) -export(mrFromDelim) +export(makeMRFromText) export(mv) export(newDataset) export(newDatasetByColumn) diff --git a/NEWS.md b/NEWS.md index 2dc39bdc3..61e5d272c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ ## crunch 1.20.1 (under development) * Variables can now be converted from one type to another with server-side derivations. Have a text input that is only numbers, and want to have a variables that is a true numeric? Simple, just use `ds$id_var_numeric <- as.Numeric(ds$id_var)`. There Are `as.*` methods for all Crunch data types except for array-like variables. +* `makeMRFromText()` to take a variable imported as delimited strings, parse the multiple-response options, and return a (derived) multiple_response variable. * Added support for setting population sizes on datasets with `setPopulation(ds, size = 24.13e6, magnitude = 3)` and for getting population sizes (or magnitudes) with `popSize(ds)` and `popMagnitude(ds)` respectively. * Add `options(crunch.show.progress)` to govern whether to report progress of long-running requests. Default is `TRUE`, but set it to `FALSE` to run quietly. * Export `pollProgress()` and recommend using that when a long-running request fails to complete within the local timeout. diff --git a/R/make-array.R b/R/make-array.R index acdae8369..a05db5b33 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -101,7 +101,7 @@ makeMR <- function (subvariables, name, selections, ...) { #' #' @return a Multiple response variable definition #' @export -mrFromDelim <- function (var, +makeMRFromText <- function (var, delim, name, selected = "selected", @@ -144,7 +144,7 @@ mrFromDelim <- function (var, #' to indicate selection, non-selection, and missingness. #' #' -#' @inheritParams mrFromDelim +#' @inheritParams makeMRFromText #' @param str A string whose presence indicates a selection #' @param missing A logical vector indicating which variable entries are missing #' @keywords internal diff --git a/man/mrFromDelim.Rd b/man/mrFromDelim.Rd index a3322ff05..79c6f7e1a 100644 --- a/man/mrFromDelim.Rd +++ b/man/mrFromDelim.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/make-array.R -\name{mrFromDelim} -\alias{mrFromDelim} +\name{makeMRFromText} +\alias{makeMRFromText} \title{Create Multiple Response Variable from Delimited lists} \usage{ -mrFromDelim(var, delim, name, selected = "selected", +makeMRFromText(var, delim, name, selected = "selected", not_selected = "not_selected", unanswered = NA, ...) } \arguments{ diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index 1b21ba44c..a2188e7d1 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -58,10 +58,10 @@ with_mock_crunch({ "Undefined columns selected: NOTAVARIABLE") }) - test_that("mrFromDelim errors correctly", { - expect_error(mrFromDelim(ds$var, "; "), + test_that("makeMRFromText errors correctly", { + expect_error(makeMRFromText(ds$var, "; "), "Must supply a name for the new variable") - expect_error(mrFromDelim("string", name = "name"), + expect_error(makeMRFromText("string", name = "name"), paste0(dQuote("string"), " must be a Categorical or Text Crunch Variable.")) }) @@ -130,7 +130,7 @@ with_mock_crunch({ expect_false(grepl(buildDelimRegex("maple", "| "), "oak| sugar maple| birch")) }) - test_that("mrFromDelim sends the correct variable derivation", { + test_that("makeMRFromText sends the correct variable derivation", { ds2 <- loadDataset("https://app.crunch.io/api/datasets/mr_from_delim/") trees <- c("birch", "sugar maple", "maple butter", "oak", "maple") expected <- VariableDefinition( @@ -151,7 +151,7 @@ with_mock_crunch({ ), list(value=I("selected"))), name="New Mr") - varDef <- mrFromDelim(ds2$delimed_text, delim = "; ", + varDef <- makeMRFromText(ds2$delimed_text, delim = "; ", name = "New Mr", selected = "Yes", not_selected = "No", @@ -237,12 +237,12 @@ with_test_authentication({ expect_true(setequal(names(ds), names(mrdf))) expect_identical(ncol(ds), 4L) }) - whereas("mrFromDelim functions as expected", { + whereas("makeMRFromText functions as expected", { ds <- newDataset(mrdf) v <- c("ma.ple; birch", "oak; ma.ple; birch", "birch; sugar maple", "maple butter; oak") ds$delim <- c("ma.ple; birch", "oak; ma.ple; birch", "birch; sugar maple", "maple butter; oak") - test_that("mrFromDelim creates a variable", { - ds$mr_5 <- mrFromDelim(ds$delim, delim = "; ", name = "myMR") + test_that("makeMRFromText creates a variable", { + ds$mr_5 <- makeMRFromText(ds$delim, delim = "; ", name = "myMR") expect_true(is.derived(ds$mr_5)) expect_identical(dim(as.vector(ds$mr_5)), c(nrow(ds), 5)) })