diff --git a/DESCRIPTION b/DESCRIPTION index e7cc20b41..1b22523fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Suggests: knitr, magrittr, rmarkdown, + rootWishart, rstudioapi, spelling, styler, @@ -80,6 +81,7 @@ Collate: 'crunch.R' 'crunchbox.R' 'cube-collapse-dimensions.R' + 'cube-comparisons.R' 'cube-dims.R' 'cube-index-table.R' 'cube-query.R' diff --git a/NAMESPACE b/NAMESPACE index 2f481e05c..e47f85828 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -231,6 +231,7 @@ export(noTransforms) export(notifyIfNewVersion) export(ownerNames) export(owners) +export(pairwiseMatrix) export(pendingStream) export(permissions) export(pk) @@ -288,6 +289,8 @@ export(versions) export(webApp) export(weight) export(weightVariables) +export(wishartMatrix) +export(wishartPvalues) export(with_consent) export(write.csv.gz) export(writePreparedData) diff --git a/R/cube-comparisons.R b/R/cube-comparisons.R new file mode 100644 index 000000000..a01389138 --- /dev/null +++ b/R/cube-comparisons.R @@ -0,0 +1,306 @@ +#' Column and row comparison +#' +#' Comparing a column or row with a baseline column or row. This calculates the +#' test statistic \eqn{\chi^2} of independence for each pair of columns/rows. +#' +#' @param cube a CrunchCube to calculate the comparison on +#' @param dim which dimension is being compared (`cols` or `rows`, only valid +#' for `compareDims()`) +#' @param baseline a character, the column/row to use as a baseline to compare `x` +#' against +#' @param x a character, the column/row to compare against the baseline +#' @param value Value of the test to return. Defaults to `statistic`; use `p.value` +#' for the p-value of the test of the pair of columns/rows. +#' @param ... arguments passed from `compareRows()` or `compareCols()` to +#' `compareDims()` (i.e. `baseline` and `x`) +#' +#' @return the value for the column or row given in `x`, default is Chi-squared +#' +#' @name dimension-comparison +NULL + +#' @rdname dimension-comparison +#' @export +compareCols <- function(cube, ...) compareDims(cube = cube, dim = "cols", ...) + +#' @rdname dimension-comparison +#' @export +compareRows <- function(cube, ...) compareDims(cube = cube, dim = "rows", ...) + +#' @rdname dimension-comparison +#' @export +compareDims <- function(cube, dim = c("cols", "rows"), baseline, x, value=c("statistic", "p.value")) { + dim <- match.arg(dim) + value <- match.arg(value) + # grab the names of extents for each dimensions + # TODO: we shouldn't need to do this if we can just pass baseline/x into the + # subsetting function when we can subset by name _or_ index natively + if (dim == "cols") { + names <- colnames(as.array(cube)) + } else if (dim == "rows") { + names <- rownames(as.array(cube)) + } + + # convert to numeric indices + # TODO: remove when we can accept either characters or indices and pass them + # to subsetting unmolested + if (is.character(baseline) && length(baseline) == 1) { + baseline_ind <- which(names == baseline) + } else { + stop("Currently, column comparison only accepts at most one category name.") + } + if (is.character(x) && length(x) == 1) { + x_ind <- which(names == x) + } else { + stop("Currently, column comparison only accepts at most one category name.") + } + + # ensure that the extents given are in the cube + # TODO: remove when we can defer this check to the subsetting methods + not_in_cube <- !(c(baseline, x) %in% names) + if (any(not_in_cube)) { + stop(c(baseline, x)[not_in_cube], " is not a column or row in the cube") + } + + # ensure that there are not MRs on the comparison direction + dim_types <- getDimTypes(cube) + if ((dim == "cols" & startsWith(dim_types[2], "mr_")) | + (dim == "rows" & startsWith(dim_types[1], "mr_"))) { + stop( + "Column or row z-scores are not implemented for multiple response ", + "dimensions" + ) + } + + # return NA for the diagonal (comparison is self) + # note: this is duplicated work in compareCols + if (x_ind == baseline_ind) { + return(NA) + } + + # generate the 2xm or nx2 table tfor testing + if (dim == "cols") { + sub_cube <- cube[, c(x_ind, baseline_ind)] + } else if (dim == "rows") { + sub_cube <- cube[c(x_ind, baseline_ind), ] + } + + # make the test + # TODO: fix this output for the diagonal case + out <- chisq.test(as.array(sub_cube))[[value]] + + return(out) +} + +#' Pairwise column and row comparison +#' +#' Given a single baseline column compare each other row or column against this +#' baseline. Internally this function uses `compareDims()` iteratively. +#' +#' *Warning* since there is more than one comparison being made against each +#' baseline the z-scores, and especially the p-values derived from these +#' z-scores should be interpreted with caution. Using standard p-value cutoffs +#' will result in anti-conservative interpretations because of the \href{https://en.wikipedia.org/wiki/Multiple_comparisons_problem}{multiple +#' comparisons problem}. +#' Adjustments to p-value cut offs (e.g. \href{https://en.wikipedia.org/wiki/Bonferroni_correction}{Bonferonni correction}) should be used when interpreting z-scores from the +#' `compare[Rows|Cols|Dims]Pairwise()` family of functions. +#' +#' @param cube a CrunchCube to calculate the comparison on +#' @param dim which dimension is being compared (`rows` or `cols`, only valid +#' for `compareDims()`) +#' @param baseline a character, the name of a column or row to compare +#' against each of the others in turn +#' @param value Value of the test to return. Defaults to `statistic`; use `p.value` +#' for the p-value of the test of the pair of columns/rows. +#' @param ... arguments passed from `compareRowsPairwise()` or +#' `compareColsPairwise()` to `compareDimsPairwise()` (e.g., `baseline`) +#' +#' @return a vector of representing the baseline column’s test statistic +#' with respect to each of the others. +#' +#' @name dimension-comparison-pairwise +NULL + +#' @rdname dimension-comparison-pairwise +#' @export +compareColsPairwise <- function(cube, ...) { + compareDimsPairwise(cube = cube, dim = "cols", ...) +} + +#' @rdname dimension-comparison-pairwise +#' @export +compareRowsPairwise <- function(cube, ...) { + compareDimsPairwise(cube = cube, dim = "rows", ...) +} + +#' @rdname dimension-comparison-pairwise +#' @export +compareDimsPairwise <- function(cube, dim = c("cols", "rows"), baseline, value=c("statistic", "p.value")) { + dim <- match.arg(dim) + + # grab the names of extents for each dimensions + # TODO: we shouldn't need to do this if we can just pass baseline/x into the + # subsetting function when we can subset by name _or_ id natively + if (dim == "cols") { + names <- colnames(as.array(cube)) + } else if (dim == "rows") { + names <- rownames(as.array(cube)) + } + + to_compare <- names[!(names %in% baseline)] + + out <- vapply( + names, function(one_extent) { + # generate the 2xm or nx2 table for testing + return(compareDims(cube, baseline = baseline, x = one_extent, dim = dim, value=value)) + } + , numeric(1) + ) + + out <- simplify2array(out) + + # if the dimension is rows, the array that vapply returns needs to be + # transposed + if (dim == "rows") { + out <- t(out) + } + + names(out) <- names + + return(out) +} + +#' Matrix of Chi-Squared Statistics for all rows or columns +#' +#' Generate a matrix of pairwise comparisons of rows or columns, each against +#' the others. +#' +#' @param cube a CrunchCube representing counts of a categorical by categorical +#' contingency table. +#' @param dim which dimension along which to compare (`cols` or `rows`) +#' @param value Value of the test to return. Defaults to `statistic`; use `p.value` +#' for the p-value of the test of the pair of columns/rows. +#' +#' @return A symmetric square matrix of all column-comparison or row-comparison +#' \eqn{\chi^2} statistics. Typical element \eqn{i,j} is the test statistic equivalent +#' to \code{chisq.test} subsetting to just columns i and j (for dim="cols"). +#' @export +#' +#' @examples +#' \dontrun{ +#' some_cube <- crunch_example_data(cat_by_cat) +#' +#' # By default, return the 'statistic' of chisq.test, evaluated +#' # for each pair of columns pairwise: +#' pairwiseMatrix(some_cube) +#' +#' # or each pair of rows: +#' pairwiseMatrix(some_cube, dim="rows") +#' +#' # Or, specify another "value" from `chisq.test`: +#' pairwiseMatrix(some_cube, value="p.value") +#' } +pairwiseMatrix <- function(cube, dim=c("cols", "rows"), value=c('statistic', 'p.value')){ + dim <- match.arg(dim) + value <- match.arg(value) + a <- as.array(cube) + if (dim == "cols") { + names <- colnames(a) + } else if (dim == "rows") { + names <- rownames(a) + } + out <- vapply(names, function(cur) { + compareDimsPairwise(cube, dim=dim, baseline=cur, value=value) + }, + numeric(length(names)) + ) + return(simplify2array(out)) +} + +#' Matrix of Chi-Squared Statistics for all rows or columns +#' +#' Use the alternative Wishart method of forming the matrix of column- or row-wise +#' comparison Chi-squared test statistics for a categorical-by-categorical +#' contingency table. +#' +#' @param cube a CrunchCube of counts: a crosstab of two categorical variables +#' @param dim dimension along which to compare proportions (default `cols` or `rows`) +#' +#' @return A symmetric square matrix of Chi-squared statistics for a crosstab +#' @export +#' +#' @examples +#' \dontrun{ +#' # TODO: include Hirotsu occupation-illness example cube and output +#' illness_occupation <- crunch_example_data(hirotsu) +#' # Chi-squared statistic for joint null hypothesis that each +#' # column is equal to each other column. +#' +#' wishartMatrix(illness_occupation) +#' } +wishartMatrix <- function(cube, dim=c("cols","rows")){ + dim <- match.arg(dim) + a <- as.array(cube) + if(dim == "rows") { + # This would be simpler if we could locally transpose the CrunchCube, + # but instead these results will have transposed shapes for rows. + a <- t(a) + props <- t(prop.table(cube, 1)) + margins <- margin.table(cube, 1) + weights <- t(prop.table(margin.table(cube, 2))) + } else { + props <- prop.table(cube, 2) + margins <- margin.table(cube, 2) + weights <- prop.table(margin.table(cube, 1)) + } + names <- dimnames(a)[[2]] + + + I <- ncol(a) + X2 <- matrix(0.0, nrow = I, ncol = I) + for (i in 2:I){ + for (j in 1:(i-1)) { + X2[j,i] <- X2[i,j] <- sum( (props[,i] - props[,j])^2 / weights ) / + ( 1 / margins[i] + 1 / margins[j] ) + } + } + if(dim == "rows") { + X2 <- t(X2) + } + dimnames(X2) <- list(names, names) + return(X2) +} + + +#' P-values for pairwise comparison on a dimension of a categorical crosstab +#' +#' @param cube a CrunchCube +#' @param dim dimension which jointly forms the null hypothesis that each +#' vector is equal to all of the others. +#' +#' @return A symmetric square matrix of the size of `dim`. Typical element +#' \eqn{i,j} is the P-value associated with the hypothesis that proportions in +#' row or column \eqn{i} are equal to those in row or column \eqn{j}. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' # TODO: include Hirotsu occupation-illness example cube and output +#' illness_occupation <- crunch_example_data(hirotsu) +#' # P-value for joint null hypothesis that each +#' # column is equal to each other column. +#' +#' wishartPvalues(illness_occupation) +#' } +wishartPvalues <- function(cube, dim=c("cols", "rows")){ + checkInstalledPackages("rootWishart") + X2 <- wishartMatrix(cube, dim) + a <- as.array(cube) + I <- nrow(a) + J <- ncol(a) + p <- max(c(I,J)) - 1 + n <- min(c(I,J)) - 1 + upper.tail <- function(x) 1.0 - rootWishart::singleWishart(x, p, n, type = "double") + apply(X2, c(1,2), upper.tail) +} diff --git a/R/cube-residuals.R b/R/cube-residuals.R index b4d4d028b..5a4f75778 100644 --- a/R/cube-residuals.R +++ b/R/cube-residuals.R @@ -75,180 +75,6 @@ standardizedMRResiduals <- function(cube) { #' @export rstandard <- function(model) zScores(x = model) -#' Column and row comparison -#' -#' Comparing a column or row with a baseline column or row. This calculates the -#' z-score for the cells when comparing `x` to the baseline columns -#' -#' @param cube a cube to calculate the comparison on -#' @param dim which dimension is being compared (`rows` or `cols`, only valid -#' for `compareDims()`) -#' @param baseline a character, the column to use as a baseline to compare `x` -#' against -#' @param x a character, the column to compare against the baseline -#' @param ... arguments passed from `compareRows()` or `compareCols()` to -#' `compareDims()` (i.e. `baseline` and `x`) -#' -#' @return the z-score for the column or row given in `x` -#' -#' @name dimension-comparison -NULL - -#' @rdname dimension-comparison -#' @export -compareCols <- function(cube, ...) compareDims(cube = cube, dim = "cols", ...) - -#' @rdname dimension-comparison -#' @export -compareRows <- function(cube, ...) compareDims(cube = cube, dim = "rows", ...) - -#' @rdname dimension-comparison -#' @export -compareDims <- function(cube, dim = c("cols", "rows"), baseline, x) { - dim <- match.arg(dim) - - # grab the names of extents for each dimensions - # TODO: we shouldn't need to do this if we can just pass baseline/x into the - # subsetting function when we can subset by name _or_ index natively - if (dim == "cols") { - names <- colnames(as.array(cube)) - } else if (dim == "rows") { - names <- rownames(as.array(cube)) - } - - # convert to numeric indices - # TODO: remove when we can accept either characters or indices and pass them - # to subsetting unmolested - if (is.character(baseline) && length(baseline) == 1) { - baseline_ind <- which(names == baseline) - } else { - stop("Currently, column comparison only accepts at most one category name.") - } - if (is.character(x) && length(x) == 1) { - x_ind <- which(names == x) - } else { - stop("Currently, column comparison only accepts at most one category name.") - } - - # ensure that the extents given are in the cube - # TODO: remove when we can defer this check to the subsetting methods - not_in_cube <- !(c(baseline, x) %in% names) - if (any(not_in_cube)) { - stop(c(baseline, x)[not_in_cube], " is not a column or row in the cube") - } - - # ensure that there are not MRs on the comparison direction - dim_types <- getDimTypes(cube) - if ((dim == "cols" & startsWith(dim_types[2], "mr_")) | - (dim == "rows" & startsWith(dim_types[1], "mr_"))) { - stop( - "Column or row z-scores are not implemented for multiple response ", - "dimensions" - ) - } - - # generate the 2xm or nx2 table tfor testing - if (dim == "cols") { - sub_cube <- cube[, c(x_ind, baseline_ind)] - } else if (dim == "rows") { - sub_cube <- cube[c(x_ind, baseline_ind), ] - } - - # make the test - out <- zScores(sub_cube) - - # only return the scores for x - if (dim == "cols") { - out <- out[, x, drop = FALSE] - } else if (dim == "rows") { - out <- out[x, , drop = FALSE] - } - - return(out) -} - - -#' Pairwise column and row comparison -#' -#' Given a single baseline column compare each other row or column against this -#' baseline. Internally this function uses `compareDims()` iteratively. -#' -#' *Warning* since there is more than one comparison being made against each -#' baseline the z-scores, and especially the p-values derived from these -#' z-scores should be interpreted with caution. Using standard p-value cutoffs -#' will result in anti-conservative interpretations because of the \href{https://en.wikipedia.org/wiki/Multiple_comparisons_problem}{multiple -#' comparisons problem}. -#' Adjustments to p-value cut offs (e.g. \href{https://en.wikipedia.org/wiki/Bonferroni_correction}{Bonferonni correction}) should be used when interpreting z-scores from the -#' `compare[Rows|Cols|Dims]Pairwise()` family of functions. -#' -#' @param cube a cube to calculate the comparison on -#' @param dim which dimension is being compared (`rows` or `cols`, only valid -#' for `compareDims()`) -#' @param baseline a character, the column to use as a baseline to compare -#' against all other columns -#' @param ... arguments passed from `compareRowsPairwise()` or -#' `compareColsPairwise()` to `compareDimsPairwise()` (i.e. `baseline`) -#' -#' @return an array of z-score for the all the columns or rows compared to -#' `baseline`. The `baseline` column is all 0s -#' -#' @name dimension-comparison-pairwise -NULL - -#' @rdname dimension-comparison-pairwise -#' @export -compareColsPairwise <- function(cube, ...) { - compareDimsPairwise(cube = cube, dim = "cols", ...) -} - -#' @rdname dimension-comparison-pairwise -#' @export -compareRowsPairwise <- function(cube, ...) { - compareDimsPairwise(cube = cube, dim = "rows", ...) -} - -#' @rdname dimension-comparison-pairwise -#' @export -compareDimsPairwise <- function(cube, dim = c("cols", "rows"), baseline) { - dim <- match.arg(dim) - - # grab the names of extents for each dimensions - # TODO: we shouldn't need to do this if we can just pass baseline/x into the - # subsetting function when we can subset by name _or_ id natively - if (dim == "cols") { - len_out <- dim(as.array(cube))[1] - names <- colnames(as.array(cube)) - } else if (dim == "rows") { - len_out <- dim(as.array(cube))[2] - names <- rownames(as.array(cube)) - } - - to_compare <- names[!(names %in% baseline)] - - out <- vapply( - names, function(one_extent) { - if (one_extent == baseline) { - return(rep(0, len_out)) # not the right shape - } - - # generate the 2xm or nx2 table tfor testing - return(compareDims(cube, baseline = baseline, x = one_extent, dim = dim)) - }, - numeric(len_out) - ) - - out <- simplify2array(out) - - # if the dimension is rows, the array that vapply returns needs to be - # transposed - if (dim == "rows") { - out <- t(out) - } - - dimnames(out) <- dimnames(as.array(cube)) - - return(out) -} # broadcast a vector of values to a matrix with dimensions dims. Similar to # but not exactly the same as numpy's `broadcast_to` method. diff --git a/cubes/cube-random-3x4.json b/cubes/cube-random-3x4.json new file mode 100644 index 000000000..f16010686 --- /dev/null +++ b/cubes/cube-random-3x4.json @@ -0,0 +1,190 @@ +{ + "element": "shoji:view", + "self": "/api/datasets/4bb830/cube/?query=%7B%22dimensions%22%3A%5B%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F4bb830%2Fvariables%2F000000%2F%22%7D%2C%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F4bb830%2Fvariables%2F000001%2F%22%7D%5D%2C%22measures%22%3A%7B%22count%22%3A%7B%22function%22%3A%22cube_count%22%2C%22args%22%3A%5B%5D%7D%7D%2C%22weight%22%3Anull%7D&filter=%7B%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [ + + ] + } + }, + "dimensions": [ + { + "variable": "/api/datasets/4bb830/variables/000000/" + }, + { + "variable": "/api/datasets/4bb830/variables/000001/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + + ] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "I", + "name": "I" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "one" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "two" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "three" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "J", + "name": "J" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "A" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "B" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "C" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "D" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 12, + 23, + 10, + 20, + 0, + 23, + 28, + 14, + 24, + 0, + 22, + 23, + 23, + 21, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": { + + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": { + + }, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "n": 243, + "unfiltered": { + "unweighted_n": 243, + "weighted_n": 243 + }, + "filtered": { + "unweighted_n": 243, + "weighted_n": 243 + }, + "counts": [ + 12, + 23, + 10, + 20, + 0, + 23, + 28, + 14, + 24, + 0, + 22, + 23, + 23, + 21, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } + } +} diff --git a/cubes/pairwise-hirotsu-illness-x-occupation.json b/cubes/pairwise-hirotsu-illness-x-occupation.json new file mode 100644 index 000000000..e1b641e0a --- /dev/null +++ b/cubes/pairwise-hirotsu-illness-x-occupation.json @@ -0,0 +1,274 @@ +{ + "element": "shoji:view", + "self": "/api/datasets/9271a0/cube/?query=%7B%22dimensions%22%3A%5B%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9271a0%2Fvariables%2F000000%2F%22%7D%2C%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9271a0%2Fvariables%2F000001%2F%22%7D%5D%2C%22measures%22%3A%7B%22count%22%3A%7B%22function%22%3A%22cube_count%22%2C%22args%22%3A%5B%5D%7D%7D%2C%22weight%22%3Anull%7D&filter=%7B%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [ + + ] + } + }, + "dimensions": [ + { + "variable": "/api/datasets/9271a0/variables/000000/" + }, + { + "variable": "/api/datasets/9271a0/variables/000001/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + + ] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "illness", + "name": "illness" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 3, + "id": 3, + "name": "slight", + "missing": false + }, + { + "numeric_value": 1, + "id": 1, + "name": "medium", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "serious", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "occupation", + "name": "occupation" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "1" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "2" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "3" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "4" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "5" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "6" + }, + { + "numeric_value": 7, + "missing": false, + "id": 7, + "name": "7" + }, + { + "numeric_value": 8, + "missing": false, + "id": 8, + "name": "8" + }, + { + "numeric_value": 9, + "missing": false, + "id": 9, + "name": "9" + }, + { + "numeric_value": 10, + "missing": false, + "id": 10, + "name": "10" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 148, + 111, + 645, + 165, + 383, + 96, + 98, + 199, + 59, + 262, + 0, + 444, + 352, + 1911, + 771, + 1829, + 293, + 330, + 874, + 199, + 1320, + 0, + 86, + 49, + 328, + 119, + 311, + 47, + 58, + 155, + 30, + 236, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": { + + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": { + + }, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "n": 11908, + "unfiltered": { + "unweighted_n": 11908, + "weighted_n": 11908 + }, + "filtered": { + "unweighted_n": 11908, + "weighted_n": 11908 + }, + "counts": [ + 148, + 111, + 645, + 165, + 383, + 96, + 98, + 199, + 59, + 262, + 0, + 444, + 352, + 1911, + 771, + 1829, + 293, + 330, + 874, + 199, + 1320, + 0, + 86, + 49, + 328, + 119, + 311, + 47, + 58, + 155, + 30, + 236, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } + } +} diff --git a/cubes/pairwise-hirotsu-occupation-x-illness.json b/cubes/pairwise-hirotsu-occupation-x-illness.json new file mode 100644 index 000000000..26dcc2614 --- /dev/null +++ b/cubes/pairwise-hirotsu-occupation-x-illness.json @@ -0,0 +1,274 @@ +{ + "element": "shoji:view", + "self": "/api/datasets/9271a0/cube/?query=%7B%22dimensions%22%3A%5B%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9271a0%2Fvariables%2F000001%2F%22%7D%2C%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9271a0%2Fvariables%2F000000%2F%22%7D%5D%2C%22measures%22%3A%7B%22count%22%3A%7B%22function%22%3A%22cube_count%22%2C%22args%22%3A%5B%5D%7D%7D%2C%22weight%22%3Anull%7D&filter=%7B%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [ + + ] + } + }, + "dimensions": [ + { + "variable": "/api/datasets/9271a0/variables/000001/" + }, + { + "variable": "/api/datasets/9271a0/variables/000000/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + + ] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "occupation", + "name": "occupation" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "1" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "2" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "3" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "4" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "5" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "6" + }, + { + "numeric_value": 7, + "missing": false, + "id": 7, + "name": "7" + }, + { + "numeric_value": 8, + "missing": false, + "id": 8, + "name": "8" + }, + { + "numeric_value": 9, + "missing": false, + "id": 9, + "name": "9" + }, + { + "numeric_value": 10, + "missing": false, + "id": 10, + "name": "10" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "alias": "illness", + "name": "illness" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 3, + "id": 3, + "name": "slight", + "missing": false + }, + { + "numeric_value": 1, + "id": 1, + "name": "medium", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "serious", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 148, + 444, + 86, + 0, + 111, + 352, + 49, + 0, + 645, + 1911, + 328, + 0, + 165, + 771, + 119, + 0, + 383, + 1829, + 311, + 0, + 96, + 293, + 47, + 0, + 98, + 330, + 58, + 0, + 199, + 874, + 155, + 0, + 59, + 199, + 30, + 0, + 262, + 1320, + 236, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": { + + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": { + + }, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "n": 11908, + "unfiltered": { + "unweighted_n": 11908, + "weighted_n": 11908 + }, + "filtered": { + "unweighted_n": 11908, + "weighted_n": 11908 + }, + "counts": [ + 148, + 444, + 86, + 0, + 111, + 352, + 49, + 0, + 645, + 1911, + 328, + 0, + 165, + 771, + 119, + 0, + 383, + 1829, + 311, + 0, + 96, + 293, + 47, + 0, + 98, + 330, + 58, + 0, + 199, + 874, + 155, + 0, + 59, + 199, + 30, + 0, + 262, + 1320, + 236, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } + } +} diff --git a/cubes/same-counts-3x4.json b/cubes/same-counts-3x4.json new file mode 100644 index 000000000..8219bcda9 --- /dev/null +++ b/cubes/same-counts-3x4.json @@ -0,0 +1,190 @@ +{ + "element": "shoji:view", + "self": "/api/datasets/9c9001/cube/?query=%7B%22dimensions%22%3A%5B%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9c9001%2Fvariables%2F000000%2F%22%7D%2C%7B%22variable%22%3A%22%2Fapi%2Fdatasets%2F9c9001%2Fvariables%2F000001%2F%22%7D%5D%2C%22measures%22%3A%7B%22count%22%3A%7B%22function%22%3A%22cube_count%22%2C%22args%22%3A%5B%5D%7D%7D%2C%22weight%22%3Anull%7D&filter=%7B%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [ + + ] + } + }, + "dimensions": [ + { + "variable": "/api/datasets/9c9001/variables/000000/" + }, + { + "variable": "/api/datasets/9c9001/variables/000001/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + + ] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "Var1", + "name": "Var1" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "id": 1, + "name": "1", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "2", + "missing": false + }, + { + "numeric_value": 3, + "id": 3, + "name": "3", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "references": { + "alias": "Var2", + "name": "Var2" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "id": 1, + "name": "a", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "b", + "missing": false + }, + { + "numeric_value": 3, + "id": 3, + "name": "c", + "missing": false + }, + { + "numeric_value": 4, + "id": 4, + "name": "d", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 10, + 10, + 10, + 10, + 0, + 20, + 20, + 20, + 20, + 0, + 30, + 30, + 30, + 30, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": { + + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": { + + }, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "n": 240, + "unfiltered": { + "unweighted_n": 240, + "weighted_n": 240 + }, + "filtered": { + "unweighted_n": 240, + "weighted_n": 240 + }, + "counts": [ + 10, + 10, + 10, + 10, + 0, + 20, + 20, + 20, + 20, + 0, + 30, + 30, + 30, + 30, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } + } +} diff --git a/inst/cubes.tgz b/inst/cubes.tgz index 12a734c8a..14391729e 100644 Binary files a/inst/cubes.tgz and b/inst/cubes.tgz differ diff --git a/man/calcTransforms.Rd b/man/calcTransforms.Rd index 0697d2ad8..3435d67a9 100644 --- a/man/calcTransforms.Rd +++ b/man/calcTransforms.Rd @@ -7,7 +7,7 @@ calcTransforms(array, insert_funcs, dim_names = names(insert_funcs)) } \arguments{ -\item{array}{the array to use (this is most likely from a \code{CrunchCube`` object). The cells from this array are the cube_cells referred to in }include`} +\item{array}{the array to use (this is most likely from a \code{CrunchCube`` object). The cells from this array are the cube_cells referred to in}include`} \item{trans}{a \code{Transform} object to get the transformations to calculate} diff --git a/man/dimension-comparison-pairwise.Rd b/man/dimension-comparison-pairwise.Rd index 378364aa4..39c00d797 100644 --- a/man/dimension-comparison-pairwise.Rd +++ b/man/dimension-comparison-pairwise.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cube-residuals.R +% Please edit documentation in R/cube-comparisons.R \name{dimension-comparison-pairwise} \alias{dimension-comparison-pairwise} \alias{compareColsPairwise} @@ -11,23 +11,27 @@ compareColsPairwise(cube, ...) compareRowsPairwise(cube, ...) -compareDimsPairwise(cube, dim = c("cols", "rows"), baseline) +compareDimsPairwise(cube, dim = c("cols", "rows"), baseline, + value = c("statistic", "p.value")) } \arguments{ -\item{cube}{a cube to calculate the comparison on} +\item{cube}{a CrunchCube to calculate the comparison on} \item{...}{arguments passed from \code{compareRowsPairwise()} or -\code{compareColsPairwise()} to \code{compareDimsPairwise()} (i.e. \code{baseline})} +\code{compareColsPairwise()} to \code{compareDimsPairwise()} (e.g., \code{baseline})} \item{dim}{which dimension is being compared (\code{rows} or \code{cols}, only valid for \code{compareDims()})} -\item{baseline}{a character, the column to use as a baseline to compare -against all other columns} +\item{baseline}{a character, the name of a column or row to compare +against each of the others in turn} + +\item{value}{Value of the test to return. Defaults to \code{statistic}; use \code{p.value} +for the p-value of the test of the pair of columns/rows.} } \value{ -an array of z-score for the all the columns or rows compared to -\code{baseline}. The \code{baseline} column is all 0s +a vector of representing the baseline column’s test statistic +with respect to each of the others. } \description{ Given a single baseline column compare each other row or column against this diff --git a/man/dimension-comparison.Rd b/man/dimension-comparison.Rd index 09d2fd96c..6a5f1d4ed 100644 --- a/man/dimension-comparison.Rd +++ b/man/dimension-comparison.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cube-residuals.R +% Please edit documentation in R/cube-comparisons.R \name{dimension-comparison} \alias{dimension-comparison} \alias{compareCols} @@ -11,26 +11,30 @@ compareCols(cube, ...) compareRows(cube, ...) -compareDims(cube, dim = c("cols", "rows"), baseline, x) +compareDims(cube, dim = c("cols", "rows"), baseline, x, + value = c("statistic", "p.value")) } \arguments{ -\item{cube}{a cube to calculate the comparison on} +\item{cube}{a CrunchCube to calculate the comparison on} \item{...}{arguments passed from \code{compareRows()} or \code{compareCols()} to \code{compareDims()} (i.e. \code{baseline} and \code{x})} -\item{dim}{which dimension is being compared (\code{rows} or \code{cols}, only valid +\item{dim}{which dimension is being compared (\code{cols} or \code{rows}, only valid for \code{compareDims()})} -\item{baseline}{a character, the column to use as a baseline to compare \code{x} +\item{baseline}{a character, the column/row to use as a baseline to compare \code{x} against} -\item{x}{a character, the column to compare against the baseline} +\item{x}{a character, the column/row to compare against the baseline} + +\item{value}{Value of the test to return. Defaults to \code{statistic}; use \code{p.value} +for the p-value of the test of the pair of columns/rows.} } \value{ -the z-score for the column or row given in \code{x} +the value for the column or row given in \code{x}, default is Chi-squared } \description{ Comparing a column or row with a baseline column or row. This calculates the -z-score for the cells when comparing \code{x} to the baseline columns +test statistic \eqn{\chi^2} of independence for each pair of columns/rows. } diff --git a/man/pairwiseMatrix.Rd b/man/pairwiseMatrix.Rd new file mode 100644 index 000000000..80d6f1041 --- /dev/null +++ b/man/pairwiseMatrix.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cube-comparisons.R +\name{pairwiseMatrix} +\alias{pairwiseMatrix} +\title{Matrix of Chi-Squared Statistics for all rows or columns} +\usage{ +pairwiseMatrix(cube, dim = c("cols", "rows"), value = c("statistic", + "p.value")) +} +\arguments{ +\item{cube}{a CrunchCube representing counts of a categorical by categorical +contingency table.} + +\item{dim}{which dimension along which to compare (\code{cols} or \code{rows})} + +\item{value}{Value of the test to return. Defaults to \code{statistic}; use \code{p.value} +for the p-value of the test of the pair of columns/rows.} +} +\value{ +A symmetric square matrix of all column-comparison or row-comparison +\eqn{\chi^2} statistics. Typical element \eqn{i,j} is the test statistic equivalent +to \code{chisq.test} subsetting to just columns i and j (for dim="cols"). +} +\description{ +Generate a matrix of pairwise comparisons of rows or columns, each against +the others. +} +\examples{ +\dontrun{ +some_cube <- crunch_example_data(cat_by_cat) + +# By default, return the 'statistic' of chisq.test, evaluated +# for each pair of columns pairwise: +pairwiseMatrix(some_cube) + +# or each pair of rows: +pairwiseMatrix(some_cube, dim="rows") + +# Or, specify another "value" from `chisq.test`: +pairwiseMatrix(some_cube, value="p.value") +} +} diff --git a/man/wishartMatrix.Rd b/man/wishartMatrix.Rd new file mode 100644 index 000000000..05c3e4f1a --- /dev/null +++ b/man/wishartMatrix.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cube-comparisons.R +\name{wishartMatrix} +\alias{wishartMatrix} +\title{Matrix of Chi-Squared Statistics for all rows or columns} +\usage{ +wishartMatrix(cube, dim = c("cols", "rows")) +} +\arguments{ +\item{cube}{a CrunchCube of counts: a crosstab of two categorical variables} + +\item{dim}{dimension along which to compare proportions (default \code{cols} or \code{rows})} +} +\value{ +A symmetric square matrix of Chi-squared statistics for a crosstab +} +\description{ +Use the alternative Wishart method of forming the matrix of column- or row-wise +comparison Chi-squared test statistics for a categorical-by-categorical +contingency table. +} +\examples{ +\dontrun{ +# TODO: include Hirotsu occupation-illness example cube and output +illness_occupation <- crunch_example_data(hirotsu) +# Chi-squared statistic for joint null hypothesis that each +# column is equal to each other column. + +wishartMatrix(illness_occupation) +} +} diff --git a/man/wishartPvalues.Rd b/man/wishartPvalues.Rd new file mode 100644 index 000000000..ff24c1ff3 --- /dev/null +++ b/man/wishartPvalues.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cube-comparisons.R +\name{wishartPvalues} +\alias{wishartPvalues} +\title{P-values for pairwise comparison on a dimension of a categorical crosstab} +\usage{ +wishartPvalues(cube, dim = c("cols", "rows")) +} +\arguments{ +\item{cube}{a CrunchCube} + +\item{dim}{dimension which jointly forms the null hypothesis that each +vector is equal to all of the others.} +} +\value{ +A symmetric square matrix of the size of \code{dim}. Typical element +\eqn{i,j} is the P-value associated with the hypothesis that proportions in +row or column \eqn{i} are equal to those in row or column \eqn{j}. +} +\description{ +P-values for pairwise comparison on a dimension of a categorical crosstab +} +\examples{ +\dontrun{ +# TODO: include Hirotsu occupation-illness example cube and output +illness_occupation <- crunch_example_data(hirotsu) +# P-value for joint null hypothesis that each +# column is equal to each other column. + +wishartPvalues(illness_occupation) +} +} diff --git a/tests/testthat/test-cube-pairwise.R b/tests/testthat/test-cube-pairwise.R new file mode 100644 index 000000000..b1993c745 --- /dev/null +++ b/tests/testthat/test-cube-pairwise.R @@ -0,0 +1,166 @@ +context("Cube pairwise multiple comparisons") + +########################################## +## fixutres from crunch cube +########################################## +hirotsu <- loadCube("cubes/pairwise-hirotsu-illness-x-occupation.json") +hirotsuTranspose <- loadCube("cubes/pairwise-hirotsu-occupation-x-illness.json") +sampled <- loadCube("cubes/cube-random-3x4.json") +samecounts <- loadCube("cubes/same-counts-3x4.json") + +test_that("Reference Chi-square from Z-tests.Rmd, 2017-10-26", { + reference <- structure(c(NA, 1.79366044505363, 1.44505144460352, 1.52650609891684, + 1.79366044505363, NA, 3.96027410389113, 0.0252758760361623, 1.44505144460352, + 3.96027410389113, NA, 3.24682678277303, 1.52650609891684, 0.0252758760361623, + 3.24682678277303, NA), .Dim = c(4L, 4L), + .Dimnames = list(c("A", "B", "C", "D"), c("A", "B", "C", "D"))) + out <- pairwiseMatrix(sampled) + expect_equal(out, reference) +}) +test_that("Reference p values from Z-tests.Rmd, 2017-10-26", { + reference <- structure(c(NA, 0.407860439751267, 0.48552440618817, 0.466147556812173, + 0.407860439751267, NA, 0.138050315949931, 0.987441585364219, + 0.48552440618817, 0.138050315949931, NA, 0.197224344940826, 0.466147556812173, + 0.987441585364219, 0.197224344940826, NA), .Dim = c(4L, 4L), + .Dimnames = list(c("A", "B", "C", "D"), c("A", "B", "C", "D"))) + out <- pairwiseMatrix(sampled, value="p.value") + expect_equal(out, reference) +}) + +test_that("Pairwise comparison matrix for columns is equal to reference values", { + # Note: These reference values generated from hirotsu data after building it using + # the above sampled data. They differ from the Wishart versions found in Hirotsu, + # but are included here if pairwise comparison is indeed wanted. + reference <- structure(c(NA, 2.96093540688393, 0.932829050525787, 12.8371652127878, + 17.9893864373252, 0.928545742734045, 0.747992013063412, 9.63186987962947, + 1.43158631894219, 20.1171706792372, 2.96093540688393, NA, 1.76847831300577, + 8.98098998206645, 14.6017275773817, 0.435494345393219, 1.59902977683344, + 9.24994844117764, 0.260499250030443, 17.8112547073445, 0.932829050525787, + 1.76847831300577, NA, 22.2681630243698, 45.1233163291918, 0.188765827926188, + 1.19551172815931, 20.1305719083303, 0.945032186576934, 45.5038244877042, + 12.8371652127878, 8.98098998206645, 22.2681630243698, NA, 0.816415213502732, + 8.73362589158256, 5.36480999045035, 1.25267943759809, 3.83466528508909, + 2.26279769006118, 17.9893864373252, 14.6017275773817, 45.1233163291918, + 0.816415213502732, NA, 12.9245987246456, 7.58169347682279, 0.817077282842828, + 5.81838330801743, 0.781754999092818, 0.928545742734045, 0.435494345393219, + 0.188765827926188, 8.73362589158256, 12.9245987246456, NA, 0.660878111661309, + 7.75618456211458, 0.303388613890485, 15.6051920129687, 0.747992013063412, + 1.59902977683344, 1.19551172815931, 5.36480999045035, 7.58169347682279, + 0.660878111661309, NA, 3.8133007453977, 0.413328427993386, 9.63892077794325, + 9.63186987962947, 9.24994844117764, 20.1305719083303, 1.25267943759809, + 0.817077282842828, 7.75618456211458, 3.8133007453977, NA, 3.58397569023511, + 1.8412813174768, 1.43158631894219, 0.260499250030443, 0.945032186576934, + 3.83466528508909, 5.81838330801743, 0.303388613890485, 0.413328427993386, + 3.58397569023511, NA, 7.73836037271873, 20.1171706792372, 17.8112547073445, + 45.5038244877042, 2.26279769006118, 0.781754999092818, 15.6051920129687, + 9.63892077794325, 1.8412813174768, 7.73836037271873, NA), + .Dim = c(10L, 10L), + .Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8","9", "10"), + c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))) + + out <- pairwiseMatrix(hirotsu, dim="cols") + expect_equal(out, reference) +}) + +test_that("Pairwise row is identical to direct transpose", { + reference <- structure(c(NA, 87.9657697538273, 48.4241001009927, + 87.9657697538273, NA, 5.23229445690705, + 48.4241001009927, 5.23229445690705, NA), + .Dim = c(3L, 3L), + .Dimnames = list(c("slight", "medium", "serious"), + c("slight", "medium", "serious"))) + rowMatrix <- pairwiseMatrix(hirotsuTranspose, dim="cols") + colMatrix <- pairwiseMatrix(hirotsu, dim="rows") + expect_equal(rowMatrix, colMatrix) + expect_equal(rowMatrix, reference) +}) + +test_that("Wishart Chi-squared from Hirotsu, all-possible-comparisons.pdf, 2018-12-15", { + reference <- structure(c(0, 2.82191015811665, 0.925971181878173, 12.7808554481281, + 16.797278696301, 0.924655442873681, 0.800897626931245, 9.61697239870243, + 1.44968631245103, 18.5560989371817, 2.82191015811665, 0, 1.68311327379593, + 8.68347185218156, 13.4510531592651, 0.38467827774871, 1.50949615300718, + 9.081312924348, 0.258339854060561, 16.3533306337074, 0.925971181878173, + 1.68311327379593, 0, 24.3489354234647, 46.6893860778998, 0.184708228257528, + 1.3765987079862, 22.0636585403878, 1.01021187951098, 47.6212400456597, + 12.7808554481281, 8.68347185218156, 24.3489354234647, 0, 0.807397908326374, + 8.49064125921564, 5.14174069410539, 1.25360048488748, 3.57624174509225, + 2.19745619878766, 16.797278696301, 13.4510531592651, 46.6893860778998, + 0.807397908326374, 0, 11.7920120113265, 6.84760936784522, 0.743555569450378, + 5.2183904567275, 0.725476017865348, 0.924655442873681, 0.38467827774871, + 0.184708228257528, 8.49064125921564, 11.7920120113265, 0, 0.707253783195803, + 7.620018353425, 0.332196968531903, 14.0875915538107, 0.800897626931245, + 1.50949615300718, 1.3765987079862, 5.14174069410539, 6.84760936784522, + 0.707253783195803, 0, 3.67243544094674, 0.396743262086735, 8.54615901952498, + 9.61697239870243, 9.081312924348, 22.0636585403878, 1.25360048488748, + 0.743555569450378, 7.620018353425, 3.67243544094674, 0, 3.4464292421171, + 1.59166956338692, 1.44968631245103, 0.258339854060561, 1.01021187951098, + 3.57624174509225, 5.2183904567275, 0.332196968531903, 0.396743262086735, + 3.4464292421171, 0, 6.85424450468994, 18.5560989371817, 16.3533306337074, + 47.6212400456597, 2.19745619878766, 0.725476017865348, 14.0875915538107, + 8.54615901952498, 1.59166956338692, 6.85424450468994, 0), + .Dim = c(10L, 10L), + .Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), + c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))) + out <- wishartMatrix(hirotsu) + expect_equal(out, reference) +}) + +test_that("Wishart rows equal to transposed cube", { + reference <- structure(c(0, 88.0035314184012, 48.7265942784996, 88.0035314184012, + 0, 5.14221680596919, 48.7265942784996, 5.14221680596919, 0), + .Dim = c(3L, 3L), + .Dimnames = list(c("slight", "medium", "serious"), + c("slight", "medium", "serious"))) + out <- wishartMatrix(hirotsu, dim="rows") + expect_equal(out, reference) + alt_out <- wishartMatrix(hirotsuTranspose) + expect_equal(out, alt_out) +}) + +test_that("Wishart P-values from reference in Hirotsu paper", { + reference <- structure(c(1, 0.999603716443816, 0.99999993076784, 0.435830186989942, + 0.171365670494448, 0.999999931581745, 0.999999979427862, 0.726740806122402, + 0.999997338047414, 0.105707739899106, 0.999603716443816, 1, 0.999991395396033, + 0.806407150042716, 0.380296648898666, 0.999999999961875, 0.999996333717649, + 0.773583582093158, 0.999999999998836, 0.192375246184738, 0.99999993076784, + 0.999991395396033, 1, 0.017277623171216, 3.29012189337341e-06, + 0.99999999999994, 0.999998237045896, 0.0365273119329589, 0.999999857555538, + 2.23456306602809e-06, 0.435830186989942, 0.806407150042716, 0.017277623171216, + 1, 0.999999977981595, 0.821586701043061, 0.982573114952466, 0.999999169027016, + 0.998041030837588, 0.999934687968906, 0.171365670494448, 0.380296648898666, + 3.29012189337341e-06, 0.999999977981595, 1, 0.52406354520284, + 0.926322806048378, 0.99999998900118, 0.981100354607917, 0.999999991067971, + 0.999999931581745, 0.999999999961875, 0.99999999999994, 0.821586701043061, + 0.52406354520284, 1, 0.999999992799126, 0.883025655503086, 0.99999999998941, + 0.33149560264078, 0.999999979427862, 0.999996333717649, 0.999998237045896, + 0.982573114952466, 0.926322806048378, 0.999999992799126, 1, 0.997674862917282, + 0.99999999995011, 0.81726901111794, 0.726740806122402, 0.773583582093158, + 0.0365273119329589, 0.999999169027016, 0.99999998900118, 0.883025655503086, + 0.997674862917282, 1, 0.998461227115608, 0.999994436499243, 0.999997338047414, + 0.999999999998836, 0.999999857555538, 0.998041030837588, 0.981100354607917, + 0.99999999998941, 0.99999999995011, 0.998461227115608, 1, 0.925999125959122, + 0.105707739899106, 0.192375246184738, 2.23456306602809e-06, 0.999934687968906, + 0.999999991067971, 0.33149560264078, 0.81726901111794, 0.999994436499243, + 0.925999125959122, 1), .Dim = c(10L, 10L), + .Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), + c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))) + out <- wishartPvalues(hirotsu) + expect_equal(out, reference) + + out_t <- wishartPvalues(hirotsuTranspose, dim="row") + expect_equal(out, out_t) +}) + +test_that("a matrix where each row and column is the same", { + referenceChisq <- structure(rep(0, 16), + .Dim = c(4L, 4L), + .Dimnames = list(c("a", "b", "c", "d"), c("a", "b", "c", "d"))) + referencePvals <- structure(rep(1, 16), + .Dim = c(4L, 4L), + .Dimnames = list(c("a", "b", "c", "d"), c("a", "b", "c", "d"))) + out1 <- wishartMatrix(samecounts) + expect_equal(out1, referenceChisq) + out2 <- wishartPvalues(samecounts) + expect_equal(out2, referencePvals) +}) diff --git a/tests/testthat/test-cube-residuals-zed-scores.R b/tests/testthat/test-cube-residuals-zed-scores.R index 791e1f7b9..2c449be80 100644 --- a/tests/testthat/test-cube-residuals-zed-scores.R +++ b/tests/testthat/test-cube-residuals-zed-scores.R @@ -190,74 +190,55 @@ test_that("residuals for catarray", { }) test_that("compareCols()", { - expected_zScores <- cubify( - 2.54925480834223, - -2.54925480834223, - dims = list( - Gender = c("Male", "Female"), - RespondentIdeology = c("Very Conservative") - ) - ) + expected_chisq = structure(5.74120651376478, .Names = "X-squared") expect_equal( compareCols( gender_x_ideology, baseline = "Very liberal", x = "Very Conservative" ), - expected_zScores + expected_chisq ) - - expected_zScores <- cubify( - -2.54925480834223, - 2.54925480834223, - dims = list( - Gender = c("Male", "Female"), - RespondentIdeology = c("Very liberal") - ) + expect_equal( + compareCols( + gender_x_ideology, + baseline = "Very liberal", + x = "Very Conservative", + value="p.value" + ), + 0.0165714055735903 ) + expect_equal( compareCols( gender_x_ideology, baseline = "Very Conservative", x = "Very liberal" ), - expected_zScores + expected_chisq ) }) test_that("compareRows()", { cat_by_cat <- noTransforms(cat_by_cat) - expected_zScores <- cubify( - -0.97433731917929, 0.97433731917929, - dims = list( - feelings = c("extremely unhappy"), - animals = c("cats", "dogs") - ) - ) + expected_chisq <- structure(0.402258403361344, .Names = "X-squared") expect_equal( compareRows( cat_by_cat, baseline = "extremely happy", x = "extremely unhappy" ), - expected_zScores + expected_chisq ) - expected_zScores <- cubify( - 0.97433731917929, -0.97433731917929, - dims = list( - feelings = c("extremely happy"), - animals = c("cats", "dogs") - ) - ) expect_equal( compareRows( cat_by_cat, baseline = "extremely unhappy", x = "extremely happy" ), - expected_zScores + expected_chisq ) }) @@ -340,25 +321,6 @@ test_that("compareDims() with MRs", { ) ) - # But if the MR is not the dimension being compared amongst, we still - # calculate a score - expected_zScores <- cubify( - -1.34840705967846, - -0.319502930145056, - -2.44219465036739, - -3.14883276639645, - 4.11744429667266, - dims = list( - food_groups = c("Vegetables"), - nordics = c("Denmark", "Finland", "Iceland", "Norway", "Sweden") - ) - ) - - expect_equal( - compareRows(cat_by_mr_NSS_alltypes, baseline = "Fruit", x = "Vegetables"), - expected_zScores - ) - expect_error( compareRows( mr_by_cat, @@ -375,48 +337,29 @@ test_that("compareDims() with MRs", { test_that("compareColsPairwise()", { - expected_zScores <- t(cubify( - compareCols(gender_x_ideology, baseline = "Moderate", x = "Very liberal"), - compareCols(gender_x_ideology, baseline = "Moderate", x = "Liberal"), - 0, 0, - compareCols(gender_x_ideology, baseline = "Moderate", x = "Conservative"), - compareCols(gender_x_ideology, baseline = "Moderate", x = "Very Conservative"), - compareCols(gender_x_ideology, baseline = "Moderate", x = "Not sure"), - dims = list( - RespondentIdeology = c( - "Very liberal", "Liberal", "Moderate", "Conservative", "Very Conservative", "Not sure" - ), - Gender = c("Male", "Female") - ) - )) + expected_chisq <- structure( + c(0.00107052128340058, 0.0331121511293606, NA, 1.98950334805864, 10.4501791770671, 1.99181579553824), + .Names = c("Very liberal", "Liberal", "Moderate", "Conservative", "Very Conservative", "Not sure")) + expect_equal( compareColsPairwise( gender_x_ideology, baseline = "Moderate" ), - expected_zScores + expected_chisq ) }) test_that("compareRowsPairwise()", { cat_by_cat <- noTransforms(cat_by_cat) - expected_zScores <- cubify( - compareRows(cat_by_cat, baseline = "neutral", x = "extremely happy"), - compareRows(cat_by_cat, baseline = "neutral", x = "somewhat happy"), - 0, 0, - compareRows(cat_by_cat, baseline = "neutral", x = "somewhat unhappy"), - compareRows(cat_by_cat, baseline = "neutral", x = "extremely unhappy"), - dims = list( - feelings = c( - "extremely happy", "somewhat happy", "neutral", - "somewhat unhappy", "extremely unhappy" - ), - animals = c("cats", "dogs") - ) - ) + expected_chisq <- + structure(c(1.53789746828801e-31, 0.306520996845184, NA, 0.255275471432593, 0.465373961218836), .Dim = c(1L, 5L), + .Dimnames = list(NULL,c("extremely happy", "somewhat happy", "neutral", "somewhat unhappy", "extremely unhappy")), + .Names = c("extremely happy", "somewhat happy", "neutral", "somewhat unhappy", "extremely unhappy")) + expect_equal( compareRowsPairwise(cat_by_cat, baseline = "neutral"), - expected_zScores + expected_chisq ) }) diff --git a/tests/testthat/test-cube-residuals.R b/tests/testthat/test-cube-residuals.R index 4406b26c1..392464436 100644 --- a/tests/testthat/test-cube-residuals.R +++ b/tests/testthat/test-cube-residuals.R @@ -31,3 +31,8 @@ test_that("broadcast returns a matrix that mataches", { fixed = TRUE ) }) + +test_that("scalars broadcast too", { + array <- array(c(1,1), dim=c(1,2)) + expect_equal(broadcast(1, dim=c(1,2)), array) +})