Brown and Forsythe (1974) simulation (original) (raw)
Simulation from:
- Brown, M. B. and Forsythe, A. B. (1974). Robust tests for the equality of variances. Journal of the American Statistical Association, 69(346), 364–367.
User-defined functions for analyses
#-------------------------------------------------------------------
# Custom functions required (not defined in other R packages)
#' Jacknife variance test
#'
#' @param DV a numeric vector
#' @param group a character/factor vector containing the group membership
Jacknife_vartest <- function(DV, group){
stopifnot(length(DV) == length(group))
nms <- unique(group)
g <- length(nms)
Ns <- s2 <- Ui. <- numeric(g)
Uij <- vector('list', g)
for(i in 1:g){
pick <- group == nms[i]
sub <- subset(DV, pick)
Ns[i] <- length(sub)
s2[i] <- var(sub)
sij2 <- numeric(Ns[i])
for(j in 1:length(sub))
sij2[j] <- var(sub[-j])
Uij[[i]] <- Ns[i] * log(s2[i]) - (Ns[i]-1) * log(sij2)
Ui.[i] <- mean(Uij[[i]])
}
U.. <- mean(do.call(c, Uij))
num <- sum(Ns * (Ui. - U..)^2) / (g-1)
den <- 0
for(i in 1:g)
den <- den + sum((Uij[[i]] - Ui.[i])^2) / sum(Ns-1)
J <- num/den
df1 <- g-1; df2 <- sum(Ns-1)
p.value <- pf(J, df1, df2, lower.tail = FALSE)
data.frame(J=J, df1=df1, df2=df2, p.value=p.value, row.names="")
}
#' Layard's variance test
#'
#' @param DV a numeric vector
#' @param group a character/factor vector containing the group membership
Layard_vartest <- function(DV, group){
stopifnot(length(DV) == length(group))
nms <- unique(group)
Ns <- table(group)
N <- sum(Ns)
g <- length(nms)
log_s2 <- numeric(g)
deviation <- numeric(N)
for(i in 1:g){
pick <- group == nms[i]
sub <- subset(DV, pick)
xbar <- mean(sub)
deviation[pick] <- sub - xbar
log_s2[i] <- log(var(sub))
}
gamma <- N * sum(deviation^4) / (sum(deviation^2)^2) - 3
tau2 <- 2 + (1 - g/N) * gamma
S <- sum( (Ns-1) * (log_s2 - sum((Ns-1) * log_s2)/sum(Ns-1))^2) / tau2
df <- g - 1
p.value <- pchisq(S, df, lower.tail=FALSE)
data.frame(S=S, df=df, p.value=p.value, row.names = "")
}
Simulation code
#-------------------------------------------------------------------
# Define the design conditions
library(SimDesign)
Design <- createDesign(var_ratio=c(4, 2, 1, 1/2, 1/4),
N=c(80, 20),
groups_equal=c(TRUE, FALSE),
distribution=c('Gaussian', 't4', 'Chi4', 'Cauchy'),
# remove redundant or not-applicable rows
subset = !(groups_equal & var_ratio < 1) |
!(distribution != 'Gaussian' & var_ratio != 1))
# Brown and Forsythe use different sample sizes when groups were unequal
pick1 <- with(Design, !groups_equal & N == 80)
pick2 <- with(Design, !groups_equal & N == 20)
Design$N[pick1] <- 60
Design$N[pick2] <- 30
Design
#-------------------------------------------------------------------
Generate <- function(condition, fixed_objects = NULL) {
Attach(condition)
sd1 <- ifelse(var_ratio <= 1, 1, sqrt(var_ratio))
sd2 <- ifelse(var_ratio <= 1, sqrt(1/var_ratio), 1)
if(groups_equal){
N1 <- N2 <- N/2
} else {
N1 <- N * 1/3
N2 <- N - N1
}
dv <- switch(distribution,
Gaussian = c(rnorm(N1, sd = sd1), rnorm(N2, sd = sd2)),
t4 = c(rt(N1, df = 4), rt(N2, df = 4)),
Chi4 = c(rchisq(N1, df = 4), rchisq(N2, df = 4)),
Cauchy = c(rcauchy(N1), rcauchy(N2))
)
dat <- data.frame(DV=dv,
group=c(rep('G1', N1), rep('G2', N2)))
dat
}
Analyse <- function(condition, dat, fixed_objects = NULL) {
Attach(dat)
F_test <- var.test(DV ~ group)$p.value
Jacknife <- Jacknife_vartest(DV, group)$p.value
Layard <- Layard_vartest(DV, group)$p.value
W50 <- levene.test(DV, group=group, location = 'median')$p.value
W10 <- levene.test(DV, group=group, location = 'trim.mean',
trim.alpha = .1)$p.value
Levene <- levene.test(DV, group=group, location = 'mean')$p.value
ret <- c(F=F_test, Jacknife=Jacknife, Layard=Layard,
Levene=Levene, W10=W10, W50=W50)
ret
}
Summarise <- function(condition, results, fixed_objects = NULL) {
ret <- EDR(results, alpha = .05)
ret
}
#-------------------------------------------------------------------
# Run the MCS
res <- runSimulation(design=Design, replications=1000, parallel=TRUE,
generate=Generate, analyse=Analyse,
summarise=Summarise, packages='lawstat',
filename='BF_simulation')
res
# A tibble: 68 × 14
var_ratio N groups_equal distribution F Jacknife Layard Levene W10
<dbl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 80 TRUE Gaussian 0.995 0.99 0.99 0.978 0.978
2 2 80 TRUE Gaussian 0.538 0.52 0.533 0.478 0.472
3 1 80 TRUE Gaussian 0.047 0.039 0.041 0.039 0.04
4 0.5 80 TRUE Gaussian 0.577 0.56 0.564 0.51 0.501
5 0.25 80 TRUE Gaussian 0.99 0.984 0.986 0.978 0.975
6 4 20 TRUE Gaussian 0.517 0.421 0.517 0.439 0.42
7 2 20 TRUE Gaussian 0.153 0.134 0.177 0.152 0.137
8 1 20 TRUE Gaussian 0.047 0.059 0.07 0.059 0.052
9 0.5 20 TRUE Gaussian 0.166 0.134 0.189 0.135 0.129
10 0.25 20 TRUE Gaussian 0.513 0.412 0.52 0.435 0.403
# ℹ 58 more rows
# ℹ 5 more variables: W50 <dbl>, REPLICATIONS <int>, SIM_TIME <chr>,
# COMPLETED <chr>, SEED <int>