Stochastic root solving of independent samples t-tests example (original) (raw)
Step 1: create design and solve for missing values (NA)
library(SimDesign)
Design <- createDesign(N = NA,
d = c(.2, .5, .8),
sig.level = .05)
Design # solve for NA's
# A tibble: 3 × 3
N d sig.level
<dbl> <dbl> <dbl>
1 NA 0.2 0.05
2 NA 0.5 0.05
3 NA 0.8 0.05
Step 2 — Define generate, analyse, and summarise functions
Generate <- function(condition, fixed_objects = NULL) {
Attach(condition)
group1 <- rnorm(N)
group2 <- rnorm(N, mean=d)
dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
DV = c(group1, group2))
dat
}
Analyse <- function(condition, dat, fixed_objects = NULL) {
p <- t.test(DV ~ group, dat, var.equal=TRUE)$p.value
p
}
Summarise <- function(condition, results, fixed_objects = NULL) {
# Must return a single number corresponding to f(x) in the
# root equation f(x) = b
ret <- EDR(results, alpha = condition$sig.level)
ret
}
Step 3 — Optimize N over the rows in design
# Initial search between N = [10,500] for each row using the default
# integer solver (integer = TRUE)
# In this example, b = target power.
# Terminate if prediction CI is consistently within [.795, .805]
solved <- SimSolve(design=Design, b=.8, interval=c(10, 500),
generate=Generate, analyse=Analyse,
summarise=Summarise, predCI.tol = .01, verbose=FALSE)
solved
# A tibble: 3 × 3
N d sig.level
<dbl> <dbl> <dbl>
1 393.39 0.2 0.05
2 63.167 0.5 0.05
3 25.471 0.8 0.05
$condition_1
$root
[1] 393.3934
$predCI.root
CI_2.5 CI_97.5
391.4833 395.3150
$b
[1] 0.8
$predCI.b
[1] 0.7952861 0.8046320
$terminated_early
[1] TRUE
$time
[1] 52.08s
$iterations
[1] 99
$total.replications
[1] 32400
$tab
y x reps
11 0.7976190 390 420
14 0.7953886 393 11710
15 0.8029930 394 5680
16 0.8060408 395 12250
18 0.8457143 397 350
$condition_2
$root
[1] 63.16727
$predCI.root
CI_2.5 CI_97.5
62.41471 63.93166
$b
[1] 0.8
$predCI.b
[1] 0.7951040 0.8048077
$terminated_early
[1] TRUE
$time
[1] 47.00s
$iterations
[1] 98
$total.replications
[1] 31900
$tab
y x reps
6 0.7714286 60 1190
7 0.7868687 61 990
8 0.7977821 62 10370
9 0.7924797 63 4920
10 0.8008272 64 10880
11 0.8208333 65 1200
12 0.8600000 66 400
$condition_3
$root
[1] 25.47103
$predCI.root
CI_2.5 CI_97.5
25.08662 25.84527
$b
[1] 0.8
$predCI.b
[1] 0.7933248 0.8065120
$terminated_early
[1] FALSE
$time
[1] 47.49s
$iterations
[1] 100
$total.replications
[1] 32900
$tab
y x reps
1 0.7921101 25 10900
2 0.8077287 26 6340
3 0.8272152 27 14220
4 0.8418919 28 740
# also can plot median history and estimate precision
plot(solved, 1, type = 'history')
plot(solved, 1, type = 'density')
Warning in density.default(x, weights = reps/sum(reps)): Selecting bandwidth
*not* using 'weights'
# verify with true power from pwr package
library(pwr)
pwr.t.test(d=.2, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 393.4057
d = 0.2
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.5, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 63.76561
d = 0.5
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.8, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 25.52458
d = 0.8
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
# use estimated N results to see how close power was
N <- solved$N
pwr.t.test(d=.2, n=N[1], sig.level = .05)
Two-sample t test power calculation
n = 393.3934
d = 0.2
sig.level = 0.05
power = 0.7999878
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.5, n=N[2], sig.level = .05)
Two-sample t test power calculation
n = 63.16727
d = 0.5
sig.level = 0.05
power = 0.7962324
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.8, n=N[3], sig.level = .05)
Two-sample t test power calculation
n = 25.47103
d = 0.8
sig.level = 0.05
power = 0.7991415
alternative = two.sided
NOTE: n is number in *each* group
# with rounding
N <- ceiling(solved$N)
pwr.t.test(d=.2, n=N[1], sig.level = .05)
Two-sample t test power calculation
n = 394
d = 0.2
sig.level = 0.05
power = 0.8005931
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.5, n=N[2], sig.level = .05)
Two-sample t test power calculation
n = 64
d = 0.5
sig.level = 0.05
power = 0.8014596
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(d=.8, n=N[3], sig.level = .05)
Two-sample t test power calculation
n = 26
d = 0.8
sig.level = 0.05
power = 0.8074866
alternative = two.sided
NOTE: n is number in *each* group
Solving effect sizes
Similar setup as above, however goal is now to solve d given sample size and power inputs (inputs for root no longer required to be an integer) # Step 1: Create design and solve for missing NAs (effect size d, in this case)
Design <- createDesign(N = c(100, 50, 25),
d = NA,
sig.level = .05)
Design # solve for NA's
# A tibble: 3 × 3
N d sig.level
<dbl> <dbl> <dbl>
1 100 NA 0.05
2 50 NA 0.05
3 25 NA 0.05
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions (same as above)
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Optimize d over the rows in design
# search between d = [.1, 2] for each row
# In this example, b = target power
# note that integer = FALSE to allow smooth updates of d, and convergence
# is based on whether the prediction CI is consistently within [.795, .805]
solved <- SimSolve(design=Design, b = .8, interval=c(.1, 2),
generate=Generate, analyse=Analyse,
summarise=Summarise, integer=FALSE, predCI.tol=.01, verbose=FALSE)
solved
# A tibble: 3 × 3
N d sig.level
<dbl> <dbl> <dbl>
1 100 0.39776 0.05
2 50 0.56304 0.05
3 25 0.80221 0.05
$condition_1
$root
[1] 0.3977577
$predCI.root
CI_2.5 CI_97.5
0.3961131 0.3993896
$b
[1] 0.8
$predCI.b
[1] 0.7951560 0.8048647
$terminated_early
[1] TRUE
$time
[1] 42.77s
$iterations
[1] 92
$total.replications
[1] 28900
$condition_2
$root
[1] 0.5630443
$predCI.root
CI_2.5 CI_97.5
0.5601256 0.5659318
$b
[1] 0.8
$predCI.b
[1] 0.7951143 0.8047925
$terminated_early
[1] TRUE
$time
[1] 41.25s
$iterations
[1] 91
$total.replications
[1] 28400
$condition_3
$root
[1] 0.8022067
$predCI.root
CI_2.5 CI_97.5
0.7950096 0.8121353
$b
[1] 0.8
$predCI.b
[1] 0.7951341 0.8047805
$terminated_early
[1] TRUE
$time
[1] 42.74s
$iterations
[1] 92
$total.replications
[1] 28900
Verify with true power from pwr package.
library(pwr)
pwr.t.test(n=100, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 100
d = 0.3981407
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(n=50, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 50
d = 0.565858
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(n=25, power = .8, sig.level = .05)
Two-sample t test power calculation
n = 25
d = 0.8087121
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
# use estimated d results to see how close power was
pwr.t.test(n=100, d = solved$d[1], sig.level = .05)
Two-sample t test power calculation
n = 100
d = 0.3977577
sig.level = 0.05
power = 0.7992496
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(n=50, d = solved$d[2], sig.level = .05)
Two-sample t test power calculation
n = 50
d = 0.5630443
sig.level = 0.05
power = 0.7960437
alternative = two.sided
NOTE: n is number in *each* group
pwr.t.test(n=25, d = solved$d[3], sig.level = .05)
Two-sample t test power calculation
n = 25
d = 0.8022067
sig.level = 0.05
power = 0.7936378
alternative = two.sided
NOTE: n is number in *each* group