Stochastic root solving of independent samples t-tests example (original) (raw)

Step 1: create design and solve for missing values (NA)

library(SimDesign)

Design <- createDesign(N = NA,
                       d = c(.2, .5, .8),
                       sig.level = .05)
Design    # solve for NA's
# A tibble: 3 × 3
      N     d sig.level
  <dbl> <dbl>     <dbl>
1    NA   0.2      0.05
2    NA   0.5      0.05
3    NA   0.8      0.05

Step 2 — Define generate, analyse, and summarise functions

Generate <- function(condition, fixed_objects = NULL) {
    Attach(condition)
    group1 <- rnorm(N)
    group2 <- rnorm(N, mean=d)
    dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
                      DV = c(group1, group2))
    dat
}

Analyse <- function(condition, dat, fixed_objects = NULL) {
    p <- t.test(DV ~ group, dat, var.equal=TRUE)$p.value
    p
}

Summarise <- function(condition, results, fixed_objects = NULL) {
    # Must return a single number corresponding to f(x) in the
    # root equation f(x) = b

    ret <- EDR(results, alpha = condition$sig.level)
    ret
}

Step 3 — Optimize N over the rows in design

# Initial search between N = [10,500] for each row using the default
# integer solver (integer = TRUE)

# In this example, b = target power.
# Terminate if prediction CI is consistently within [.795, .805]
solved <- SimSolve(design=Design, b=.8, interval=c(10, 500),
                   generate=Generate, analyse=Analyse,
                   summarise=Summarise, predCI.tol = .01, verbose=FALSE)
solved
# A tibble: 3 × 3
        N     d sig.level
    <dbl> <dbl>     <dbl>
1 393.39    0.2      0.05
2  63.167   0.5      0.05
3  25.471   0.8      0.05
$condition_1
$root
[1] 393.3934

$predCI.root
  CI_2.5  CI_97.5 
391.4833 395.3150 

$b
[1] 0.8

$predCI.b
[1] 0.7952861 0.8046320

$terminated_early
[1] TRUE

$time
[1] 52.08s

$iterations
[1] 99

$total.replications
[1] 32400

$tab
           y   x  reps
11 0.7976190 390   420
14 0.7953886 393 11710
15 0.8029930 394  5680
16 0.8060408 395 12250
18 0.8457143 397   350


$condition_2
$root
[1] 63.16727

$predCI.root
  CI_2.5  CI_97.5 
62.41471 63.93166 

$b
[1] 0.8

$predCI.b
[1] 0.7951040 0.8048077

$terminated_early
[1] TRUE

$time
[1] 47.00s

$iterations
[1] 98

$total.replications
[1] 31900

$tab
           y  x  reps
6  0.7714286 60  1190
7  0.7868687 61   990
8  0.7977821 62 10370
9  0.7924797 63  4920
10 0.8008272 64 10880
11 0.8208333 65  1200
12 0.8600000 66   400


$condition_3
$root
[1] 25.47103

$predCI.root
  CI_2.5  CI_97.5 
25.08662 25.84527 

$b
[1] 0.8

$predCI.b
[1] 0.7933248 0.8065120

$terminated_early
[1] FALSE

$time
[1] 47.49s

$iterations
[1] 100

$total.replications
[1] 32900

$tab
          y  x  reps
1 0.7921101 25 10900
2 0.8077287 26  6340
3 0.8272152 27 14220
4 0.8418919 28   740
# also can plot median history and estimate precision
plot(solved, 1, type = 'history')
plot(solved, 1, type = 'density')
Warning in density.default(x, weights = reps/sum(reps)): Selecting bandwidth
*not* using 'weights'
# verify with true power from pwr package
library(pwr)
pwr.t.test(d=.2, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 393.4057
              d = 0.2
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.5, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 63.76561
              d = 0.5
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.8, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 25.52458
              d = 0.8
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
# use estimated N results to see how close power was
N <- solved$N
pwr.t.test(d=.2, n=N[1], sig.level = .05)

     Two-sample t test power calculation 

              n = 393.3934
              d = 0.2
      sig.level = 0.05
          power = 0.7999878
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.5, n=N[2], sig.level = .05)

     Two-sample t test power calculation 

              n = 63.16727
              d = 0.5
      sig.level = 0.05
          power = 0.7962324
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.8, n=N[3], sig.level = .05)

     Two-sample t test power calculation 

              n = 25.47103
              d = 0.8
      sig.level = 0.05
          power = 0.7991415
    alternative = two.sided

NOTE: n is number in *each* group
# with rounding
N <- ceiling(solved$N)
pwr.t.test(d=.2, n=N[1], sig.level = .05)

     Two-sample t test power calculation 

              n = 394
              d = 0.2
      sig.level = 0.05
          power = 0.8005931
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.5, n=N[2], sig.level = .05)

     Two-sample t test power calculation 

              n = 64
              d = 0.5
      sig.level = 0.05
          power = 0.8014596
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(d=.8, n=N[3], sig.level = .05)

     Two-sample t test power calculation 

              n = 26
              d = 0.8
      sig.level = 0.05
          power = 0.8074866
    alternative = two.sided

NOTE: n is number in *each* group

Solving effect sizes

Similar setup as above, however goal is now to solve d given sample size and power inputs (inputs for root no longer required to be an integer) # Step 1: Create design and solve for missing NAs (effect size d, in this case)

Design <- createDesign(N = c(100, 50, 25),
                       d = NA,
                       sig.level = .05)
Design    # solve for NA's
# A tibble: 3 × 3
      N     d sig.level
  <dbl> <dbl>     <dbl>
1   100    NA      0.05
2    50    NA      0.05
3    25    NA      0.05
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions (same as above)

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Optimize d over the rows in design
# search between d = [.1, 2] for each row

# In this example, b = target power
# note that integer = FALSE to allow smooth updates of d, and convergence
# is based on whether the prediction CI is consistently within [.795, .805]
solved <- SimSolve(design=Design, b = .8, interval=c(.1, 2),
                   generate=Generate, analyse=Analyse,
                   summarise=Summarise, integer=FALSE, predCI.tol=.01, verbose=FALSE)
solved
# A tibble: 3 × 3
      N       d sig.level
  <dbl>   <dbl>     <dbl>
1   100 0.39776      0.05
2    50 0.56304      0.05
3    25 0.80221      0.05
$condition_1
$root
[1] 0.3977577

$predCI.root
   CI_2.5   CI_97.5 
0.3961131 0.3993896 

$b
[1] 0.8

$predCI.b
[1] 0.7951560 0.8048647

$terminated_early
[1] TRUE

$time
[1] 42.77s

$iterations
[1] 92

$total.replications
[1] 28900


$condition_2
$root
[1] 0.5630443

$predCI.root
   CI_2.5   CI_97.5 
0.5601256 0.5659318 

$b
[1] 0.8

$predCI.b
[1] 0.7951143 0.8047925

$terminated_early
[1] TRUE

$time
[1] 41.25s

$iterations
[1] 91

$total.replications
[1] 28400


$condition_3
$root
[1] 0.8022067

$predCI.root
   CI_2.5   CI_97.5 
0.7950096 0.8121353 

$b
[1] 0.8

$predCI.b
[1] 0.7951341 0.8047805

$terminated_early
[1] TRUE

$time
[1] 42.74s

$iterations
[1] 92

$total.replications
[1] 28900

Verify with true power from pwr package.

library(pwr)
pwr.t.test(n=100, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 100
              d = 0.3981407
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(n=50, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 50
              d = 0.565858
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(n=25, power = .8, sig.level = .05)

     Two-sample t test power calculation 

              n = 25
              d = 0.8087121
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group
# use estimated d results to see how close power was
pwr.t.test(n=100, d = solved$d[1], sig.level = .05)

     Two-sample t test power calculation 

              n = 100
              d = 0.3977577
      sig.level = 0.05
          power = 0.7992496
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(n=50, d = solved$d[2], sig.level = .05)

     Two-sample t test power calculation 

              n = 50
              d = 0.5630443
      sig.level = 0.05
          power = 0.7960437
    alternative = two.sided

NOTE: n is number in *each* group
pwr.t.test(n=25, d = solved$d[3], sig.level = .05)

     Two-sample t test power calculation 

              n = 25
              d = 0.8022067
      sig.level = 0.05
          power = 0.7936378
    alternative = two.sided

NOTE: n is number in *each* group