The Endometriosis-associated Microbiome (original) (raw)

Comparing mean to variance of the outcome variable

## [1] 4049.558
## [1] 509068831
# Share of samples whose Streptococcus count is exactly zero.
# Taking the mean of the logical vector (instead of subsetting the data frame
# and counting rows) keeps missing counts from being tallied as zeroes:
# `df[NA, ]` yields an all-NA row, so the row-count approach silently counts
# every NA as a zero. With no missing counts the result is unchanged.
fraczeroes <- mean(df$Streptococcus == 0, na.rm = TRUE)
fraczeroes
## [1] 0.8736842

# Fit log-linear models

# Poisson regression (log link) of the Streptococcus count on Lactobacillus,
# age and BMI. Serves as the plain count-model baseline for the fits below.
fit.pois <- glm(
  Streptococcus ~ Lactobacillus + age + BMI,
  family = poisson(link = "log"),
  data = df
)

# Coefficient table, deviances and AIC for the Poisson fit.
summary(fit.pois)
## 
## Call:
## glm(formula = Streptococcus ~ Lactobacillus + age + BMI, family = poisson(link = "log"), 
##     data = df)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -5.313e-01  1.873e-02  -28.36   <2e-16 ***
## Lactobacillus  5.141e-08  9.828e-10   52.32   <2e-16 ***
## age            3.599e-01  4.136e-04  870.25   <2e-16 ***
## BMI           -1.573e-01  6.453e-04 -243.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1929049  on 80  degrees of freedom
## Residual deviance:  632598  on 77  degrees of freedom
##   (14 observations deleted due to missingness)
## AIC: 632704
## 
## Number of Fisher Scoring iterations: 20
library(pscl)
# Zero-inflated Poisson: the count part uses the same predictors as the
# Poisson GLM; the `| 1` part gives the zero-inflation component an
# intercept only.
# NOTE(review): the recorded summary shows NaN standard errors for every
# coefficient -- possibly a scaling issue with Lactobacillus; confirm before
# interpreting the estimates.
fit.ZIpois <- zeroinfl(
  Streptococcus ~ Lactobacillus + age + BMI | 1,
  data = df,
  dist = "poisson"
)

# Count-model and zero-inflation coefficients plus the log-likelihood.
summary(fit.ZIpois)
## 
## Call:
## zeroinfl(formula = Streptococcus ~ Lactobacillus + age + BMI | 1, data = df, 
##     dist = "poisson")
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3753 -0.3753 -0.3752 -0.3750 27.8168 
## 
## Count model coefficients (poisson with log link):
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)    6.169e+00        NaN     NaN      NaN
## Lactobacillus  1.321e-07        NaN     NaN      NaN
## age            2.420e-01        NaN     NaN      NaN
## BMI           -2.116e-01        NaN     NaN      NaN
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)     1.96        NaN     NaN      NaN
## 
## Number of iterations in BFGS optimization: 20 
## Log-likelihood: -1.02e+05 on 5 Df
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Negative binomial GLM with the same linear predictor as the other models.
# The control settings request a 1e-8 convergence tolerance, an iteration
# limit of 50 and no tracing.
# NOTE(review): the recorded run ends with "alternation limit reached" and
# repeated non-convergence warnings, so treat this fit with caution.
fit.negbin <- glm.nb(
  Streptococcus ~ Lactobacillus + age + BMI,
  data = df,
  control = glm.control(epsilon = 1e-8, maxit = 50, trace = FALSE)
)
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning: glm.fit: algorithm did not converge
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(Streptococcus ~ Lactobacillus + age + BMI, data = df, :
## alternation limit reached
## # A tibble: 4 × 5
##   term               estimate   std.error statistic     p.value
##   <chr>                 <dbl>       <dbl>     <dbl>       <dbl>
## 1 (Intercept)   -34.4         8.11            -4.24 0.0000224  
## 2 Lactobacillus   0.000000889 0.000000196      4.54 0.00000570 
## 3 age             1.09        0.207            5.30 0.000000119
## 4 BMI             0.297       0.277            1.07 0.282
# Zero-inflated negative binomial: same count-model predictors as above, with
# an intercept-only zero-inflation component (`| 1`).
# NOTE(review): the recorded summary shows NaN standard errors for every
# coefficient; confirm the fit is usable before interpreting it.
fit.ZInegbin <- zeroinfl(
  Streptococcus ~ Lactobacillus + age + BMI | 1,
  data = df,
  dist = "negbin"
)

# Count-model coefficients (including log(theta)), zero-inflation intercept
# and the log-likelihood.
summary(fit.ZInegbin)
## 
## Call:
## zeroinfl(formula = Streptococcus ~ Lactobacillus + age + BMI | 1, data = df, 
##     dist = "negbin")
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.1898 -0.1898 -0.1898 -0.1898  7.0569 
## 
## Count model coefficients (negbin with log link):
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)    1.945e+01        NaN     NaN      NaN
## Lactobacillus -2.787e-07        NaN     NaN      NaN
## age            2.839e-02        NaN     NaN      NaN
## BMI           -4.084e-01        NaN     NaN      NaN
## Log(theta)    -9.837e-01        NaN     NaN      NaN
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    1.921        NaN     NaN      NaN
## 
## Theta = 0.3739 
## Number of iterations in BFGS optimization: 40 
## Log-likelihood: -131.8 on 6 Df