Bioconductor Code: MethReg (original) (raw)
# MethReg \[!\[codecov\](https://codecov.io/gl/tiagochst/methtf/branch/%5Cx6d6173746572/graph/badge.svg?token=NESBYPVF64)\](https://codecov.io/gl/tiagochst/methtf) \[!\[license\](https://img.shields.io/badge/license-GPL%20\\(%3E%3D%202\\)-blue)\]() \`MethReg\` can be used to generate testable hypothesis on the synergistic interaction of DMRs and TFs in gene regulation. \`MethReg\` can be used either to evaluate regulatory potentials of candidate regions or to search for methylation coupled TF regulatory processes in the entire genome. ## Installation You can install the MethReg from Bioconductor with: \`\`\` r BiocManager::install("MethReg") \`\`\` ## Example This is a basic example which shows you how to use the package: \`\`\` r library(MethReg) #--------------------------------------- # Data input #--------------------------------------- # 1) Gene expression matrix # 2) DNA methylation # With same column names data("dna.met.chr21") data("gene.exp.chr21.log2") all(colnames(dna.met.chr21) == colnames(gene.exp.chr21.log2)) #> \[1\] TRUE # Since we are working with regions we need to map our 450k array to regions dna.met.chr21 <- make\_dnam\_se(dna.met.chr21) \`\`\` \`\`\` r #--------------------------------------- # Mapping regions #--------------------------------------- # For each region get target gene and predicted TF biding to the regions # get\_triplet incorporates two other functions: # 1) get\_region\_target\_gene # 2) get\_tf\_in\_region triplet <- create\_triplet\_distance\_based( region = rownames(dna.met.chr21), motif.search.window.size = 50, motif.search.p.cutoff = 10^-3, target.method = "genes.promoter.overlap", genome = "hg19", cores = 1 ) #> Finding target genes #> Mapping regions to the closest gene #> Looking for TFBS #> #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:base': #> #> expand.grid #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit #> Joining, by = "regionID" \`\`\` \`\`\` r #--------------------------------------- # Evaluate two models: #--------------------------------------- # 1) target gene \~ TF + DNAm + TF \* DNAm # 2) target gene \~ TF + DNAm\_group + TF \* DNAm\_group # where DNAm\_group is a binary indicator if the sample belongs to: Q4 or Q1 results <- interaction\_model( triplet = triplet, dnam = dna.met.chr21, exp = gene.exp.chr21.log2 ) \`\`\` \`\`\` r head(results) #> regionID target\_gene\_name target #> 1 chr21:30372219-30372220 RPL23P2 ENSG00000176054 #> 2 chr21:30430511-30430512 AF129075.5 ENSG00000231125 #> 3 chr21:33109780-33109781 AP000255.6 ENSG00000273091 #> 4 chr21:40692859-40692860 BRWD1 ENSG00000185658 #> 5 chr21:43982646-43982647 AP001625.6 ENSG00000235772 #> 6 chr21:43983587-43983588 AP001625.6 ENSG00000235772 #> TF\_external\_gene\_name TF TF\_symbol target\_symbol met.IQR #> 1 ETS2 ENSG00000157557 ETS2 RPL23P2 0.182568764 #> 2 BACH1 ENSG00000156273 BACH1 AF129075.2 0.310379208 #> 3 GABPA ENSG00000154727 GABPA AP000255.1 0.080160919 #> 4 GABPA ENSG00000154727 GABPA BRWD1 0.040638333 #> 5 GABPA ENSG00000154727 GABPA AP001625.2 0.008670064 #> 6 GABPA ENSG00000154727 GABPA AP001625.2 0.014175786 #> quant\_pval\_metGrp quant\_fdr\_metGrp quant\_pval\_rna.tf quant\_fdr\_rna.tf #> 1 3.953828e-05 0.0001186148 5.958024e-03 1.787407e-02 #> 2 7.437666e-01 0.7437665914 6.570509e-04 6.570509e-04 #> 3 2.208774e-03 0.0022087741 5.803942e-01 5.803942e-01 #> 4 3.823862e-01 0.3823861582 7.142112e-08 7.142112e-08 #> 5 4.434057e-01 0.4434057288 4.977765e-02 4.977765e-02 #> 6 5.619040e-01 0.9778889092 1.305771e-03 2.611541e-03 #> quant\_pval\_metGrp:rna.tf quant\_fdr\_metGrp:rna.tf quant\_estimate\_metGrp #> 1 3.768097e-05 0.0001130429 -83.041219 #> 2 7.945988e-01 0.7945988318 -3.533136 #> 3 2.305241e-03 0.0023052406 -30.858474 #> 4 4.999473e-01 0.4999473203 -3.386762 #> 5 4.663539e-01 0.4663538741 -10.643704 #> 6 5.234533e-01 0.9782246238 -5.525037 #> quant\_estimate\_rna.tf quant\_estimate\_metGrp:rna.tf Model.quantile #> 1 -2.0395999 3.8764266 Robust Linear Model #> 2 1.3437155 0.1642009 Robust Linear Model #> 3 0.2627746 1.8528803 Robust Linear Model #> 4 1.3876278 0.1522536 Robust Linear Model #> 5 1.1156152 0.5884195 Robust Linear Model #> 6 1.2125286 0.3594288 Robust Linear Model #> Wilcoxon\_pval\_target\_q4met\_vs\_q1met Wilcoxon\_pval\_tf\_q4met\_vs\_q1met #> 1 0.42735531 0.5707504 #> 2 0.47267559 0.4726756 #> 3 0.49466484 0.9097219 #> 4 0.03763531 0.7913368 #> 5 0.44952133 0.2730363 #> 6 1.00000000 0.2730363 #> % of 0 target genes (Q1 and Q4) #> 1 0 % #> 2 5 % #> 3 20 % #> 4 0 % #> 5 10 % #> 6 5 % \`\`\` # Session information \`\`\` r sessionInfo() #> R version 4.0.2 (2020-06-22) #> Platform: x86\_64-apple-darwin17.0 (64-bit) #> Running under: macOS Catalina 10.15.6 #> #> Matrix products: default #> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib #> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib #> #> locale: #> \[1\] en\_US.UTF-8/en\_US.UTF-8/en\_US.UTF-8/C/en\_US.UTF-8/en\_US.UTF-8 #> #> attached base packages: #> \[1\] stats4 parallel stats graphics grDevices utils datasets #> \[8\] methods base #> #> other attached packages: #> \[1\] BSgenome.Hsapiens.UCSC.hg19\_1.4.3 BSgenome\_1.56.0 #> \[3\] rtracklayer\_1.48.0 Biostrings\_2.56.0 #> \[5\] XVector\_0.28.0 GenomicRanges\_1.40.0 #> \[7\] GenomeInfoDb\_1.24.2 IRanges\_2.22.2 #> \[9\] S4Vectors\_0.26.1 sesameData\_1.6.0 #> \[11\] ExperimentHub\_1.15.3 AnnotationHub\_2.20.2 #> \[13\] BiocFileCache\_1.12.1 dbplyr\_1.4.4 #> \[15\] BiocGenerics\_0.34.0 MethReg\_0.99.11 #> #> loaded via a namespace (and not attached): #> \[1\] readxl\_1.3.1 backports\_1.1.10 #> \[3\] plyr\_1.8.6 BiocParallel\_1.22.0 #> \[5\] ggplot2\_3.3.2 TFBSTools\_1.26.0 #> \[7\] digest\_0.6.25 foreach\_1.5.0 #> \[9\] htmltools\_0.5.0 GO.db\_3.11.4 #> \[11\] magrittr\_1.5 memoise\_1.1.0 #> \[13\] doParallel\_1.0.15 sfsmisc\_1.1-7 #> \[15\] openxlsx\_4.1.5 readr\_1.3.1 #> \[17\] annotate\_1.66.0 matrixStats\_0.56.0 #> \[19\] R.utils\_2.10.1 JASPAR2020\_0.99.10 #> \[21\] prettyunits\_1.1.1 colorspace\_1.4-1 #> \[23\] blob\_1.2.1 rappdirs\_0.3.1 #> \[25\] haven\_2.3.1 xfun\_0.17 #> \[27\] dplyr\_1.0.2 crayon\_1.3.4 #> \[29\] RCurl\_1.98-1.2 TFMPvalue\_0.0.8 #> \[31\] iterators\_1.0.12 glue\_1.4.2 #> \[33\] gtable\_0.3.0 sesame\_1.6.0 #> \[35\] zlibbioc\_1.34.0 DelayedArray\_0.14.1 #> \[37\] car\_3.0-9 wheatmap\_0.1.0 #> \[39\] Rhdf5lib\_1.10.1 HDF5Array\_1.16.1 #> \[41\] abind\_1.4-5 scales\_1.1.1 #> \[43\] pscl\_1.5.5 DBI\_1.1.0 #> \[45\] rstatix\_0.6.0 Rcpp\_1.0.5 #> \[47\] xtable\_1.8-4 progress\_1.2.2 #> \[49\] foreign\_0.8-80 bit\_4.0.4 #> \[51\] preprocessCore\_1.50.0 httr\_1.4.2 #> \[53\] RColorBrewer\_1.1-2 ellipsis\_0.3.1 #> \[55\] pkgconfig\_2.0.3 XML\_3.99-0.5 #> \[57\] R.methodsS3\_1.8.1 DNAcopy\_1.62.0 #> \[59\] tidyselect\_1.1.0 rlang\_0.4.7 #> \[61\] reshape2\_1.4.4 later\_1.1.0.1 #> \[63\] AnnotationDbi\_1.50.3 munsell\_0.5.0 #> \[65\] BiocVersion\_3.11.1 cellranger\_1.1.0 #> \[67\] tools\_4.0.2 DirichletMultinomial\_1.30.0 #> \[69\] generics\_0.0.2 RSQLite\_2.2.0 #> \[71\] broom\_0.7.0 evaluate\_0.14 #> \[73\] stringr\_1.4.0 fastmap\_1.0.1 #> \[75\] yaml\_2.2.1 knitr\_1.29 #> \[77\] bit64\_4.0.5 zip\_2.1.1 #> \[79\] caTools\_1.18.0 purrr\_0.3.4 #> \[81\] randomForest\_4.6-14 KEGGREST\_1.28.0 #> \[83\] mime\_0.9 R.oo\_1.24.0 #> \[85\] poweRlaw\_0.70.6 pracma\_2.2.9 #> \[87\] compiler\_4.0.2 curl\_4.3 #> \[89\] png\_0.1-7 interactiveDisplayBase\_1.26.3 #> \[91\] ggsignif\_0.6.0 tibble\_3.0.3 #> \[93\] stringi\_1.5.3 forcats\_0.5.0 #> \[95\] lattice\_0.20-41 CNEr\_1.24.0 #> \[97\] Matrix\_1.2-18 vctrs\_0.3.4 #> \[99\] pillar\_1.4.6 lifecycle\_0.2.0 #> \[101\] BiocManager\_1.30.10 data.table\_1.13.0 #> \[103\] bitops\_1.0-6 httpuv\_1.5.4 #> \[105\] R6\_2.4.1 promises\_1.1.1 #> \[107\] rio\_0.5.16 codetools\_0.2-16 #> \[109\] MASS\_7.3-53 gtools\_3.8.2 #> \[111\] assertthat\_0.2.1 seqLogo\_1.54.3 #> \[113\] rhdf5\_2.32.2 SummarizedExperiment\_1.18.2 #> \[115\] GenomicAlignments\_1.24.0 Rsamtools\_2.4.0 #> \[117\] GenomeInfoDbData\_1.2.3 hms\_0.5.3 #> \[119\] motifmatchr\_1.10.0 grid\_4.0.2 #> \[121\] tidyr\_1.1.2 rmarkdown\_2.3 #> \[123\] carData\_3.0-4 ggpubr\_0.4.0 #> \[125\] Biobase\_2.48.0 shiny\_1.5.0 \`\`\`