Abstract

bcbioSmallRna package version: 0.0.1

# Document setup: attach rendering/plotting packages and set chunk defaults.
library(BiocStyle)
# Chunk defaults: leave source code unformatted (tidy=FALSE), draw figures
# with the PNG device, hide messages, and show warnings. With error=FALSE
# knitr stops at the first chunk error instead of printing it in the output.
knitr::opts_chunk$set(tidy=FALSE,
                      dev="png",
                      message=FALSE, error=FALSE,
                      warning=TRUE)
library(knitr)
library(ggplot2)

# Fix the random seed so repeated renders give the same results
set.seed(1454944673L)

# Make ggplot2's light theme (base font size 11) the default for all plots,
# then adjust it so legends are centered and placed below the plot.
theme_set(
    theme_light(base_size = 11L))
theme_update(
    legend.justification = "center",
    legend.position = "bottom")
# Analysis packages used by the examples below.
library(isomiRs)
library(DEGreport)
library(bcbioSmallRna)

# Load the example dataset `sbcb` (presumably shipped with bcbioSmallRna).
data(sbcb)
# `sbcb` is a bcbioSmallRnaDataSet; alias it as `bcb` for the examples below
bcb <- sbcb

Get count matrix

You can get the count matrices with the methods mirna, isomir and cluster:

# miRNA-level counts: show the first rows of the matrix returned by mirna()
head(mirna(bcb))
##               ERR187490 ERR187494 ERR187664 ERR187665
## hsa-let-7a-3p        26       102        25       197
## hsa-let-7a-5p     15396     88290     30838    111189
## hsa-let-7b-3p         8         0         0        39
## hsa-let-7b-5p       400       229       106      1067
## hsa-let-7c-5p        58        58        93       115
## hsa-let-7d-3p       124       560       265       848
# Cluster-level counts: show the first rows of the matrix returned by cluster()
head(cluster(bcb))
##           ERR187490 ERR187494 ERR187664 ERR187665
## cluster:1        32       190       126        55
## cluster:2      1033      6675      2283      8369
## cluster:3       313      1077       564      2132
## cluster:4      2996     16959     15000     48050
## cluster:5       465      2470       729      2259
## cluster:6         9        43        14        47
# isomiR-level counts: show the first rows of the matrix returned by isomir()
head(isomir(bcb))
##                         ERR187490 ERR187494 ERR187664 ERR187665
## hsa-let-7a-3p                   5        26         5        36
## hsa-let-7a-3p;iso_3p:c          3         8         3        27
## hsa-let-7a-3p;iso_3p:C          0         2         0         3
## hsa-let-7a-3p;iso_3p:tc         3         0         0         0
## hsa-let-7a-3p;iso_add:A         0         4         0         6
## hsa-let-7a-3p;iso_add:T        15        54        17       116

By default this returns the raw count data; however, you can access pre-computed normalized data by passing "log" as the second positional parameter:

# Passing "log" as the second argument selects the pre-computed normalized
# values instead of the raw counts.
head(mirna(bcb, "log"))
##               ERR187490 ERR187494 ERR187664 ERR187665
## hsa-let-7a-3p  6.215516  6.219226  5.698424  6.803659
## hsa-let-7a-5p 15.086041 15.638169 15.467636 15.723060
## hsa-let-7b-3p  5.021667  3.046144  3.046144  5.063780
## hsa-let-7b-5p  9.844976  7.212310  7.424764  9.064181
## hsa-let-7c-5p  7.199906  5.603116  7.254332  6.159710
## hsa-let-7d-3p  8.210866  8.408052  8.663905  8.744015

Metrics

Some important metrics are stored in the object and can be retrieved with the following methods:

Adapter removal

This section shows how to get general stats for the adapter removal step.

To get the numbers of adapters removed at each position:

# adapter() returns a list; "reads_by_pos" is the per-position table
# (columns: size, reads, sample, colorby). Show its first rows.
head(adapter(bcb)[["reads_by_pos"]])
##   size  reads    sample colorby
## 1   17 155324 ERR187490 BRITISH
## 2   18 293195 ERR187490 BRITISH
## 3   19 155948 ERR187490 BRITISH
## 4   20 187603 ERR187490 BRITISH
## 5   21 211411 ERR187490 BRITISH
## 6   22 338768 ERR187490 BRITISH

The total number of reads with adapter in each sample can also be seen with:

# "reads_by_sample" holds one row per sample with its total read count
# (columns: sample, colorby, total).
adapter(bcb)[["reads_by_sample"]]
## # A tibble: 4 x 3
## # Groups:   sample [?]
##   sample    colorby   total
##   <chr>     <fct>     <int>
## 1 ERR187490 BRITISH 2457059
## 2 ERR187494 FINLAND 6048597
## 3 ERR187664 USA     3759076
## 4 ERR187665 NIGERIA 5772822

General metrics

All the metrics computed by bcbio can be seen with:

# Print the per-sample metrics table stored in the object (one row per sample).
metrics(bcb)
##   country  group    sample library_size quality_format read_pass_filter
## 1 BRITISH group1 ERR187490           30       standard          8594767
## 2 FINLAND group1 ERR187494           30       standard         11802968
## 3     USA group2 ERR187664           30       standard          9697283
## 4 NIGERIA group2 ERR187665           40       standard          8176320
##   read_with_adapter reads_before_trimming sequence_length
## 1           3098670               8594767           17-28
## 2           8488581              11802968           17-28
## 3           4334146               9697283           17-28
## 4           8075701               8176320           17-42
##   sequences_flagged_as_poor_quality x_gc
## 1                                 0   51
## 2                                 0   51
## 3                                 0   49
## 4                                 0   49

Session

# Record the R version, platform and package versions used for this render.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] bcbioSmallRna_0.0.1         DEGreport_1.17.5           
##  [3] isomiRs_1.9.1               SummarizedExperiment_1.10.1
##  [5] DelayedArray_0.6.5          BiocParallel_1.14.2        
##  [7] matrixStats_0.54.0          Biobase_2.40.0             
##  [9] GenomicRanges_1.32.6        GenomeInfoDb_1.16.0        
## [11] IRanges_2.14.11             S4Vectors_0.18.3           
## [13] BiocGenerics_0.26.0         DiscriMiner_0.1-29         
## [15] ggplot2_3.0.0               knitr_1.20                 
## [17] BiocStyle_2.8.2            
## 
## loaded via a namespace (and not attached):
##   [1] assertive.base_0.0-7        colorspace_1.3-2           
##   [3] rjson_0.2.20                rprojroot_1.3-2            
##   [5] circlize_0.4.4              htmlTable_1.12             
##   [7] XVector_0.20.0              ggdendro_0.1-20            
##   [9] GlobalOptions_0.1.0         base64enc_0.1-3            
##  [11] fs_1.2.6                    rstudioapi_0.7             
##  [13] roxygen2_6.1.0              assertive.sets_0.0-3       
##  [15] MultiAssayExperiment_1.6.0  ggrepel_0.8.0              
##  [17] bit64_0.9-7                 fansi_0.3.0                
##  [19] AnnotationDbi_1.42.1        xml2_1.2.0                 
##  [21] splines_3.5.1               logging_0.7-103            
##  [23] mnormt_1.5-5                geneplotter_1.58.0         
##  [25] Formula_1.2-3               Nozzle.R1_1.1-1            
##  [27] broom_0.5.0                 annotate_1.58.0            
##  [29] cluster_2.0.7-1             readr_1.1.1                
##  [31] compiler_3.5.1              backports_1.1.2            
##  [33] assertthat_0.2.0            Matrix_1.2-14              
##  [35] lazyeval_0.2.1              cli_1.0.0                  
##  [37] limma_3.36.3                lasso2_1.2-19              
##  [39] acepack_1.4.1               htmltools_0.3.6            
##  [41] tools_3.5.1                 bindrcpp_0.2.2             
##  [43] gtable_0.2.0                glue_1.3.0                 
##  [45] GenomeInfoDbData_1.1.0      dplyr_0.7.6                
##  [47] Rcpp_0.12.18                pkgdown_1.1.0              
##  [49] gdata_2.18.0                nlme_3.1-137               
##  [51] psych_1.8.4                 stringr_1.3.1              
##  [53] gtools_3.8.1                XML_3.98-1.16              
##  [55] edgeR_3.22.3                zlibbioc_1.26.0            
##  [57] MASS_7.3-50                 scales_1.0.0               
##  [59] hms_0.4.2                   RColorBrewer_1.1-2         
##  [61] ComplexHeatmap_1.18.1       yaml_2.2.0                 
##  [63] memoise_1.1.0               gridExtra_2.3              
##  [65] rpart_4.1-13                reshape_0.8.7              
##  [67] latticeExtra_0.6-28         stringi_1.2.4              
##  [69] RSQLite_2.1.1               genefilter_1.62.0          
##  [71] desc_1.2.0                  checkmate_1.8.5            
##  [73] caTools_1.17.1.1            shape_1.4.4                
##  [75] rlang_0.2.2                 pkgconfig_2.0.2            
##  [77] commonmark_1.5              bitops_1.0-6               
##  [79] evaluate_0.11               lattice_0.20-35            
##  [81] purrr_0.2.5                 bindr_0.1.1                
##  [83] htmlwidgets_1.2             cowplot_0.9.3              
##  [85] bit_1.1-14                  tidyselect_0.2.4           
##  [87] GGally_1.4.0                plyr_1.8.4                 
##  [89] magrittr_1.5                DESeq2_1.20.0              
##  [91] R6_2.2.2                    gplots_3.0.1               
##  [93] Hmisc_4.1-1                 DBI_1.0.0                  
##  [95] pillar_1.3.0                foreign_0.8-71             
##  [97] withr_2.1.2                 survival_2.42-6            
##  [99] RCurl_1.95-4.11             nnet_7.3-12                
## [101] tibble_1.4.2                janitor_1.1.1              
## [103] crayon_1.3.4                utf8_1.1.4                 
## [105] KernSmooth_2.23-15          rmarkdown_1.10             
## [107] GetoptLong_0.1.7            locfit_1.5-9.1             
## [109] grid_3.5.1                  data.table_1.11.4          
## [111] blob_1.1.1                  ConsensusClusterPlus_1.44.0
## [113] digest_0.6.16               xtable_1.8-3               
## [115] tidyr_0.8.1                 munsell_0.5.0