carnation - airway tutorial • carnation

Load libraries & airway dataset

Load carnation.

library(carnation)
#>

First load some libraries that we will need for this tutorial.

library(DESeq2)
library(dplyr)
library(GeneTonic)

# install optional packages if not present
pkgs_to_check <- c('airway', 'org.Hs.eg.db', 'clusterProfiler', 'DEGreport')
for(pkg in pkgs_to_check){
  setRepositories(ind=c(1,2,3,4,5))
  if(!requireNamespace(pkg, quietly=TRUE)){
    install.packages(pkg, repos='http://cran.us.r-project.org')
  }
  library(pkg, character.only=TRUE)
}

We will be using the ‘airway’ dataset. First, we load this dataset.

data('airway')

Next, we extract the counts matrix and and metadata.

mat <- assay(airway)
cdata <- colData(airway)

Now let’s see what these look like.

dim(mat)
#> [1] 63677     8

So, mat is a matrix with 64102 rows and 8 columns. Each row corresponds to a single gene and each column corresponds to a single sample. As you will notice, the rownames of mat contains gene IDs and column names have the sample IDs.

head(mat)
#>                 SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
#> ENSG00000000003        679        448        873        408       1138
#> ENSG00000000005          0          0          0          0          0
#> ENSG00000000419        467        515        621        365        587
#> ENSG00000000457        260        211        263        164        245
#> ENSG00000000460         60         55         40         35         78
#> ENSG00000000938          0          0          2          0          1
#>                 SRR1039517 SRR1039520 SRR1039521
#> ENSG00000000003       1047        770        572
#> ENSG00000000005          0          0          0
#> ENSG00000000419        799        417        508
#> ENSG00000000457        331        233        229
#> ENSG00000000460         63         76         60
#> ENSG00000000938          0          0          0

cdata contains the sample metadata. There is a lot of information here, but notice the cell and dex columns, as we will be using this for the differential expression analysis later.

cdata
#> DataFrame with 8 rows and 9 columns
#>            SampleName     cell      dex    albut        Run avgLength
#>              <factor> <factor> <factor> <factor>   <factor> <integer>
#> SRR1039508 GSM1275862  N61311     untrt    untrt SRR1039508       126
#> SRR1039509 GSM1275863  N61311     trt      untrt SRR1039509       126
#> SRR1039512 GSM1275866  N052611    untrt    untrt SRR1039512       126
#> SRR1039513 GSM1275867  N052611    trt      untrt SRR1039513        87
#> SRR1039516 GSM1275870  N080611    untrt    untrt SRR1039516       120
#> SRR1039517 GSM1275871  N080611    trt      untrt SRR1039517       126
#> SRR1039520 GSM1275874  N061011    untrt    untrt SRR1039520       101
#> SRR1039521 GSM1275875  N061011    trt      untrt SRR1039521        98
#>            Experiment    Sample    BioSample
#>              <factor>  <factor>     <factor>
#> SRR1039508  SRX384345 SRS508568 SAMN02422669
#> SRR1039509  SRX384346 SRS508567 SAMN02422675
#> SRR1039512  SRX384349 SRS508571 SAMN02422678
#> SRR1039513  SRX384350 SRS508572 SAMN02422670
#> SRR1039516  SRX384353 SRS508575 SAMN02422682
#> SRR1039517  SRX384354 SRS508576 SAMN02422673
#> SRR1039520  SRX384357 SRS508579 SAMN02422683
#> SRR1039521  SRX384358 SRS508580 SAMN02422677

Get more gene annotation

The gene IDs that come with the dataset are from ENSEMBL and are not human-readable. So, next we will extract gene symbols and ENTREZID for these genes from the org.Hs.eg.db package.

keytypes <- list('SYMBOL'='SYMBOL', 'ENTREZID'='ENTREZID')

anno_df <- do.call('cbind',
             lapply(keytypes, function(x){
               mapIds(org.Hs.eg.db,
                 column=x,
                 keys=rownames(mat),
                 keytype='ENSEMBL')
               })
             )

# convert to data frame
anno_df <- as.data.frame(anno_df)

Now, we have human readable gene names in the SYMBOL column and Entrez IDs in the ENTREZ column.

head(anno_df)
#>                 SYMBOL ENTREZID
#> ENSG00000000003 TSPAN6     7105
#> ENSG00000000005   TNMD    64102
#> ENSG00000000419   DPM1     8813
#> ENSG00000000457  SCYL3    57147
#> ENSG00000000460  FIRRM    55732
#> ENSG00000000938    FGR     2268

Create DESeqDataSet

Next, we create a new DESeqDataSet using mat and cdata.

dds <- DESeqDataSetFromMatrix(mat,
                              colData=cdata,
                              design=~cell + dex)

Let’s check to make sure that everything looks okay:

dds
#> class: DESeqDataSet 
#> dim: 63677 8 
#> metadata(1): version
#> assays(1): counts
#> rownames(63677): ENSG00000000003 ENSG00000000005 ... ENSG00000273492
#>   ENSG00000273493
#> rowData names(0):
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample

Then we save dds in a list.

dds_list <- list(main=dds)

We also normalize the counts data and save it in a list.

rld_list <- lapply(dds_list, function(x) varianceStabilizingTransformation(x, blind=TRUE))

Run differential expression analysis

Now, the object dds is ready for differential expression (DE) analysis.

dds <- DESeq(dds)
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing

Since, we used design=~cell + dex while creating dds, the above step will automatically calculate some comparisons.

resultsNames(dds)
#> [1] "Intercept"               "cell_N061011_vs_N052611"
#> [3] "cell_N080611_vs_N052611" "cell_N61311_vs_N052611" 
#> [5] "dex_untrt_vs_trt"

The last comparison dex_untrt_vs_trt contains the effect of the dex treatment, while the other comparisons compare different cell lines. Next, we will extract two of these results and run lfcShrink on them.

For the cell comparison, we choose a precomputed results using the coef parameter.

cell_comparison <- lfcShrink(dds,
                             coef='cell_N61311_vs_N052611',
                             type='normal')

For the dex comparison, we use contrast to specify the direction of the comparison, since we want to use untrt as control.

dex_comparison <- lfcShrink(dds,
                            contrast=c('dex', 'trt', 'untrt'),
                            type='normal')

Now, each of these comparisons, contain the DE analysis results. For example,

head(dex_comparison)
#> log2 fold change (MAP): dex trt vs untrt 
#> Wald test p-value: dex trt vs untrt 
#> DataFrame with 6 rows and 6 columns
#>                   baseMean log2FoldChange     lfcSE      stat      pvalue
#>                  <numeric>      <numeric> <numeric> <numeric>   <numeric>
#> ENSG00000000003 708.602170     -0.3741527 0.0988429 -3.787752 0.000152016
#> ENSG00000000005   0.000000             NA        NA        NA          NA
#> ENSG00000000419 520.297901      0.2020620 0.1097395  1.842943 0.065337292
#> ENSG00000000457 237.163037      0.0361672 0.1383377  0.264356 0.791505742
#> ENSG00000000460  57.932633     -0.0844567 0.2498904 -0.307054 0.758801924
#> ENSG00000000938   0.318098     -0.0841390 0.1513343 -0.393793 0.693733530
#>                       padj
#>                  <numeric>
#> ENSG00000000003 0.00128292
#> ENSG00000000005         NA
#> ENSG00000000419 0.19646985
#> ENSG00000000457 0.91141962
#> ENSG00000000460 0.89500478
#> ENSG00000000938         NA

Then, we save these results in a special nested list that carnation will use. Here,

res contains the actual DE analysis results
dds contains the name of the DESeqDataSet used for the DE analysis. These values should map to dds_list names
label is a description of the comparison

res_list <- list(
        dex_trt_vs_untrt=list(
            res=dex_comparison,
            dds='main',
            label='dex, treated vs untreated'),
        cell_N61311_vs_N052611=list(
            res=cell_comparison,
            dds='main',
            label='cell, N61311 vs N052611')
        )

Finally, we add SYMBOL and ENTREZID columns to the DE results from the anno_df data frame.

res_list <- lapply(res_list, function(x){
              # save the rownames as a new 'gene' column
              x$res[[ 'gene' ]] <- rownames(x$res)

              # add 'SYMBOL' and 'ENTREZID' columns
              x$res[[ 'SYMBOL' ]] <- anno_df[rownames(x$res), 'SYMBOL']
              x$res[[ 'ENTREZID' ]] <- anno_df[rownames(x$res), 'ENTREZID']

              x
            })

Add functional enrichment results (optional)

Now we run functional enrichment on the DE genes from the two comparisons. For this, we first set significance thresholds and then extract the DE genes and save as a list.

# padj cutoff
alpha <- 0.01

# log2FoldChange threshold; 1 == 2x difference
lfc_threshold <- 1

# list to save DE genes
de.genes <- lapply(res_list, function(x){
              # changed genes
              idx <- x$res$padj < alpha &
                     !is.na(x$res$padj) &
                     abs(x$res$log2FoldChange) >= lfc_threshold

              # return DE genes as a dataframe
              x$res[idx, c('gene', 'ENTREZID')]
            })

Next, we run functional enrichment and save the results in a list called enrich_list. We also save a converted list called genetonic which carnation uses for several plots from the GeneTonic package.

# list to save functional enrichment results
enrich_list <- list()

# list to save a converted object for GeneTonic plots
genetonic <- list()

for(comp in names(res_list)){
    go.res <- clusterProfiler::enrichGO(
            gene=de.genes[[comp]][['ENTREZID']],
            keyType='ENTREZID',
            OrgDb=org.Hs.eg.db,
            ont='BP',
            pvalueCutoff=1, qvalueCutoff=1,
            readable = TRUE)

    enrich_list[[ comp ]] <- list(
                               res=comp,
                               changed=list( BP=as.data.frame(go.res) )
                             )

    genetonic[[ comp ]] <- list(
                             res=comp,
                             changed=list(
                               BP=enrich_to_genetonic(go.res, res_list[[comp]]$res)
                             )
                           )

}

enrich_list is a nested list where:

The top-level names are unique keys/identifiers
The second level corresponds to direction of change, e.g. up, down or changed. This level also contains a special entry res which maps to res_list names, as a way to record where the DE results came from.
The third level corresponds to the ontology, e.g. BP (GO Biological Process).

Here, we are just using changed genes and the GO Biological Process (BP) ontology, so enrich_list looks like:

enrich_list
  ├─ dex_trt_vs_untrt
  │    ├─ res = 'des_trt_vs_untrt'    <--- comparison used to get FE results
  │    └─ changed
  │         └─ BP                     <--- functional enrichment results
  │
  └─ cell_N61311_vs_N052611
       ├─ res = 'cell_N61311_vs_N052611'    <--- comparison used to get FE results
       └─ changed
            └─ BP                           <--- functional enrichment results

genetonic mirrors the same structure.

Add pattern analysis (optional)

Finally, we add some pattern analysis for the dex_trt_vs_untrt comparison using the DEGreport package. First, we extract normalized data for the 755 DE genes from this comparison.

# extract normalized data & metadata
ma <- assay(rld_list[['main']])
colData.i <- colData(rld_list[['main']])

# only keep data from DE genes
idx <- rownames(ma) %in% de.genes[['dex_trt_vs_untrt']][['gene']]
ma.i <- ma[idx,]

# remove any genes with 0 variance
ma.i <- ma.i[rowVars(ma.i) != 0, ]

Then, we run the pattern analysis, using cell as the time variable and dex as the color variable.

p <- DEGreport::degPatterns(
        ma.i,
        colData.i,

        time='cell',
        col='dex',

        # NOTE: reduce and merge cutoff----------------------------------------
        #   Reduce will merge clusters that are similar; similarity determined
        #   by cutoff
        reduce=TRUE,

        # NOTE: set minimum cluster size to 1
        minc=1,
        plot=FALSE
        )
#> Working with 754 genes.
#> Working with 650 genes after filtering: minc > 1
#> Joining with `by = join_by(merge)`
#> Joining with `by = join_by(merge)`

Next, we extract the normalized slot from this object and save as a list.

# extract normalized slot and add symbol column
p_norm <- p$normalized
p_norm[[ 'SYMBOL' ]] <- anno_df[p_norm[['genes']], 'SYMBOL']

# save pattern analysis results
degpatterns <- list(dex_by_cell=p_norm)

Compose carnation object

Now we have all the pieces to build the carnation object.

combined <- list(res_list=res_list,
                 dds_list=dds_list,
                 rld_list=rld_list,
                 enrich_list=enrich_list,
                 genetonic=genetonic,
                 degpatterns_list=degpatterns)
saveRDS(combined, 'carnation_vignette.rds', compress=FALSE)

sessionInfo

sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] DEGreport_1.44.0            clusterProfiler_4.16.0     
#>  [3] org.Hs.eg.db_3.21.0         AnnotationDbi_1.70.0       
#>  [5] airway_1.28.0               GeneTonic_3.2.0            
#>  [7] dplyr_1.1.4                 DESeq2_1.48.1              
#>  [9] SummarizedExperiment_1.38.1 Biobase_2.68.0             
#> [11] MatrixGenerics_1.20.0       matrixStats_1.5.0          
#> [13] GenomicRanges_1.60.0        GenomeInfoDb_1.44.0        
#> [15] IRanges_2.42.0              S4Vectors_0.46.0           
#> [17] BiocGenerics_0.54.0         generics_0.1.4             
#> [19] carnation_1.2              
#> 
#> loaded via a namespace (and not attached):
#>   [1] fs_1.6.6                    bitops_1.0-9               
#>   [3] enrichplot_1.28.2           httr_1.4.7                 
#>   [5] webshot_0.5.5               RColorBrewer_1.1-3         
#>   [7] doParallel_1.0.17           dynamicTreeCut_1.63-1      
#>   [9] backports_1.5.0             tippy_0.1.0                
#>  [11] tools_4.5.1                 R6_2.6.1                   
#>  [13] DT_0.33                     lazyeval_0.2.2             
#>  [15] mgcv_1.9-3                  GetoptLong_1.0.5           
#>  [17] withr_3.0.2                 prettyunits_1.2.0          
#>  [19] gridExtra_2.3               cli_3.6.5                  
#>  [21] textshaping_1.0.1           logging_0.10-108           
#>  [23] TSP_1.2-5                   labeling_0.4.3             
#>  [25] sass_0.4.10                 topGO_2.60.1               
#>  [27] bs4Dash_2.3.4               askpass_1.2.1              
#>  [29] ggridges_0.5.6              goseq_1.60.0               
#>  [31] pkgdown_2.1.3               Rsamtools_2.24.0           
#>  [33] systemfonts_1.2.3           yulab.utils_0.2.0          
#>  [35] gson_0.1.0                  txdbmaker_1.4.2            
#>  [37] DOSE_4.2.0                  R.utils_2.13.0             
#>  [39] limma_3.64.1                RSQLite_2.4.1              
#>  [41] visNetwork_2.1.2            gridGraphics_0.5-1         
#>  [43] shape_1.4.6.1               BiocIO_1.18.0              
#>  [45] dendextend_1.19.0           GO.db_3.21.0               
#>  [47] Matrix_1.7-3                abind_1.4-8                
#>  [49] R.methodsS3_1.8.2           lifecycle_1.0.4            
#>  [51] edgeR_4.6.2                 yaml_2.3.10                
#>  [53] qvalue_2.40.0               SparseArray_1.8.0          
#>  [55] BiocFileCache_2.16.0        grid_4.5.1                 
#>  [57] blob_1.2.4                  promises_1.3.3             
#>  [59] crayon_1.5.3                miniUI_0.1.2               
#>  [61] ggtangle_0.0.7              lattice_0.22-7             
#>  [63] billboarder_0.5.0           ComplexUpset_1.3.3         
#>  [65] cowplot_1.1.3               GenomicFeatures_1.60.0     
#>  [67] KEGGREST_1.48.1             pillar_1.10.2              
#>  [69] knitr_1.50                  ComplexHeatmap_2.24.1      
#>  [71] fgsea_1.34.0                rjson_0.2.23               
#>  [73] codetools_0.2-20            fastmatch_1.1-6            
#>  [75] glue_1.8.0                  ggfun_0.1.9                
#>  [77] data.table_1.17.6           vctrs_0.6.5                
#>  [79] png_0.1-8                   treeio_1.32.0              
#>  [81] gtable_0.3.6                assertthat_0.2.1           
#>  [83] cachem_1.1.0                xfun_0.52                  
#>  [85] S4Arrays_1.8.1              mime_0.13                  
#>  [87] ConsensusClusterPlus_1.72.0 seriation_1.5.7            
#>  [89] shinythemes_1.2.0           iterators_1.0.14           
#>  [91] statmod_1.5.0               nlme_3.1-168               
#>  [93] ggtree_3.16.0               bit64_4.6.0-1              
#>  [95] progress_1.2.3              filelock_1.0.3             
#>  [97] rprojroot_2.0.4             bslib_0.9.0                
#>  [99] colorspace_2.1-1            DBI_1.2.3                  
#> [101] mnormt_2.1.1                tidyselect_1.2.1           
#> [103] bit_4.6.0                   compiler_4.5.1             
#> [105] curl_6.4.0                  httr2_1.1.2                
#> [107] graph_1.86.0                BiasedUrn_2.0.12           
#> [109] SparseM_1.84-2              expm_1.0-0                 
#> [111] xml2_1.3.8                  ggdendro_0.2.0             
#> [113] desc_1.4.3                  DelayedArray_0.34.1        
#> [115] plotly_4.11.0               scrypt_0.1.6               
#> [117] colourpicker_1.3.0          rtracklayer_1.68.0         
#> [119] scales_1.4.0                psych_2.5.6                
#> [121] mosdef_1.4.1                rappdirs_0.3.3             
#> [123] stringr_1.5.1               digest_0.6.37              
#> [125] shinyBS_0.61.1              rmarkdown_2.29             
#> [127] ca_0.71.1                   XVector_0.48.0             
#> [129] htmltools_0.5.8.1           pkgconfig_2.0.3            
#> [131] learnr_0.11.5               dbplyr_2.5.0               
#> [133] fastmap_1.2.0               rlang_1.1.6                
#> [135] GlobalOptions_0.1.2         htmlwidgets_1.6.4          
#> [137] UCSC.utils_1.4.0            shiny_1.11.0               
#> [139] farver_2.1.2                jquerylib_0.1.4            
#> [141] shinymanager_1.0.410        jsonlite_2.0.0             
#> [143] BiocParallel_1.42.1         GOSemSim_2.34.0            
#> [145] R.oo_1.27.1                 RCurl_1.98-1.17            
#> [147] magrittr_2.0.3              GenomeInfoDbData_1.2.14    
#> [149] ggplotify_0.1.2             patchwork_1.3.1            
#> [151] Rcpp_1.0.14                 ape_5.8-1                  
#> [153] shinycssloaders_1.1.0       viridis_0.6.5              
#> [155] reticulate_1.42.0           stringi_1.8.7              
#> [157] rintrojs_0.3.4              MASS_7.3-65                
#> [159] plyr_1.8.9                  parallel_4.5.1             
#> [161] ggrepel_0.9.6               Biostrings_2.76.0          
#> [163] splines_4.5.1               hms_1.1.3                  
#> [165] geneLenDataBase_1.44.0      circlize_0.4.16            
#> [167] locfit_1.5-9.12             igraph_2.1.4               
#> [169] reshape2_1.4.4              biomaRt_2.64.0             
#> [171] XML_3.99-0.18               evaluate_1.0.4             
#> [173] foreach_1.5.2               tweenr_2.0.3               
#> [175] httpuv_1.6.16               backbone_2.1.4             
#> [177] tidyr_1.3.1                 openssl_2.3.3              
#> [179] purrr_1.0.4                 polyclip_1.10-7            
#> [181] reshape_0.8.10              heatmaply_1.5.0            
#> [183] clue_0.3-66                 ggplot2_3.5.2              
#> [185] ggforce_0.5.0               broom_1.0.8                
#> [187] xtable_1.8-4                restfulr_0.0.16            
#> [189] tidytree_0.4.6              later_1.4.2                
#> [191] viridisLite_0.4.2           ragg_1.4.0                 
#> [193] tibble_3.3.0                aplot_0.2.7                
#> [195] memoise_2.0.1               registry_0.5-1             
#> [197] GenomicAlignments_1.44.0    cluster_2.1.8.1            
#> [199] sortable_0.5.0              shinyWidgets_0.9.0         
#> [201] shinyAce_0.4.4