We considered five major cell subsets: (1) myeloid, (2) T, (3) B and plasma, (4) stromal, and (5) epithelial cells. We classified cells into these subsets according to their gene expression.
library(Seurat)
library(plyr)
library(dplyr)
library(ggplot2)
library(DropletUtils)
library(celda)
library(SingleCellExperiment)
library(scater)
library(scran)
library(scDblFinder)
library(viridis)
library(MASS)
library(patchwork)
library(readr)
<- function(x, y, ...) { # function from https://slowkow.com/notes/ggplot2-color-by-density/
get_density <- MASS::kde2d(x, y, ...)
dens <- findInterval(x, dens$x)
ix <- findInterval(y, dens$y)
iy <- cbind(ix, iy)
ii return(dens$z[ii])
}
<- function(l) { # function from https://stackoverflow.com/a/24241954
fancy_scientific # turn in to character string in scientific notation
<- format(l, scientific = TRUE)
l # quote the part before the exponent to keep all the digits
<- gsub("^(.*)e", "'\\1'e", l)
l # turn the 'e+' into plotmath format
<- gsub("e", "%*%10^", l)
l # return this as an expression
parse(text=l)
}
setwd("~/000_GitHub/ibd-bcn_single_cell")
source('source/functions_scrnaseq.R')
source('source/colors.R')
setwd('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/')
<- readRDS(file = 'filtered_data_together.RDS') seudataf
We cluster the samples using louvain classification and look for the markers.
setwd('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/')
<- seurat_to_pca(seudataf)
seudataf
<- ElbowPlot(seudataf, ndims = 50) +
a geom_vline(xintercept = 23, colour="#BB0000", linetype = 2)+
labs(title = paste0('Elbowplot'))+ theme_classic()
<- FindNeighbors(seudataf, dims = 1:23)
seudataf <- RunUMAP(seudataf, dims = 1:23)
seudataf
<- DimPlot(seudataf, group.by = 'sample_name', pt.size = 0.1) +
b labs(title = '23 PCS') +
theme_classic() +
theme(legend.position = 'bottom')
+b) / guide_area() +
(aplot_layout(heights = c(0.7,0.3),guides = 'collect')
dir.create('ALL')
dir.create('ALL/filter_65')
<- resolutions(seudataf, resolutions = c(0.1, 0.3, 0.5),
seudataf workingdir = 'ALL/filter_65/',
title = 'ALL')
saveRDS(seudataf,'ALL/filter_65/ALL.RDS')
<- DimPlot(seudataf, group.by = 'RNA_snn_res.0.1', label = T) + theme_classic() + NoLegend()
a <- DimPlot(seudataf, group.by = 'RNA_snn_res.0.3', label = T) + theme_classic() + NoLegend()
b <- DimPlot(seudataf, group.by = 'RNA_snn_res.0.5', label = T) + theme_classic() + NoLegend()
c
+ b) / (c + plot_spacer() ) (a
<- read_delim("ALL/filter_65/ALL_markers_resolution_0.1.csv",
marker_genes delim = ";", escape_double = FALSE,
locale = locale(decimal_mark = ",", grouping_mark = "."),
trim_ws = TRUE)
## Rows: 9064 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
<- marker_genes %>%
top5 ::group_by(cluster)%>%
dplyr::slice(1:5)
dplyr<- unique(top5$gene)
top5_g
DotPlot(seudataf, features = top5_g, group.by = 'RNA_snn_res.0.1' ) + theme_classic() + theme(axis.text.x = element_text(angle=90), axis.title = element_blank()) + NoLegend() + labs(title = 'Resolution 0.1')
<- data.frame(
subset_genes stringsAsFactors = FALSE,
gene = c("BANK1","CD19","CD79A","DERL3", # b and plasma
"MS4A1","MZB1",
"AQP8","BEST4", "EPCAM","MUC2", # epi
"OLFM4","TRPM5", "ZG16",
"AIF1","C1QA","C1QB","CD14", # myeloids
"CMTM2","FCGR3B","LYZ","MS4A2",
"TPSAB1","TPSB2", 'CPA3',
'CD14', 'CD163', 'CSF1R', 'CD68',
"ACTA2","ADAMDEC1","CHI3L1", # stroma
"COL3A1","NRXN1","PLVAP","SOX6",
"VWF",
'PDGFRA', 'PDGFRB', 'PTGDS', 'LUM', 'MMP2',
"CD3D","CD3E","CD3G", # tcells
"CD8A","FOXP3","GZMA","GZMB",
"TRAC", "NKG7","TRBC1",
'KLRB1'),
subset = c("B and plasmacells","B and plasmacells",
"B and plasmacells",
"B and plasmacells","B and plasmacells",
"B and plasmacells",
"Epithelium",
"Epithelium","Epithelium",
"Epithelium","Epithelium",
"Epithelium","Epithelium",
"Myeloid cells","Myeloid cells","Myeloid cells",
"Myeloid cells","Myeloid cells","Myeloid cells",
"Myeloid cells","Myeloid cells","Myeloid cells",
"Myeloid cells","Myeloid cells","Myeloid cells",
"Myeloid cells","Myeloid cells","Myeloid cells",
"Stromal cells","Stromal cells",
"Stromal cells","Stromal cells","Stromal cells",
"Stromal cells","Stromal cells","Stromal cells",
"Stromal cells","Stromal cells","Stromal cells",
"Stromal cells","Stromal cells",
"T cells","T cells",
"T cells","T cells","T cells",
"T cells","T cells","T cells",
"T cells","T cells", 'NK or NKT cells')
)for(subset in unique(subset_genes$subset)){
cat("### ", subset, "{.tabset} \n\n")
<- subset_genes$gene[subset_genes$subset == subset]
genes for (gene in genes) {
<- FeaturePlot(seudataf, features = gene, order=T) + theme_classic()
j cat("#### ", gene, "\n"); print(j); cat("\n\n")
} }
cat("## Dotplot \n")
DotPlot(seudataf, features = unique(subset_genes$gene), group.by = 'RNA_snn_res.0.1' ) + theme_classic() + theme(axis.text.x = element_text(angle=90, vjust = 0.5), axis.title = element_blank()) + NoLegend() + labs(title = 'Resolution 0.1')
We separate an extra subset of cycling cells and save each subset as a separated object.
<- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(5,7,8)]
myeloids <- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(1)]
tcells <- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(0, 6)]
plasmas <- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(2,4,10)]
epi <- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(12,11,3)]
stroma <- seudataf[,seudataf$RNA_snn_res.0.1 %in% c(9)]
cycling
dir.create('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/')
setwd('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/')
dir.create('FROM_SAMPLES_TOGETHER')
# save each subset as a separated object
saveRDS(myeloids,'FROM_SAMPLES_TOGETHER/myeloids.RDS')
saveRDS(tcells,'FROM_SAMPLES_TOGETHER/tcells.RDS')
saveRDS(plasmas,'FROM_SAMPLES_TOGETHER/plasmas.RDS')
saveRDS(epi,'FROM_SAMPLES_TOGETHER/epi.RDS')
saveRDS(stroma,'FROM_SAMPLES_TOGETHER/stroma.RDS')
saveRDS(cycling,'FROM_SAMPLES_TOGETHER/cycling.RDS')
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /opt/R/4.1.2/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8
## [5] LC_MONETARY=C.UTF-8 LC_MESSAGES=C LC_PAPER=C.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Matrix_1.4-0 nnet_7.3-17 matchSCore2_0.1.0 harmony_0.1.0
## [5] Rcpp_1.0.9 rmarkdown_2.18 pandoc_0.1.0 readxl_1.3.1
## [9] magick_2.7.3 data.table_1.14.2 BiocParallel_1.28.3 RColorBrewer_1.1-3
## [13] ggrepel_0.9.1 ggrastr_1.0.1 usethis_2.1.5 clustree_0.4.4
## [17] ggraph_2.0.5 readr_2.1.2 dplyr_1.0.10 cowplot_1.1.1
## [21] reshape_0.8.8 formulaic_0.0.8 patchwork_1.1.2 MASS_7.3-55
## [25] viridis_0.6.2 viridisLite_0.4.1 scDblFinder_1.8.0 scran_1.22.1
## [29] scater_1.22.0 scuttle_1.4.0 celda_1.10.0 beepr_1.3
## [33] DropletUtils_1.14.2 SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0 Biobase_2.54.0
## [37] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0 S4Vectors_0.32.4
## [41] BiocGenerics_0.40.0 MatrixGenerics_1.6.0 matrixStats_0.62.0 ggplot2_3.3.6
## [45] plyr_1.8.7 sp_1.5-0 SeuratObject_4.1.1 Seurat_4.1.0.9007
##
## loaded via a namespace (and not attached):
## [1] rsvd_1.0.5 ica_1.0-3 corpcor_1.6.10 assertive.properties_0.0-4
## [5] foreach_1.5.2 lmtest_0.9-40 rprojroot_2.0.3 crayon_1.5.2
## [9] spatstat.core_2.4-4 rhdf5filters_1.6.0 backports_1.4.1 nlme_3.1-155
## [13] rlang_1.0.6 XVector_0.34.0 ROCR_1.0-11 irlba_2.3.5
## [17] SparseM_1.81 limma_3.50.3 xgboost_1.5.0.2 rjson_0.2.21
## [21] bit64_4.0.5 glue_1.6.2 sctransform_0.3.4 parallel_4.1.2
## [25] vipor_0.4.5 spatstat.sparse_2.1-1 AnnotationDbi_1.56.2 spatstat.geom_2.4-0
## [29] tidyselect_1.1.2 fitdistrplus_1.1-8 tidyr_1.2.1 assertive.types_0.0-3
## [33] zoo_1.8-10 org.Mm.eg.db_3.14.0 xtable_1.8-4 magrittr_2.0.3
## [37] evaluate_0.18 cli_3.4.0 zlibbioc_1.40.0 rstudioapi_0.13
## [41] miniUI_0.1.1.1 bslib_0.4.1 rpart_4.1.16 RcppEigen_0.3.3.9.2
## [45] shiny_1.7.3 BiocSingular_1.10.0 xfun_0.34 clue_0.3-60
## [49] cluster_2.1.2 tidygraph_1.2.1 KEGGREST_1.34.0 tibble_3.1.8
## [53] listenv_0.8.0 Biostrings_2.62.0 png_0.1-7 future_1.28.0
## [57] withr_2.5.0 bitops_1.0-7 ggforce_0.3.3 cellranger_1.1.0
## [61] assertive.base_0.0-9 dqrng_0.3.0 pillar_1.8.1 GlobalOptions_0.1.2
## [65] cachem_1.0.6 fs_1.5.2 GetoptLong_1.0.5 DelayedMatrixStats_1.16.0
## [69] vctrs_0.4.1 ellipsis_0.3.2 generics_0.1.3 tools_4.1.2
## [73] beeswarm_0.4.0 munsell_0.5.0 tweenr_1.0.2 DelayedArray_0.20.0
## [77] fastmap_1.1.0 compiler_4.1.2 pkgload_1.2.4 abind_1.4-5
## [81] httpuv_1.6.6 plotly_4.10.0 rgeos_0.5-9 GenomeInfoDbData_1.2.7
## [85] gridExtra_2.3 enrichR_3.0 edgeR_3.36.0 lattice_0.20-45
## [89] deldir_1.0-6 utf8_1.2.2 later_1.3.0 jsonlite_1.8.3
## [93] multipanelfigure_2.1.2 scales_1.2.1 graph_1.72.0 ScaledMatrix_1.2.0
## [97] pbapply_1.5-0 sparseMatrixStats_1.6.0 lazyeval_0.2.2 promises_1.2.0.1
## [101] doParallel_1.0.17 R.utils_2.12.0 goftest_1.2-3 checkmate_2.0.0
## [105] spatstat.utils_2.3-1 reticulate_1.26 textshaping_0.3.6 statmod_1.4.36
## [109] Rtsne_0.16 uwot_0.1.14 igraph_1.3.4 HDF5Array_1.22.1
## [113] survival_3.2-13 rsconnect_0.8.25 yaml_2.3.6 systemfonts_1.0.4
## [117] htmltools_0.5.3 memoise_2.0.1 locfit_1.5-9.6 graphlayouts_0.8.0
## [121] digest_0.6.30 assertthat_0.2.1 rappdirs_0.3.3 mime_0.12
## [125] RSQLite_2.2.17 future.apply_1.9.1 blob_1.2.3 R.oo_1.25.0
## [129] ragg_1.2.1 splines_4.1.2 labeling_0.4.2 Rhdf5lib_1.16.0
## [133] RCurl_1.98-1.8 assertive.numbers_0.0-2 hms_1.1.1 rhdf5_2.38.1
## [137] colorspace_2.0-3 ggbeeswarm_0.6.0 shape_1.4.6 assertive.files_0.0-2
## [141] sass_0.4.2 RANN_2.6.1 circlize_0.4.14 audio_0.1-10
## [145] fansi_1.0.3 tzdb_0.2.0 brio_1.1.3 parallelly_1.32.1
## [149] R6_2.5.1 grid_4.1.2 ggridges_0.5.3 lifecycle_1.0.3
## [153] formatR_1.12 bluster_1.4.0 curl_4.3.2 jquerylib_0.1.4
## [157] leiden_0.4.3 testthat_3.1.2 desc_1.4.0 RcppAnnoy_0.0.19
## [161] org.Hs.eg.db_3.14.0 iterators_1.0.14 stringr_1.4.1 topGO_2.46.0
## [165] htmlwidgets_1.5.4 beachmat_2.10.0 polyclip_1.10-0 purrr_0.3.4
## [169] gridGraphics_0.5-1 ComplexHeatmap_2.10.0 mgcv_1.8-38 globals_0.16.1
## [173] spatstat.random_2.2-0 progressr_0.11.0 codetools_0.2-18 GO.db_3.14.0
## [177] metapod_1.2.0 MCMCprecision_0.4.0 R.methodsS3_1.8.2 gtable_0.3.1
## [181] DBI_1.1.3 highr_0.9 tensor_1.5 httr_1.4.4
## [185] KernSmooth_2.23-20 vroom_1.5.7 stringi_1.7.8 reshape2_1.4.4
## [189] farver_2.1.1 combinat_0.0-8 BiocNeighbors_1.12.0 scattermore_0.8
## [193] bit_4.0.4 spatstat.data_2.2-0 pkgconfig_2.0.3 corrplot_0.92
## [197] knitr_1.40