We considered five major cell subsets: (1) myeloid, (2) T, (3) B and plasma, (4) stromal, and (5) epithelial cells. We classified cells into these subsets according to their gene expression. We consider in this file the cells classified as Stroma. We reanalyze and QC the data, and look for the refined clusters.
library(Seurat)
library(plyr)
library(dplyr)
library(ggplot2)
library(DropletUtils)
library(celda)
library(SingleCellExperiment)
library(scater)
library(scran)
library(scDblFinder)
library(viridis)
library(MASS)
library(patchwork)
library(readr)
library(clustree)
<- function(x, y, ...) { # function from https://slowkow.com/notes/ggplot2-color-by-density/
get_density <- MASS::kde2d(x, y, ...)
dens <- findInterval(x, dens$x)
ix <- findInterval(y, dens$y)
iy <- cbind(ix, iy)
ii return(dens$z[ii])
}
<- function(l) { # function from https://stackoverflow.com/a/24241954
fancy_scientific # turn in to character string in scientific notation
<- format(l, scientific = TRUE)
l # quote the part before the exponent to keep all the digits
<- gsub("^(.*)e", "'\\1'e", l)
l # turn the 'e+' into plotmath format
<- gsub("e", "%*%10^", l)
l # return this as an expression
parse(text=l)
}
setwd("~/000_GitHub/ibd-bcn_single_cell")
source('source/functions_scrnaseq.R')
source('source/colors.R')
setwd('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/')
<- readRDS('SUBSETS/FROM_SAMPLES_TOGETHER/stroma.RDS') stroma
These are the clusters we selected as stroma cells in the “02_Data_to_subsets” file.
DimPlot(stroma, group.by = 'RNA_snn_res.0.1', label=T)
Distribution of cells in scatter plot (nfeatures/percent.mt) to check where are the majority of our cells. We can see the dots colored by density using the function get_density shown in [Load extra functions and sources].
<- stroma@meta.data
meta $density <- get_density(meta$percent.mt, meta$nFeature_RNA, n = 100)
metaggplot(meta) +
geom_point(aes(percent.mt, nFeature_RNA, color = density), size = 0.2) +
scale_color_viridis() + theme_classic() +
scale_y_log10()+
geom_vline(xintercept = 25, linetype = 2, color = 'gray')+
theme(text = element_text( size = 12),
axis.title = element_text( size = 12),
legend.text = element_blank())
We analyzed the data using different cutoffs for the percent.mt (data
not shown) and finally decided that 25% was a reasonable choice for our
dataset.
<- stroma[,stroma$percent.mt < 25] stroma
Right now stroma has 28287 features across 3670 cells. Nevertheless, many cells express genes that we know should not be expressed by stromal cells. We will remove those cells.
<- c('CD3E','CD3D','CD3G', 'C1QA',
genes 'DERL3', 'MS4A1', 'EPCAM')
for (gene in genes) {
<- FetchData(stroma, vars = c('nCount_RNA', 'nFeature_RNA', gene))
jd <- jd[order(jd[,ncol(jd)]),]
jd <- ggplot(jd, mapping = aes_string(x = 'nFeature_RNA', y = 'nCount_RNA', color = gene))+
k geom_point() + theme_classic() + scale_color_viridis(alpha = 0.8) + scale_x_sqrt() + scale_y_sqrt()
cat("#### ", gene, "\n"); print(k); cat("\n\n")
}
cat("#### code removal \n")
<- stroma@assays$RNA@counts
counts <- grep("CD3E$|CD3D$|CD3G$|MS4A1$|DERL3|^C1QA$|^EPCAM$",rownames(stroma))
p <- which(Matrix::colSums(counts[p,])>0)
pp <-setdiff(colnames(stroma), names(pp))
xx <- subset(stroma,cells = xx)
stroma cat('\n\n')
We have observed that due to normalization, there’s high Ig gene expression in cells that did not have high counts of Ig genes. As an example:
<- FeaturePlot(stroma, features = 'IGHA1', slot = 'counts', order=T) + labs(title='counts') + theme_classic()
a <- FeaturePlot(stroma, features = 'IGHA1', slot = 'data', order=T) + labs(title= 'data') + theme_classic()
b wrap_plots(a,b, nrow = 1)
We remove the Ig genes from the dataset in all subsets except for the plasma and B cells subset.
<- rownames(stroma)[c(grep("^IGH",rownames(stroma)),
gg grep("^IGK", rownames(stroma)),
grep("^IGL", rownames(stroma)))]
<- setdiff(rownames(stroma),gg)
genes <- subset(stroma,features = genes) stroma
We remove the genes without expression from the dataset.
<- stroma@assays$RNA@counts
counts <- which(Matrix::rowSums(counts)==0)
pp <-setdiff(rownames(stroma), names(pp))
xx <- subset(stroma, features = xx) stroma
<- seurat_to_pca(stroma)
stroma
<- ElbowPlot(stroma, ndims = 100) +
a geom_vline(xintercept = 25, colour="#BB0000", linetype = 2)+
labs(title = paste0('Elbowplot')) + theme_classic()
<- FindNeighbors(stroma, dims = 1:25)
stroma<-RunUMAP(stroma, dims=1:25)
stroma<- DimPlot(stroma, group.by = 'sample_name') +
b labs(title = '22 PCS') +
theme_classic() +
theme(legend.position = 'bottom')
+b) / guide_area() +
(aplot_layout(heights = c(0.7,0.3),guides = 'collect')
<- RunHarmony(stroma, group.by = 'sample', dims.use = 1:25)
stroma
<- ElbowPlot(stroma, ndims = 100, reduction = 'harmony') +
a geom_vline(xintercept = 31, colour="#BB0000", linetype = 2)+
labs(title = paste0('Elbowplot'))+ theme_classic()
<- FindNeighbors(stroma, reduction = "harmony", dims = 1:31)
stroma<-RunUMAP(stroma, dims=1:31, reduction= "harmony")
stroma
<- DimPlot(stroma, group.by = 'sample_name', pt.size = 0.1) +
b theme_classic() +
theme(legend.position = 'bottom')
+b) / guide_area() +
(aplot_layout(heights = c(0.7,0.3),guides = 'collect')
# dir.create('Markers')
# dir.create('Markers/Stroma')
<- '~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/Markers'
path setwd(path)
<- resolutions(stroma,
stroma workingdir = path,
title = 'Stroma/Markers_Stroma_')
<- '~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/ON_THEIR_OWN/'
path setwd(path)
saveRDS(stroma, file = paste0(path,'stroma_filtered.RDS'))
<- '~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/ON_THEIR_OWN/'
path <- readRDS(file = paste0(path,'stroma_filtered.RDS'))
stroma for(i in c('0.1','0.3','0.5','0.7','0.9','1.1','1.3','1.5')){
<-DimPlot(stroma, group.by = paste0('RNA_snn_res.',i), label=T) +
j theme_classic() +
theme(legend.position = 'none') +
labs(title = paste0('Resolution ',i))
cat("##### ", i, "\n"); print(j); cat("\n\n")
}
clustree(stroma, prefix = 'RNA_snn_res.') + guides(size = 'none', shape = 'none', edge_colour = FALSE, edge_alpha = FALSE) + theme(legend.position = 'right')
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
setwd('~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/')
for(i in c('0.1','0.3','0.5','0.7','0.9','1.1','1.3','1.5')){
<- read_delim(
marker_genes paste0("Markers/Stroma/Markers_Stroma__markers_resolution_",i,".csv"),
delim = ";", escape_double = FALSE, locale = locale(decimal_mark = ",",
grouping_mark = "."), trim_ws = TRUE)
<- marker_genes %>%
top2 ::group_by(cluster)%>%
dplyr::slice(1:2)
dplyr<- unique(top2$gene)
top2_g
<- DotPlot(stroma, features = top2_g, group.by = paste0('RNA_snn_res.', i)) +
j theme_classic() +
theme(axis.text.x = element_text(angle=90),
axis.title = element_blank()) +
NoLegend() +
labs(title = paste0('Resolution ',i))
cat("##### ", i, "\n"); print(j); cat("\n\n")
}
## Rows: 3511 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4274 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
0.3
## Rows: 4252 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
0.5
## Rows: 5106 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
0.7
## Rows: 6497 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
0.9
## Rows: 6751 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
1.1
## Rows: 7099 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
1.3
## Rows: 7487 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#####
1.5
$annotation_refined <- plyr::mapvalues(x = stroma$RNA_snn_res.1.5,
stromafrom = 0:16,
to = c("S1",
"IER fibroblasts",
"MT fibroblasts",
"S1.2",
"Inflammatory fibroblasts",
"S2a",
"Endothelium",
"Fibroblasts Ribhi",
"S1 Ribhi",
"Activated endothelium",
"Pericytes",
"S2b",
"Myofibroblasts",
"Glia",
"S3",
"FRCs",
"Lymphatic endothelium")
)
$annotation_intermediate <- plyr::mapvalues(x = stroma$RNA_snn_res.1.5,
stromafrom = 0:16,
to = c("S1",
"S1",
"Fibroblasts",
"S1",
"Inflammatory fibroblasts",
"S2",
"Endothelium",
"Fibroblasts",
"S1",
"Endothelium",
"Pericytes",
"S2",
"Myofibroblasts",
"Glia",
"S3",
"FRCs",
"Endothelium")
)
<- DimPlot(stroma, group.by = 'annotation_intermediate', label = T, repel=T) + NoLegend()
a <- DimPlot(stroma, group.by = 'annotation_refined', label = T, repel=T, cols = cols_stroma) +
b NoLegend()
+b a
saveRDS(stroma, file = '~/000_GitHub/ibd-bcn_single_cell/Analysis of our data/02_Samples_Together/SUBSETS/ON_THEIR_OWN/stroma_annotated.RDS')
head(stroma@meta.data)
## orig.ident nCount_RNA nFeature_RNA sample doublet Health newname Health_2 percent.mt
## SC_002_AAAGTAGGTTACCGAT-1 SC 14735 3700 SC_002 singlet HC HC_SC_002 HC 2.2944162
## SC_002_AACACGTAGAGCTATA-1 SC 3423 1380 SC_002 singlet HC HC_SC_002 HC 3.6950829
## SC_002_AACTCAGTCTGTCTCG-1 SC 1120 534 SC_002 singlet HC HC_SC_002 HC 6.8965517
## SC_002_AACTGGTAGAGGGATA-1 SC 647 357 SC_002 singlet HC HC_SC_002 HC 21.0365854
## SC_002_AAGCCGCCAACGCACC-1 SC 1039 511 SC_002 singlet HC HC_SC_002 HC 10.0856327
## SC_002_AAGGAGCAGTACTTGC-1 SC 578 351 SC_002 singlet HC HC_SC_002 HC 0.1730104
## RNA_snn_res.0.1 seurat_clusters RNA_snn_res.0.3 RNA_snn_res.0.5 RNA_snn_res.0.7
## SC_002_AAAGTAGGTTACCGAT-1 3 4 5 4 3
## SC_002_AACACGTAGAGCTATA-1 0 3 0 0 1
## SC_002_AACTCAGTCTGTCTCG-1 0 2 0 0 5
## SC_002_AACTGGTAGAGGGATA-1 0 2 0 0 5
## SC_002_AAGCCGCCAACGCACC-1 0 2 0 0 5
## SC_002_AAGGAGCAGTACTTGC-1 1 16 2 2 12
## RNA_snn_res.0.9 RNA_snn_res.1.1 RNA_snn_res.1.3 RNA_snn_res.1.5 sample_name
## SC_002_AAAGTAGGTTACCGAT-1 5 4 4 4 HC 1
## SC_002_AACACGTAGAGCTATA-1 1 1 3 3 HC 1
## SC_002_AACTCAGTCTGTCTCG-1 3 3 2 2 HC 1
## SC_002_AACTGGTAGAGGGATA-1 3 3 2 2 HC 1
## SC_002_AAGCCGCCAACGCACC-1 3 3 2 2 HC 1
## SC_002_AAGGAGCAGTACTTGC-1 4 14 15 16 HC 1
## annotation_refined annotation_intermediate
## SC_002_AAAGTAGGTTACCGAT-1 Inflammatory fibroblasts Inflammatory fibroblasts
## SC_002_AACACGTAGAGCTATA-1 S1.2 S1
## SC_002_AACTCAGTCTGTCTCG-1 MT fibroblasts Fibroblasts
## SC_002_AACTGGTAGAGGGATA-1 MT fibroblasts Fibroblasts
## SC_002_AAGCCGCCAACGCACC-1 MT fibroblasts Fibroblasts
## SC_002_AAGGAGCAGTACTTGC-1 Lymphatic endothelium Endothelium
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /opt/R/4.1.2/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8
## [5] LC_MONETARY=C.UTF-8 LC_MESSAGES=C LC_PAPER=C.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Matrix_1.4-0 nnet_7.3-17 matchSCore2_0.1.0 harmony_0.1.0
## [5] Rcpp_1.0.9 rmarkdown_2.18 pandoc_0.1.0 readxl_1.3.1
## [9] magick_2.7.3 data.table_1.14.2 BiocParallel_1.28.3 RColorBrewer_1.1-3
## [13] ggrepel_0.9.1 ggrastr_1.0.1 usethis_2.1.5 clustree_0.4.4
## [17] ggraph_2.0.5 readr_2.1.2 dplyr_1.0.10 cowplot_1.1.1
## [21] reshape_0.8.8 formulaic_0.0.8 patchwork_1.1.2 MASS_7.3-55
## [25] viridis_0.6.2 viridisLite_0.4.1 scDblFinder_1.8.0 scran_1.22.1
## [29] scater_1.22.0 scuttle_1.4.0 celda_1.10.0 beepr_1.3
## [33] DropletUtils_1.14.2 SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0 Biobase_2.54.0
## [37] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0 S4Vectors_0.32.4
## [41] BiocGenerics_0.40.0 MatrixGenerics_1.6.0 matrixStats_0.62.0 ggplot2_3.3.6
## [45] plyr_1.8.7 sp_1.5-0 SeuratObject_4.1.1 Seurat_4.1.0.9007
##
## loaded via a namespace (and not attached):
## [1] rsvd_1.0.5 ica_1.0-3 corpcor_1.6.10 assertive.properties_0.0-4
## [5] foreach_1.5.2 lmtest_0.9-40 rprojroot_2.0.3 crayon_1.5.2
## [9] spatstat.core_2.4-4 rhdf5filters_1.6.0 backports_1.4.1 nlme_3.1-155
## [13] rlang_1.0.6 XVector_0.34.0 ROCR_1.0-11 irlba_2.3.5
## [17] SparseM_1.81 limma_3.50.3 xgboost_1.5.0.2 rjson_0.2.21
## [21] bit64_4.0.5 glue_1.6.2 sctransform_0.3.4 parallel_4.1.2
## [25] vipor_0.4.5 spatstat.sparse_2.1-1 AnnotationDbi_1.56.2 spatstat.geom_2.4-0
## [29] tidyselect_1.1.2 fitdistrplus_1.1-8 tidyr_1.2.1 assertive.types_0.0-3
## [33] zoo_1.8-10 org.Mm.eg.db_3.14.0 xtable_1.8-4 magrittr_2.0.3
## [37] evaluate_0.18 cli_3.4.0 zlibbioc_1.40.0 rstudioapi_0.13
## [41] miniUI_0.1.1.1 bslib_0.4.1 rpart_4.1.16 RcppEigen_0.3.3.9.2
## [45] shiny_1.7.3 BiocSingular_1.10.0 xfun_0.34 clue_0.3-60
## [49] cluster_2.1.2 tidygraph_1.2.1 KEGGREST_1.34.0 tibble_3.1.8
## [53] listenv_0.8.0 Biostrings_2.62.0 png_0.1-7 future_1.28.0
## [57] withr_2.5.0 bitops_1.0-7 ggforce_0.3.3 cellranger_1.1.0
## [61] assertive.base_0.0-9 dqrng_0.3.0 pillar_1.8.1 GlobalOptions_0.1.2
## [65] cachem_1.0.6 fs_1.5.2 GetoptLong_1.0.5 DelayedMatrixStats_1.16.0
## [69] vctrs_0.4.1 ellipsis_0.3.2 generics_0.1.3 tools_4.1.2
## [73] beeswarm_0.4.0 munsell_0.5.0 tweenr_1.0.2 DelayedArray_0.20.0
## [77] fastmap_1.1.0 compiler_4.1.2 pkgload_1.2.4 abind_1.4-5
## [81] httpuv_1.6.6 plotly_4.10.0 rgeos_0.5-9 GenomeInfoDbData_1.2.7
## [85] gridExtra_2.3 enrichR_3.0 edgeR_3.36.0 lattice_0.20-45
## [89] deldir_1.0-6 utf8_1.2.2 later_1.3.0 jsonlite_1.8.3
## [93] multipanelfigure_2.1.2 scales_1.2.1 graph_1.72.0 ScaledMatrix_1.2.0
## [97] pbapply_1.5-0 sparseMatrixStats_1.6.0 lazyeval_0.2.2 promises_1.2.0.1
## [101] doParallel_1.0.17 R.utils_2.12.0 goftest_1.2-3 checkmate_2.0.0
## [105] spatstat.utils_2.3-1 reticulate_1.26 textshaping_0.3.6 statmod_1.4.36
## [109] Rtsne_0.16 uwot_0.1.14 igraph_1.3.4 HDF5Array_1.22.1
## [113] survival_3.2-13 rsconnect_0.8.25 yaml_2.3.6 systemfonts_1.0.4
## [117] htmltools_0.5.3 memoise_2.0.1 locfit_1.5-9.6 graphlayouts_0.8.0
## [121] digest_0.6.30 assertthat_0.2.1 rappdirs_0.3.3 mime_0.12
## [125] RSQLite_2.2.17 future.apply_1.9.1 blob_1.2.3 R.oo_1.25.0
## [129] ragg_1.2.1 splines_4.1.2 labeling_0.4.2 Rhdf5lib_1.16.0
## [133] RCurl_1.98-1.8 assertive.numbers_0.0-2 hms_1.1.1 rhdf5_2.38.1
## [137] colorspace_2.0-3 ggbeeswarm_0.6.0 shape_1.4.6 assertive.files_0.0-2
## [141] sass_0.4.2 RANN_2.6.1 circlize_0.4.14 audio_0.1-10
## [145] fansi_1.0.3 tzdb_0.2.0 brio_1.1.3 parallelly_1.32.1
## [149] R6_2.5.1 grid_4.1.2 ggridges_0.5.3 lifecycle_1.0.3
## [153] formatR_1.12 bluster_1.4.0 curl_4.3.2 jquerylib_0.1.4
## [157] leiden_0.4.3 testthat_3.1.2 desc_1.4.0 RcppAnnoy_0.0.19
## [161] org.Hs.eg.db_3.14.0 iterators_1.0.14 stringr_1.4.1 topGO_2.46.0
## [165] htmlwidgets_1.5.4 beachmat_2.10.0 polyclip_1.10-0 purrr_0.3.4
## [169] gridGraphics_0.5-1 ComplexHeatmap_2.10.0 mgcv_1.8-38 globals_0.16.1
## [173] spatstat.random_2.2-0 progressr_0.11.0 codetools_0.2-18 GO.db_3.14.0
## [177] metapod_1.2.0 MCMCprecision_0.4.0 R.methodsS3_1.8.2 gtable_0.3.1
## [181] DBI_1.1.3 highr_0.9 tensor_1.5 httr_1.4.4
## [185] KernSmooth_2.23-20 vroom_1.5.7 stringi_1.7.8 reshape2_1.4.4
## [189] farver_2.1.1 combinat_0.0-8 BiocNeighbors_1.12.0 scattermore_0.8
## [193] bit_4.0.4 spatstat.data_2.2-0 pkgconfig_2.0.3 corrplot_0.92
## [197] knitr_1.40