作者,Evil Genius
生信是一种手段,大家要学会灵活运用,不要生搬硬套,很多时候大家需要根据自己的情况进行修改。
这也是空转系列课程中上课所得到分析结果。
在文章Integrating microarray-based spatial transcriptomics and single-cell RNA-seq reveals tissue architecture in pancreatic ductal adenocarcinomas中MIA的运用结果是
好的图片在绘制的时候也需要精心的准备
我们来补充一下,还是用上课的示例数据,初步的实现效果
test.cluster.MIA.enrich.heatmap.png
#! usr/bin/R
### zhaoyunfei
### 20231111
suppressMessages({
library(Seurat)
library(argparse)
library(dplyr)
library(ggplot2)
library(tidyverse)
})
parser = ArgumentParser()
parser$add_argument("--sc_rds", help="the sc data",required = T)
parser$add_argument("--spatial_rds", help="the sp data",required = T)
parser$add_argument("--sample", help="the sample name",required = T)
parser$add_argument("--outdir", help="the outdir",default = './')
parser$add_argument("--celltype", help="the annotation for celltype")
parser$add_argument("--normalization_method",default = 'SCT',choice = c('SCT','LogNormalize'))
parser$add_argument("-s","--sc_min_pct", help="the min pct for sc diff test",default = 0.3)
parser$add_argument("--sc_logfc", help="the threshold of logfc for sc diff test",default = 0.25)
parser$add_argument("--region", help="the spatial region annotation,eg:Barcode,Cluster")
parser$add_argument("--sp_min_pct", help="the min pct for sp diff test",default = 0.3)
parser$add_argument("--sp_logfc", help="the threshold of logfc for sp diff test",default = 0.25)
args <- parser$parse_args()
str(args)
sc_rds = args$sc_rds
spatial_rds = args$spatial_rds
outdir = args$outdir
celltype = args$celltype
normalization_method = args$normalization_method
sample = args$sample
sc_min = args$sc_min_pct
sc_logfc = args$sc_logfc
region = args$region
sp.pct = args$sp_min_pct
sp.logfc = args$sp_logfc
if (!file.exists(outdir)) {dir.create(outdir,recursive = TRUE)}
if (strsplit(sc_rds,'[.]')[[1]][length(strsplit(sc_rds,'[.]')[[1]])] == 'rds'){
cortex_sc <- readRDS(sc_rds)
if (normalization_method == 'SCT'){
cortex_sc <- SCTransform(cortex_sc, ncells = 3000, verbose = FALSE) %>% RunPCA(verbose = FALSE) %>% RunUMAP(dims = 1:20)}else{
cortex_sc <- NormalizeData(cortex_sc, normalization.method = "LogNormalize", scale.factor = 10000)
cortex_sc <- FindVariableFeatures(cortex_sc, selection.method = "vst", nfeatures = 2000)
cortex_sc <- ScaleData(cortex_sc, features = rownames(cortex_sc))
cortex_sc <- RunPCA(cortex_sc, features = VariableFeatures(object = cortex_sc))
cortex_sc <- RunUMAP(cortex_sc, dims = 1:20)
}
cortex_sc <- Seurat::FindNeighbors(cortex_sc, dims = 1:20, verbose = FALSE)
cortex_sc <- Seurat::FindClusters(cortex_sc, verbose = FALSE,resolution = 0.5)
} else if (strsplit(sc_rds,'[.]')[[1]][length(strsplit(sc_rds,'[.]')[[1]])] == 'csv'){
gene_bar <- read.csv(sc_rds,header=T,row.names=1,sep=',',check.names = F)
cortex_sc <- CreateSeuratObject(counts = gene_bar, min.cells = 3, project = 'sc')
cortex_sc <- Seurat::SCTransform(cortex_sc, verbose = FALSE , ncells = 3000)
cortex_sc <- Seurat::RunPCA(cortex_sc, verbose = FALSE)
cortex_sc <- Seurat::RunUMAP(cortex_sc, dims = 1:20, verbose = FALSE)
cortex_sc <- Seurat::FindNeighbors(cortex_sc, dims = 1:20, verbose = FALSE)
cortex_sc <- Seurat::FindClusters(cortex_sc, verbose = FALSE)
}
if ( ! is.null(celltype) ) {
anno = read.csv(celltype,header = T)
if (length(anno$Cluster) != length(rownames(cortex_sc))){print("warning ~~~ ,The length of celltype file is not match the sc data,subset will be acrry out ~~~")}
index = na.omit(match(colnames(cortex_sc),anno$Barcode))
cortex_sc = cortex_sc[,index]
Seurat::Idents(object = cortex_sc) <- anno$Cluster
cortex_sc$seurat_clusters = anno$Cluster
if (! is.null(anno$Sample)){cortex_sc$orig.ident = anno$Sample}
}else {Seurat::Idents(object = cortex_sc) <- cortex_sc$seurat_clusters
}
DefaultAssay(cortex_sc) = 'RNA'
sc.markers <- FindAllMarkers(cortex_sc, only.pos = TRUE, min.pct = sc_min, logfc.threshold = sc.logfc)
sc.markers$d = sc.markers$pct.1 - sc.markers$pct.2
sc.main.marker = subset(sc.markers , avg_log2FC > 0.3 & p_val_adj < 0.05 & d > 0.2)
sc.main.marker = sc.main.marker %>% arrange(cluster,desc(avg_log2FC))
sc.main.marker = as.data.frame(sc.main.marker)
sc.main.marker$cluster = paste('sc',sc.main.marker$cluster,sep = '_')
#####空间转录组处理
cortex_sp = readRDS(spatial_rds)
cortex_sp <- SCTransform(cortex_sp, verbose = FALSE,assay = "Spatial")
cortex_sp <- RunPCA(cortex_sp, assay = "SCT", verbose = FALSE)
cortex_sp <- FindNeighbors(cortex_sp, reduction = "pca", dims = 1:20)
cortex_sp <- FindClusters(cortex_sp, verbose = FALSE,resolution = 0.5)
cortex_sp <- RunUMAP(cortex_sp, reduction = "pca", dims = 1:20)
if (!is.null(region)){
sp_anno = read.csv(region,header = T)
cortex_sp = cortex_sp[,sp_anno$Barcode]
Idents(cortex_sp) = sp_anno$Cluster
cortex_sp$seurat_clusters = sp_anno$Cluster
}
region_marker=FindAllMarkers(cortex_sp,logfc.threshold = sp.logfc,only.pos = T,min.pct = sp.pct)
region_marker$d=region_marker$pct.1 - region_marker$pct.2
region_main_marker = subset(region_marker,avg_log2FC > 0.3 & p_val_adj < 0.05 & d > 0.1)
region_main_marker = region_main_marker %>% arrange(cluster,desc(avg_log2FC))
region_main_marker = as.data.frame(region_main_marker)
region_main_marker$cluster = paste('spatial',region_main_marker$cluster,sep = '_')
#####MIA
region_specific = region_main_marker[,c("cluster","gene")]
colnames(region_specific)[1]="region"
celltype_specific = sc.main.marker[,c("cluster","gene")]
colnames(celltype_specific)[1]="celltype"
source("MIA.R")
Result = zhao_MIA(region_specific,celltype_specific,sample,outdir)
需要source的脚本MIA.R
网友评论