接上一篇的数据准备阶段,上一篇已经介绍了4种情况:
1.有直接的标准10X数据(barcodes.tsv/genes.tsv/matrix.mtx)
2.多个10X数据合并
3.只有矩阵数据
4.矩阵数据还需要筛选
最近又学习了几种情况,再来补充下:
5.多个csv/txt矩阵合并:
# Read several CSV matrices; the first column of each file becomes the row
# names (presumably gene identifiers -- confirm against your files).
data1 <- read.csv("******", header = TRUE, row.names = 1)
data2 <- read.csv("******", header = TRUE, row.names = 1)
data3 <- read.csv("******", header = TRUE, row.names = 1)
# For txt files use: data <- read.table("******", header = TRUE, row.names = 1)

# data.frame() binds columns by position and keeps only the row names of its
# first input, so rows that are ordered differently between files would be
# silently misaligned. Restrict every input to the shared row names, in one
# common order, before binding.
common_rows <- Reduce(
  intersect,
  list(rownames(data1), rownames(data2), rownames(data3))
)
stopifnot(length(common_rows) > 0)

# Combine into a single data frame.
datan <- data.frame(
  data1[common_rows, , drop = FALSE],
  data2[common_rows, , drop = FALSE],
  data3[common_rows, , drop = FALSE]
)
# Convert the data frame into a sparse matrix (dgCMatrix).
dataan <- as(as.matrix(datan), "dgCMatrix")
6.数据分组(样本)比较多,想要用R实现批量处理
例如:GSE162025
image.png
# Batch-load every file under GSE162025_RAW/ into its own Seurat object,
# merge them, and derive patient/location annotations from the cell names.
# NOTE(review): CreateSeuratObject()/merge() come from Seurat, which is assumed
# to be attached already -- confirm.
library(data.table)
library(stringr)

samples <- list.files("GSE162025_RAW/")
samples
stopifnot(length(samples) > 0)

pbmcList <- lapply(samples, function(x) {
  # The original overwrote `x` with samples[1] here (debugging leftover), so
  # every iteration re-read the first file; use the lapply argument instead.
  print(x)
  y <- file.path("GSE162025_RAW", x)
  a <- fread(y, data.table = FALSE)
  # The first column holds the row names; remove it from the count columns.
  rownames(a) <- a[, 1]
  a <- a[, -1, drop = FALSE]
  CreateSeuratObject(a)
})
pbmcList

# merge() needs at least one object in `y`; with a single sample there is
# nothing to merge.
if (length(pbmcList) > 1) {
  pbmc <- merge(x = pbmcList[[1]], y = pbmcList[-1])
} else {
  pbmc <- pbmcList[[1]]
}
as.data.frame(pbmc@assays$RNA@counts[1:10, 1:2])
head(pbmc@meta.data)

# Split the cell names on "_": field 2 is stored as patient, field 3 as
# location, and orig.ident is rebuilt as "<location>_<patient>".
phe <- str_split(rownames(pbmc@meta.data), "_", simplify = TRUE)
head(phe)
pbmc@meta.data$patient <- phe[, 2]
pbmc@meta.data$location <- phe[, 3]
pbmc@meta.data$orig.ident <- paste(phe[, 3], phe[, 2], sep = "_")
# table() on the whole meta.data frame would cross-tabulate every column,
# including the per-cell numeric ones; count cells per sample instead.
table(pbmc@meta.data$orig.ident)
这段代码来自Jimmy老师,运行比较慢。
如果想用CCA方法整合多个样本,运行就更慢了:
# The current working directory must contain the files whose names end in
# "matrices.csv.gz"; one Seurat object is built per file.
options(stringsAsFactors = FALSE)
library(stringr)

# `pattern` is a regular expression: escape the dots and anchor at the end so
# that only names with this exact suffix are picked up.
fs <- list.files(pattern = "matrices\\.csv\\.gz$")
fs
pbmcList <- lapply(fs, function(x) {
  a <- read.csv(x)
  # The first column holds the row names; the remaining columns are the counts.
  raw.data <- a[, -1, drop = FALSE]
  rownames(raw.data) <- a[, 1]
  # The project name is the second "_"-separated field of the file name.
  p <- str_split(x, "_", simplify = TRUE)[, 2]
  CreateSeuratObject(counts = raw.data, project = p)
})
# Integrate the per-sample Seurat objects held in pbmcList.
pro <- "integrated"  # not referenced by the code shown here

# Normalise each object and select its 2000 most variable features ("vst").
# lapply() avoids the 1:length() pitfall, which iterates over c(1, 0) when the
# list is empty.
pbmcList <- lapply(pbmcList, function(obj) {
  obj <- NormalizeData(obj, verbose = FALSE)
  FindVariableFeatures(
    obj,
    selection.method = "vst",
    nfeatures = 2000,
    verbose = FALSE
  )
})
pbmcList

# Find anchors across the objects and integrate them using dimensions 1-30.
pbmc.anchors <- FindIntegrationAnchors(object.list = pbmcList, dims = 1:30)
pbmc.integrated <- IntegrateData(anchorset = pbmc.anchors, dims = 1:30)
# Make the "integrated" assay the default for downstream steps.
DefaultAssay(pbmc.integrated) <- "integrated"
head(pbmc.integrated@meta.data)
tail(pbmc.integrated@meta.data)
网友评论