PAGODA全称pathway and gene set overdispersion analysis,pagoda是2016年在nature methods发表的一个分析单细胞测序的方法,主要特点是在已知的重要信号通路基础上对细胞进行分类,以提高统计效力并揭示可能的功能性解释。同时RNA速率推断细胞演化velocyto是基于pagoda,因此有必要学一下pagoda。
安装
#下载pagoda依赖的软件
sudo apt-get update
sudo apt-get -y install build-essential cmake gsl-bin libgsl0-dev libeigen3-dev libboost-all-dev libssl-dev libcurl4-openssl-dev libssl-dev libcairo2-dev libxt-dev libgtk2.0-dev libcairo2-dev xvfb xauth xfonts-base
#R中安装
source("http://bioconductor.org/biocLite.R")
biocLite(c("GO.db", "org.Hs.eg.db","org.Mm.eg.db", "pcaMethods"), suppressUpdates=TRUE)
library(devtools)
install_github("igraph/rigraph")
install_github("jkrijthe/Rtsne",ref="openmp")
install.packages(c("Cairo","urltools"))
# Install pagoda
install_github("hms-dbmi/pagoda2")
library('pagoda2')
1.数据读入
#读入表达矩阵,这里用的seurat包中的数据
dta<-as.matrix(GetAssayData(P0,slot = "counts"))
par(mfrow=c(1,2), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 1.0)
hist(log10(colSums(dta)+1),main='molecules per cell',col='cornsilk',xlab='log10(molecules per cell)')
hist(log10(rowSums(dta)+1),main='molecules per gene',col='cornsilk',xlab='log10(molecules per gene])')
image.png
2.对表达量差异很大的基因对下游分析所占比重进行调整
r$adjustVariance(plot=T,gam.k=10)
image.png
3.聚类·
#First, the PCA reduction.
r$calculatePcaReduction(nPcs=50,n.odgenes=3e3)
#generate a KNN graph
r$makeKnnGraph(k=35,type='PCA',center=T,distance='cosine')
#call clusters
r$getKnnClusters(method=infomap.community,type='PCA')
#tsne
r$getEmbedding(type='PCA',embeddingType='tSNE',perplexity=50,verbose=F)
r$plotEmbedding(type='PCA',embeddingType='tSNE',show.legend=F,mark.clusters=T,min.group.size=50,shuffle.colors=F,mark.cluster.cex=1,alpha=0.8,main='clusters (tSNE)')
image.png
4.查看特定特征表达
#We can use the same plotEmbedding() function to show all kinds of other values. For instance, let’s look at depth, or an expresson pattern of a gene:
str(r$depth)
par(mfrow=c(1,2))
r$plotEmbedding(type='PCA',embeddingType='tSNE',colors=r$depth,shuffle.colors=F,mark.cluster.cex=1,alpha=0.1,main='depth')
image.png
par(mfrow=c(1,2))
gene<-"Myh6"
r$plotEmbedding(type='PCA',embeddingType='tSNE',colors=r$counts[,gene],shuffle.colors=F,mark.cluster.cex=1,alpha=0.8,main=gene)
gene <-"Col1a1"
r$plotEmbedding(type='PCA',embeddingType='tSNE',colors=r$counts[,gene],shuffle.colors=F,mark.cluster.cex=1,alpha=0.1,main=gene)
image.png
- multilevel 聚类,群更简明一些
#We can generate multiple potential clusterings, with different names. Here we’ll use multilevel clustering:
r$getKnnClusters(method=multilevel.community,type='PCA',name='multilevel')
str(r$clusters)
#
par(mfrow=c(1,2))
r$plotEmbedding(type='PCA',embeddingType='tSNE',groups=r$clusters$PCA$community,show.legend=F,mark.clusters=T,min.group.size=1,shuffle.colors=F,mark.cluster.cex=1,alpha=0.1,main='infomap clusters (tSNE)')
#
r$plotEmbedding(type='PCA',embeddingType='tSNE',clusterType='multilevel',show.legend=F,mark.clusters=T,min.group.size=1,shuffle.colors=F,mark.cluster.cex=1,alpha=0.1,main='multlevel clusters (tSNE)')
image.png
6.热图展示差异基因
#Run differential expression on the infomap clusters:
r$getDifferentialGenes(type='PCA',verbose=T,clusterType='multilevel')
#r$diffgenes$PCA[[1]]为community,[[2]]为multilevel
de <- r$diffgenes$PCA[[2]][['4']];
r$plotGeneHeatmap(genes=rownames(de)[1:15],groups=r$clusters$PCA[[2]])
image.png
欢迎关注!
参考:
http://pklab.med.harvard.edu/peterk/p2.walkthrough.html
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4772672/#!po=12.5000
https://www.jianshu.com/p/f4d79b91d448
http://hms-dbmi.github.io/scde/pagoda.html
网友评论