所有样本中获取共有甲基化位点,进行候选相关性和PCA分析
脚本名称filter.py
#!~/anaconda3/envs/meme/bin/python3
import sys
#count = {}
data = {}
for f in sys.argv[1:]:
with open(f, 'r') as file:
for line in file:
tem = line.strip().split("\t")
# if tem[0] in count:
# count[tem[0]] += 1
# else:
# count[tem[0]] = 1
if tem[0] in data:
data[tem[0]].append(tem[1])
else:
data[tem[0]] = [tem[1]]
print("id\t" + "\t".join(sys.argv[1:]))
for k, v in data.items():
# if count[k] == 12:
if len(v) == 12:
print(k + "\t" + "\t".join(v))
用法 python filter.py S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 > 400bin.txt
#ForCM
利用corrplot R包进行相关性分析
setwd("C:/Users/Administrator/Documents/CM/")
testcorr111 = read.table(file ="sample.exp.txt",header = T)
sample.exp.txt格式示例h<- testcorr111[,2:46]
pdf("ChenCor2.pdf")
cor(h)
library(corrplot)
res_cor <- cor(h)
heatmap(res_cor)
write.table(res_cor,file = "ChenCor.txt",sep = "\t", row.names = TRUE, col.names = TRUE)
corrplot.mixed(corr=res_cor, add=TRUE, lower = 'number', upper = 'pie')
dev.off()
##PCA分析
pca1 = testcorr111[,2:46]
pcadev=princomp(pca1,cor=T)
summary(pcadev,loadings = T)
comp = pcadev$loadings
write.table(comp, file = "Loadings.txt")
格式示例library(ggplot2)
Loading1.txt格式示例pp = read.table("Loading1.txt",header = T)
pdf("PCA1.pdf")
pcap = ggplot(pp,aes(x=Comp1,y=Comp2,col=Stages,shape=Groups))+
geom_point(size = 4)+
labs(x="PCA1(74.04%)",y="PCA2(12.28%)")+
geom_rug()+
theme_bw()
pcap
dev.off()
网友评论