# Build a gene ordering table (gene name, chromosome, start, end) from a
# mouse GTF annotation and write it out as a headerless, tab-separated file.
#
# Steps: load the GTF, keep the attribute/chromosome/start/end columns,
# reduce the attribute column to the gene name, keep chromosomes 1-19, MT,
# X and Y in that order, keep the first record per gene name, and save.

gtf_path <- '/share/nas1/Data/DataBase/Genome/Mus_musculus/Transcriptome/Mm10/genes/genes.gtf'
out_path <- '/share/nas1/Data/Users/luohb/tools_20201026/mouse_inferCNV/mm_geneOrderingFile.txt'

# ---- Load data ----
# quote = "" keeps the attribute column verbatim, so a stray apostrophe in
# an attribute value cannot be mistaken for an opening quote.
# stringsAsFactors = FALSE keeps chromosome names and attributes as
# character on every R version (factors were the default before R 4.0).
mouse_geneOrderingFile <- read.table(
  gtf_path,
  header = FALSE,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)
# View() needs an interactive session with a data viewer; skip it otherwise.
if (interactive()) {
  View(mouse_geneOrderingFile)
}

# ---- Select columns ----
# V9 = attributes, V1 = chromosome, V4 = start, V5 = end.
mouse_geneOrderingFile1 <- mouse_geneOrderingFile[, c(9, 1, 4, 5)]

# ---- Extract gene name ----
# Pull the value of the (last) gene_name "..." attribute. Records that carry
# no gene_name attribute are dropped rather than kept with a fragment of the
# attribute string as their name.
gene_name_pattern <- '.*gene_name "([^"]*)".*'
attribute_text <- as.character(mouse_geneOrderingFile1$V9)
has_gene_name <- grepl(gene_name_pattern, attribute_text)
mouse_geneOrderingFile1 <- mouse_geneOrderingFile1[has_gene_name, ]
mouse_geneOrderingFile1$V9 <- sub(
  gene_name_pattern, "\\1", attribute_text[has_gene_name]
)
mouse_geneOrderingFile1$V1 <- as.character(mouse_geneOrderingFile1$V1)
head(mouse_geneOrderingFile1)

# ---- Order genes by chromosome ----
if (interactive()) {
  View(mouse_geneOrderingFile1)
}
# NOTE(review): chromosome names are matched without a "chr" prefix;
# confirm the GTF uses "1", "2", ..., "MT", "X", "Y".
chr_levels <- c(1:19, "MT", "X", "Y")
mouse_geneOrderingFile2 <- mouse_geneOrderingFile1[
  mouse_geneOrderingFile1$V1 %in% chr_levels,
]
dim(mouse_geneOrderingFile1)
dim(mouse_geneOrderingFile2)
if (nrow(mouse_geneOrderingFile2) == 0L) {
  stop(
    "No GTF records matched chromosomes 1-19, MT, X, Y; ",
    "check the chromosome naming in ", gtf_path,
    call. = FALSE
  )
}
order_df <- data.frame(
  chr = chr_levels,
  order = seq_along(chr_levels),
  stringsAsFactors = FALSE
)
# Rank each record by its chromosome's position in order_df. match() on
# character values replaces the plyr::mapvalues() + as.numeric() round trip,
# which depended on plyr being attached and returned factor level codes when
# the chromosome column was a factor.
chr_rank <- order_df$order[match(mouse_geneOrderingFile2$V1, order_df$chr)]
# order() leaves ties in their original order, so records keep their GTF
# order within each chromosome.
mouse_geneOrderingFile2 <- mouse_geneOrderingFile2[order(chr_rank), ]
if (interactive()) {
  View(mouse_geneOrderingFile2)
}

# ---- De-duplicate ----
# Keep the first record for each gene name.
# NOTE(review): start/end therefore come from the first GTF record of each
# gene in file order, not necessarily the full gene span; confirm this is
# what the downstream tool expects.
mouse_geneOrderingFile3 <- mouse_geneOrderingFile2[
  !duplicated(mouse_geneOrderingFile2$V9),
]

# ---- Save ----
write.table(
  mouse_geneOrderingFile3,
  file = out_path,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)
# 网友评论 (reader-comments heading left over from the source web page; not part of the script)