IRScope 是用用来可视化 叶绿体基因组边界扩张收缩的一个shiny应用。正好自己最近在学习R语言里shiny相关的知识。就准备看看他的代码是怎么写的。目前看懂了一小部分,记录在这里。
他开头定义了很多函数
1、读入genbank函数的文件
read.gb<- function(file){
return(readLines(file))
}
readLines()
函数读入文本文件,结果好像是一个向量,文件中的每行是向量中的一个元素。
2、提取读入的genbank文件的fasta序列
这个代码稍微有点长,,逻辑还有点没看懂
FasExtract<- function(gb){
fasta<-gb[(grep("ORIGIN", gb)+1):length(gb)]
while(fasta[length(fasta)]=="") {
fasta<- fasta[1:length(fasta)-1]
}
while(fasta[length(fasta)]=="//") {
fasta<- fasta[1:length(fasta)-1]
}
fas<-""
for (i in 1:length(fasta)){
sort.let<- sort(unique(c(grep("c", strsplit(fasta[i], " ")[[1]]),grep("a", strsplit(fasta[i], " ")[[1]]), grep("t", strsplit(fasta[i], " ")[[1]]), grep("g", strsplit(fasta[i], " ")[[1]]))))
try(if(length(sort.let)==!6) stop("Check the gb file; the columns of ORIGIN should be 6"))
fasta[i]<- paste(strsplit(fasta[i], " ")[[1]][sort.let[1]],strsplit(fasta[i], " ")[[1]][sort.let[2]],strsplit(fasta[i], " ")[[1]][sort.let[3]],strsplit(fasta[i], " ")[[1]][sort.let[4]],strsplit(fasta[i], " ")[[1]][sort.let[5]],strsplit(fasta[i], " ")[[1]][sort.let[6]], sep="")
fas<-paste(fas, fasta[i], sep="")
}
#fasta[length(fasta)]<- gsub("NA", "", fasta[length(fasta)])
fas<-gsub("NA", "", fas)
strsplit(fas, "")[[1]]
}
3、计算fasta序列GC含量
GC.content<- function(fas){
return((sum(fas=="g")+sum(fas=="c"))/length(fas))
}
4、提取genbank文件中的物种拉丁名
sp.name<- function(gb){
paste(strsplit(gb[2], " ")[[1]][3], strsplit(gb[2], " ")[[1]][4], sep=" ")
}
这四个函数连在一起用就是
> gbfile<-read.gb("sequence.gb")
> fas<-FasExtract(gbfile)
> fas
[1] "a" "t" "g" "a" "t" "t" "g" "a" "a" "g" "t" "t" "t" "t" "t" "c" "t" "a"
[19] "t" "t" "t" "g" "g" "a" "a" "t" "t" "g" "t" "c" "t" "t" "a" "g" "g" "t"
[37] "c" "t" "a" "a" "t" "t" "c" "c" "t" "a" "t" "t" "a" "c" "t" "t" "t" "a"
[55] "g" "c" "t" "g" "g" "a" "t" "t" "a" "t" "t" "t" "g" "t" "a" "a" "c" "t"
[73] "g" "c" "a" "t" "a" "t" "t" "t" "a" "c" "a" "a" "t" "a" "c" "a" "g" "a"
[91] "c" "g" "t" "g" "g" "t" "g" "a" "t" "c" "a" "g" "t" "t" "g" "g" "a" "c"
[109] "t" "t" "t" "t" "g" "a"
> GC.content(fas)
[1] 0.3157895
> sp.name(gbfile)
[1] "Punica granatum"
5、替换fasta序列中的非ATCG的字符
rdnFixer<- function(gb){
seq<- FasExtract(gb)
seq[which(seq=="u")]<-sample(c("t"), length(which(seq=="u")), TRUE)
seq[which(seq=="r")]<-sample(c("a", "g"), length(which(seq=="r")), TRUE)
seq[which(seq=="y")]<-sample(c("c", "t"), length(which(seq=="y")), TRUE)
seq[which(seq=="s")]<-sample(c("c", "g"), length(which(seq=="s")), TRUE)
seq[which(seq=="w")]<-sample(c("a", "t"), length(which(seq=="w")), TRUE)
seq[which(seq=="k")]<-sample(c("g", "t"), length(which(seq=="k")), TRUE)
seq[which(seq=="m")]<-sample(c("c", "a"), length(which(seq=="m")), TRUE)
seq[which(seq=="b")]<-sample(c("c", "g", "t"), length(which(seq=="b")), TRUE)
seq[which(seq=="d")]<-sample(c("a", "g", "t"), length(which(seq=="d")), TRUE)
seq[which(seq=="h")]<-sample(c("c", "a", "t"), length(which(seq=="h")), TRUE)
seq[which(seq=="v")]<-sample(c("c", "a", "g"), length(which(seq=="v")), TRUE)
seq[which(seq=="n")]<-sample(c("c", "g", "t", "a"), length(which(seq=="n")), TRUE)
seq[which(seq=="-")]<-sample(c("c", "g", "t", "a"), length(which(seq=="-")), TRUE)
return(seq)
}
5、鉴定叶绿基因组的四个区域边界信息
这个代码就更长了,代码看起来就更吃力了。如何鉴定四个区域的边界位置逻辑有点看不懂呀!
IRinfo<- function(genome, parallel=TRUE){
#' IR information
#'
#' Detecting the starts and the length of the IR regions on the chloroplast genome
#'
#' @param genome The plastid genome of a species as a simple vector of nucleotides
#' @return a vector of four elements as the start of the first and second IR region, their length and the total lenght of the genome, respectively
#' @export
###Preliminary functions
cirtick<<- function(tick, vector){#circular rotative function
if(tick > length(vector)-1 || tick < 1){
return(vector)
}
else {
return(c(vector[(tick+1):length(vector)], vector[1:tick]))
}
}
genome.comp.rev<<- function(genome){#reverse complement function
gcr<-genome[length(genome):1]
gcr<-gsub("a", "T", gcr)
gcr<-gsub("t", "A", gcr)
gcr<-gsub("g", "C", gcr)
gcr<-gsub("c", "G", gcr)
return(tolower(gcr))
}
#Checked
phase.detector<- function(genome){#detecting the phase difference of the two inverted genomes
shifter=84000 #this shifter is faster mode, to be sure set the value to 80000,
gcr<- genome.comp.rev(genome)
genome<-cirtick(shifter, genome)
l<-length(genome)
track<- numeric(l+1)
track[l+1]<- round(l/4)
for (i in 1:l){
track[i]<- sum(cirtick(i, genome)==gcr)
if ((track[i] - track[l+1])/l > 0.1) {###stable version with 0.07 but changing it for the Guizotia abyssinica, for the parallel p.d fucntion as well
break
}
}
a<<- which(track==max(track))+shifter
if ( a > l){
return(a - l)
}
else {
return(a)
}
}
#the parallel version of the phase.detector function. Set with default 4
p.d<<- function(genome, nCPU=2){#tested in the GlobEnv, it might fail when embedded in the bigger function
genome<<-genome
fun<<- function(shifter){#the function to be passed to the slaves for the parallel computing, the out put is with max 10 second either NA or the phase.detector
gcr<- genome.comp.rev(genome)
cir.genome<<-cirtick(shifter, genome)
l<-length(genome)
track<- numeric(l+1)
track[l+1]<- round(l/4)
s.time<- Sys.time()
no.value<- FALSE
for (i in 1:l){
track[i]<- sum(cirtick(i, cir.genome)==gcr)
i.time<- Sys.time()
if ((track[i] - track[l+1])/l > 0.1) {
break
}
if (i.time - s.time > 11){
no.value<- TRUE
break
}
}
if (no.value) {
a<<- NA
return(a)
}
else {
a<<- which(track==max(track))+shifter
if ( a > l){
return(a - l)
}
else {
return(a)
}
}
}
ini.forw<<- 84000
ini.back<<- ini.forw
mm<<- rep(NA, nCPU)
sfStop()
sfInit(parallel=TRUE, cpus=nCPU)
while(sum(is.na(mm))==length(mm)){
sfExport("genome", "cirtick", "genome.comp.rev", "fun", "mm", "ini.forw", "ini.back")
mm<-unlist(sfLapply(c(seq(ini.forw, ini.forw+nCPU/2*1000-1, 1000), seq(ini.back, ini.back- nCPU/2*1000, -1000)[-1]), fun))
ini.forw<<- ini.forw+nCPU/2*1000
ini.back<<-ini.back-nCPU/2*1000
}
sfStop()
return(unique(mm)[which((is.na(unique(mm))==FALSE))])
}
#Checked
True.sequence.finder<- function(genome, phase.difference){#finding the cordinate of the IR region
#phase.difference<-phase.detector(genome)
true.search<-cirtick(phase.difference, genome)==genome.comp.rev(genome)
true.arm<- round(length(genome)/100)
for (i in 1:length(genome)){
if (sum(true.search[i:(true.arm+i-1)])==true.arm) {
return(i); break
}
}
}
#Checked
IR1start<-function(phase.difference,True.sequence.finder){
return(phase.difference+True.sequence.finder)
}
#Checked
IR.length<- function(IR1start, True.sequence.finder, genome){
s<-IR1start
t<-True.sequence.finder
r<- genome.comp.rev(genome)
T<-cirtick(s, genome)==cirtick(t, r)
Tl<- list()
for (i in 1:50){
Tl[[i]]<- cirtick((s + i), genome)==cirtick(t, r)
}
for (i in 1:50){
Tl[[i+50]]<- cirtick(s , genome)==cirtick((t+i), r)
}
count<-1
while(T[count]){
count<- count+1
}
###jump from INDEL
for (i in 1:50){
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
if(sum(Tl[[i]][(count+1):(count+10)])==10){
while (Tl[[i]][(count+1)]){
count<- ((count+1) + i)
}
}
if(sum(Tl[[i]][(count+2):(count+11)])==10){
while (Tl[[i]][(count+2)]){
count<- ((count+2) + i)
}
}
if(sum(Tl[[i]][(count+3):(count+12)])==10){
while (Tl[[i]][(count+3)]){
count<- ((count+3) + i)
}
}
if(sum(Tl[[i]][(count+4):(count+13)])==10){
while (Tl[[i]][(count+4)]){
count<- ((count+4) + i)
}
}
if(sum(Tl[[i]][(count+5):(count+14)])==10){
while (Tl[[i]][(count+5)]){
count<- ((count+5) + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
if(sum(Tl[[i+50]][(count+1):(count+10)])==10){
while (Tl[[i+50]][(count+1)]){
count<- ((count+1) + i)
}
}
if(sum(Tl[[i+50]][(count+2):(count+11)])==10){
while (Tl[[i+50]][(count+2)]){
count<- ((count+2) + i)
}
}
if(sum(Tl[[i+50]][(count+3):(count+12)])==10){
while (Tl[[i+50]][(count+3)]){
count<- ((count+3) + i)
}
}
if(sum(Tl[[i+50]][(count+4):(count+13)])==10){
while (Tl[[i]][(count+4)]){
count<- ((count+4) + i)
}
}
if(sum(Tl[[i+50]][(count+5):(count+14)])==10){
while (Tl[[i]][(count+5)]){
count<- ((count+5) + i)
}
}
}
##end of jump indel
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
###jump from INDEL
for (i in 1:50){
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
if(sum(Tl[[i]][(count+1):(count+10)])==10){
while (Tl[[i]][(count+1)]){
count<- ((count+1) + i)
}
}
if(sum(Tl[[i]][(count+2):(count+11)])==10){
while (Tl[[i]][(count+2)]){
count<- ((count+2) + i)
}
}
if(sum(Tl[[i]][(count+3):(count+12)])==10){
while (Tl[[i]][(count+3)]){
count<- ((count+3) + i)
}
}
if(sum(Tl[[i]][(count+4):(count+13)])==10){
while (Tl[[i]][(count+4)]){
count<- ((count+4) + i)
}
}
if(sum(Tl[[i]][(count+5):(count+14)])==10){
while (Tl[[i]][(count+5)]){
count<- ((count+5) + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
if(sum(Tl[[i+50]][(count+1):(count+10)])==10){
while (Tl[[i+50]][(count+1)]){
count<- ((count+1) + i)
}
}
if(sum(Tl[[i+50]][(count+2):(count+11)])==10){
while (Tl[[i+50]][(count+2)]){
count<- ((count+2) + i)
}
}
if(sum(Tl[[i+50]][(count+3):(count+12)])==10){
while (Tl[[i+50]][(count+3)]){
count<- ((count+3) + i)
}
}
if(sum(Tl[[i+50]][(count+4):(count+13)])==10){
while (Tl[[i]][(count+4)]){
count<- ((count+4) + i)
}
}
if(sum(Tl[[i+50]][(count+5):(count+14)])==10){
while (Tl[[i]][(count+5)]){
count<- ((count+5) + i)
}
}
}
##end of jump indel
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+25)]==rep(TRUE, 16))==16){
count<- count+10
while(T[count]){
count<- count+1
}
}
if (sum(T[(count+10):(count+75)]==rep(TRUE, 66))==66){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+75)]==rep(TRUE, 66))==66){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+75)]==rep(TRUE, 66))==66){
count<- count+10
while(T[count]){
count<- count+1
}
}
for (i in 1:50){###jump from INDEL
if(sum(Tl[[i]][count:(count+9)])==10){
while (Tl[[i]][count]){
count<- (count + i)
}
}
}
for (i in 1:50){
if(sum(Tl[[i+50]][count:(count+9)])==10){
while (Tl[[i+50]][count]){
count<- (count + i)
}
}
}##end of jump indel
if (sum(T[(count+10):(count+175)]==rep(TRUE, 166))==66){
count<- count+10
while(T[count]){
count<- count+1
}
}
count
}
IR2start<- function(True.sequence.finder, IR.length, genome){
return(length(genome)-(True.sequence.finder+IR.length-2))
}
#declaration
if (parallel){
phase.difference<- p.d(genome)
}
else{
phase.difference<- phase.detector(genome)
}
Trsf<- True.sequence.finder(genome, phase.difference)
IR1s<- IR1start(phase.difference, Trsf)
IR.l<- IR.length(IR1s, Trsf, genome)
IR2s<- IR2start(Trsf, IR.l, genome)
#calculation
return(c(IR1s, IR2s, IR.l, length(genome)))#returning the start of IR one and two follow by their lenght and the lenght of genome
#gives a vector with four elemens as the start of the IRb and IRa and their length and the lenght of the genome sequence
}
如果要单独用这个函数需要加载 sonwfall()
这个包,这里用到了并行计算
输入是 fasta
输出是 第一个反向重复区的位置,小单拷贝区的长度,总长
> gbfile<-read.gb("Taishanhong_CP_genome.gb")
> fas<-FasExtract(gbfile)
> IRinfo(fas)
snowfall 1.84-6.1 initialized (using snow 0.4-3): parallel execution on 2 CPUs.
Stopping cluster
[1] 89022 133173 25466 158638
6、获取genbank文件中所有的基因名
gene.name<-function(gb, type){
if(type=="gene"){
t <- gb[grep(" gene ", gb)+1]
for (i in 1:length(t)){
crude<-strsplit(t[i], " ")[[1]]
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" gene ", gb)+2][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" gene ", gb)+3][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" gene ", gb)+4][i]
crude<- strsplit(t[i], " ")[[1]]
}
crude.name<-crude[which(crude!=rep("", length(crude)))]
t[i]<-gsub("\"", "", strsplit(crude.name, "=")[[1]][2])
}
na<-which(is.na(t))
if(length(na) > 0){
for (i in 1:length(na)){
warning(paste(paste(paste("Gene No.", na[i], " "), paste("is not properly named and is deleted from the list."), ""), paste("Check the gb file on line", which(gb==gb[grep("gene ", gb)+1][na]), ""), ""))
}
}
}
else if(type=="tRNA"){
t <- gb[grep(" tRNA ", gb)+1]
for (i in 1:length(t)){
crude<-strsplit(t[i], " ")[[1]]
if(length(grep("=", crude))==0){
t[i] <- gb[grep("tRNA ", gb)+2][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" tRNA ", gb)+3][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" tRNA ", gb)+4][i]
crude<- strsplit(t[i], " ")[[1]]
}
crude.name<-crude[which(crude!=rep("", length(crude)))]
t[i]<-gsub("\"", "", strsplit(crude.name, "=")[[1]][2])
}
na<-which(is.na(t))
if(length(na) > 0){
for (i in 1:length(na)){
warning(paste(paste(paste("Gene No.", na[i], " "), paste("is not properly named and is deleted from the list."), ""), paste("Check the gb file on line", which(gb==gb[grep("tRNA ", gb)+1]), ""), ""), "\n")
}
}
}
else if(type=="rRNA"){
t <- gb[grep("rRNA ", gb)+1]
for (i in 1:length(t)){
crude<-strsplit(t[i], " ")[[1]]
if(length(grep("=", crude))==0){
t[i] <- gb[grep("rRNA ", gb)+2][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" rRNA ", gb)+3][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" rRNA ", gb)+4][i]
crude<- strsplit(t[i], " ")[[1]]
}
crude.name<-crude[which(crude!=rep("", length(crude)))]
t[i]<-gsub("\"", "", strsplit(crude.name, "=")[[1]][2])
}
na<-which(is.na(t))
if(length(na) > 0){
for (i in 1:length(na)){
warning(paste(paste(paste("Gene No.", na[i], " "), paste("is not properly named and is deleted from the list."), ""), paste("Check the gb file on line", which(gb==gb[grep("gene ", gb)+1][na]), ""), ""))
}
}
}
else if(type=="mRNA"){
t <- gb[grep("mRNA ", gb)+1]
for (i in 1:length(t)){
crude<-strsplit(t[i], " ")[[1]]
if(length(grep("=", crude))==0){
t[i] <- gb[grep("mRNA ", gb)+2][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" mRNA ", gb)+3][i]
crude<- strsplit(t[i], " ")[[1]]
}
if(length(grep("=", crude))==0){#if it cannot find the gene name in the following line go to the next
t[i] <- gb[grep(" mRNA ", gb)+4][i]
crude<- strsplit(t[i], " ")[[1]]
}
crude.name<-crude[which(crude!=rep("", length(crude)))]
t[i]<-gsub("\"", "", strsplit(crude.name, "=")[[1]][2])
}
na<-which(is.na(t))
if(length(na) > 0){
for (i in 1:length(na)){
warning(paste(paste(paste("Gene No.", na[i], " "), paste("is not properly named and is deleted from the list."), ""), paste("Check the gb file on line", which(gb==gb[grep("gene ", gb)+1][na]), ""), ""))
}
}
}
else {
stop("The type should be defined as either gene or tRNA")
}
t<-t[!is.na(t)]
return(t)
#intermediate gene name function to substract the gene names of either gene or tRNA, mRNA or rRNA from their second line information(or third), the input is the gb file.
}
可以获得所有的基因名称,tRNA或者rRNA的名称,但是不能够获得蛋白编码基因的名称,但是应该可以改,把代码里的mRNA统一换成CDS应该就可以了
> gbfile<-read.gb("Taishanhong_CP_genome.gb")
> gene.name(gbfile,'rRNA')
[1] "rrn16" "rrn23" "rrn4.5" "rrn5" "rrn5" "rrn4.5" "rrn23" "rrn16"
还有好多,今天就到这里了。
重点是画图函数,但是他的画图函数好长好长啊!
欢迎大家关注我的公众号
小明的数据分析笔记本
网友评论