美文网首页
RBP AS生信流程(N)RBP与AS相关性

RBP AS生信流程(N)RBP与AS相关性

作者: cHarden13 | 来源:发表于2021-01-24 15:24 被阅读0次
    RBP = read.table("RBPexp.txt", row.names=1 ,header=T,sep="\t",check.names=F) 
    AS = read.table("AS.txt", row.names=1 ,header=T,sep="\t",check.names=F)  
    rownames(AS)=gsub("\\|","\\-",rownames(AS))
    corFilter=0.6             
    pvalueFilter=0.05
    outTab=data.frame()
    for(i in row.names(RBP)){
      if(sd(RBP[i,])>1){
          for(j in row.names(AS)){
             x=as.numeric(RBP[i,])
             y=as.numeric(AS[j,])
             corT=cor.test(x,y)
             cor=corT$estimate
             pvalue=corT$p.value
             if((cor>corFilter) & (pvalue<pvalueFilter)){
                 outTab=rbind(outTab,cbind(SF=i,AS=j,cor,pvalue,Regulation="postive"))
             }
             if((cor< -corFilter) & (pvalue<pvalueFilter)){
                 outTab=rbind(outTab,cbind(RBP=i,AS=j,cor,pvalue,Regulation="negative"))
             }
          }
        }
    }
    head(outTab)
    write.table(file="corResult.txt",outTab,sep="\t",quote=F,row.names=F)      
    
    library(openxlsx) #读取xlsx文件
    library(tidyr) #separate函数
    #第一步:整理出 基因|ID|剪接类型 
    RI <- read.xlsx("RI.xlsx")
    head(RI[,21])
    RI$splicetype <- paste(RI$geneSymbol,RI$ID,"RI",sep = "|")
    head(RI[,24])
    RI2 <- RI[,c(21,22,24)]
    RI2 <- RI2[,c(3,1,2)]
    head(RI2)
    RI2 <- separate(RI2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
    "HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                    sep = ",",
                    remove =TRUE)
    RI2 <- separate(RI2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                           "normal5","normal6","normal7","normal8"),
                    sep = ",",remove =TRUE)
    head(RI2)
    rownames(RI2) <- RI2$splicetype
    RI2 <- RI2[,-1]
    head(RI2)
    SE <- read.xlsx("SE.xlsx")
    head(SE[,21])
    SE$splicetype <- paste(SE$geneSymbol,SE$ID,"SE",sep = "|")
    head(SE[,24])
    SE2 <- SE[,c(21,22,24)]
    SE2 <- SE2[,c(3,1,2)]
    head(SE2)
    SE2 <- separate(SE2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
    "HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                    sep = ",",
                    remove =TRUE)
    SE2 <- separate(SE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                           "normal5","normal6","normal7","normal8"),
                    sep = ",",remove =TRUE)
    head(SE2)
    rownames(SE2) <- SE2$splicetype
    SE2 <- SE2[,-1]
    head(SE2)
    A3SS <- read.xlsx("A3SS.xlsx")
    head(A3SS[,21])
    A3SS$splicetype <- paste(A3SS$geneSymbol,A3SS$ID,"A3SS",sep = "|")
    head(A3SS[,24])
    A3SS2 <- A3SS[,c(21,22,24)]
    A3SS2 <- A3SS2[,c(3,1,2)]
    head(A3SS2)
    A3SS2 <- separate(A3SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
    "HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                    sep = ",",
                    remove =TRUE)
    A3SS2 <- separate(A3SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                           "normal5","normal6","normal7","normal8"),
                    sep = ",",remove =TRUE)
    head(A3SS2)
    rownames(A3SS2) <- A3SS2$splicetype
    A3SS2 <- A3SS2[,-1]
    head(A3SS2)
    A5SS <- read.xlsx("A5SS.xlsx")
    head(A5SS[,21])
    A5SS$splicetype <- paste(A5SS$geneSymbol,A5SS$ID,"A5SS",sep = "|")
    head(A5SS[,24])
    A5SS2 <- A5SS[,c(21,22,24)]
    A5SS2 <- A5SS2[,c(3,1,2)]
    head(A5SS2)
    A5SS2 <- separate(A5SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
    "HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                      sep = ",",
                      remove =TRUE)
    A5SS2 <- separate(A5SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                               "normal5","normal6","normal7","normal8"),
                      sep = ",",remove =TRUE)
    head(A5SS2)
    rownames(A5SS2) <- A5SS2$splicetype
    A5SS2 <- A5SS2[,-1]
    head(A5SS2)
    MXE <- read.xlsx("MXE.xlsx") 
    MXE$splicetype <- paste(MXE$geneSymbol,MXE$ID,"MXE",sep = "|")
    head(MXE[,26])
    MXE2 <- MXE[,c(23,24,26)]
    MXE2 <- MXE2[,c(3,1,2)]
    head(MXE2)
    MXE2 <- separate(MXE2,IncLevel1,into = c("HF1","HF2","HF3","HF4",
    "HF5","HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                      sep = ",",
                      remove =TRUE)
    MXE2 <- separate(MXE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                               "normal5","normal6","normal7","normal8"),
                      sep = ",",remove =TRUE)
    head(MXE2)
    rownames(MXE2) <- MXE2$splicetype
    MXE2 <- MXE2[,-1]
    head(MXE2)
    allsplice <- rbind(RI2,SE2,A3SS2,A5SS2,MXE2)
    #第二步:过滤掉在大于四分之一的样本中PSI值为缺失值的可变剪接事件
    allsplice <- allsplice[apply(allsplice, 
                                  1,                            
                                  function(x)sum(x!="NA")>17),]  
    #第三步:过滤波动太小的可变剪接事件
    exp=as.matrix(allsplice)
    mat=impute.knn(exp)
    data=mat$data
    data=data[rowMeans(data)>0.05,]
    genes=c()
    for(i in row.names(data)){
      if(sd(data[i,])>0.2){       
        genes=c(genes,i)
      }
    }  
    all=data[genes,]
    all <- cbind(id=row.names(all),all)
    write.table(all,file="AS.txt",sep="\t",row.names=F,quote=F)
    

    相关文章

      网友评论

          本文标题:RBP AS生信流程(N)RBP与AS相关性

          本文链接:https://www.haomeiwen.com/subject/gtumzktx.html