美文网首页
regression network version 1.1

regression network version 1.1

作者: 为了梦走一遭 | 来源:发表于2020-08-26 03:24 被阅读0次

library(stats)
library(msgps)
library(glmnet)
set.seed(6300)

#################

setwd("/Users/mumu/Documents/Documents/R_DataAnalyzed/lm_mgps/")
dat<-read.table(file="Data2.txt", head=F, fill=F, sep="\t")
rownames(dat)<-paste("TF", seq(1:100), "")
colnames(dat)<-paste("Sample", seq(1:373), "")

tf.names<-paste("TF", seq(1,100,20), "")

train_id <- sample(1:ncol(dat), ncol(dat)*0.66)
test_id<-(-train_id)

train_dat<-t(dat[,train_id])
test_dat<-t(dat[,test_id])

lm.out<-LM(train_dat)
msgps.out<-MSGPS(train_dat)
glmnet.out<-GLMNET(train_dat)

lm.out[1:10,1:10]
msgps.out[1:10,1:10]
glmnet.out[1:10,1:10]

########### test the performance by std

rsq.lm<-RSQ(test_dat, lm.out)
rsq.msgps<-RSQ(test_dat,msgps.out)
rsq.glmnet<-RSQ(test_dat,glmnet.out)

summary(rsq.lm)
summary(rsq.msgps)
summary(rsq.glmnet)

#####################

lm.out2<-LM2(train_dat,tf.names)
msgps.out2<-MSGPS2(train_dat,tf.names)
glmnet.out2<-GLMNET2(train_dat,tf.names)

lm.out2[1:10,1:5]
msgps.out2[1:10,1:5]
glmnet.out2[1:10,1:5]

rsq.lm2<-RSQ2(test_dat,tf.names, lm.out2)
rsq.msgps2<-RSQ2(test_dat,tf.names, msgps.out2)
rsq.glmnet2<-RSQ2(test_dat,tf.names, glmnet.out2)

summary(rsq.lm2)
summary(rsq.msgps2)
summary(rsq.glmnet2)

input data format : a matrix with row names<-sample, colnames<-genes

LM<-function(dat){
size<-dim(dat)[2]
reg.coef<-matrix(0,size,size+1)

for (i in 1:size){

lm_out<-lm(dat[,i]~dat[,-i])
coef<-lm_out$coefficients

# summary(lm_out)

for (j in 1:(size+1)){
  
    if (j<i){
      
    reg.coef[i,j]=coef[j]
    }else if (j==i){
        reg.coef[i,j]=0
    }else{
        
        reg.coef[i,j]=coef[j-1]
    
    }
}

}
return(reg.coef)

}

LM2<-function(dat, tf.names){
size<-dim(dat)[2]
len.coef<-length(tf.names)+1

reg.coef<-matrix(0,ncol(dat), length(tf.names)+1)

tf.pos<-match(tf.names, colnames(dat))
tf.dat<-dat[,tf.pos]

for (i in 1:size){

print (i)
i.pos<-match(row.names(dat)[i],tf.names)

if (is.na(i.pos)==TRUE){
  
  lm_out<-lm(dat[,i]~tf.dat)
  coef<-lm_out$coefficients
  
  # summary(lm_out)
  
  for (j in 1:len.coef){
    reg.coef[i,j]=coef[j]}
  
}else{
  
  lm_out<-lm(dat[,i]~tf.dat[,-i.pos])
  coef<-lm_out$coefficients
  
  if (j<i.pos){
    reg.coef[i,j]=coef[j]
  }else if (j==i.pos){
    reg.coef[i,j]<-0
  }else{
    reg.coef[i,j]=coef[j-1]
  }
  
}

}
return(reg.coef)
}

MSGPS<-function(dat){
size<-dim(dat)[2]
reg.coef<-matrix(0,size,size+1)

for (i in 1:size){

print (i)
msgps_out<-msgps(dat[,-i], dat[,i])
coef<-coef(msgps_out)[,2]

for (j in 1:(size+1)){
  
    if (j<i){
    reg.coef[i,j]=coef[j]
    }else if (j==i){
        reg.coef[i,j]=0
    }else{
        reg.coef[i,j]=coef[j-1]
    }
}

}
return (reg.coef)
}

MSGPS2<-function(dat, tf.names){
size<-dim(dat)[2]
len.coef<-length(tf.names)+1

reg.coef<-matrix(0,ncol(dat), length(tf.names)+1)

tf.pos<-match(tf.names, colnames(dat))
tf.dat<-dat[,tf.pos]

for (i in 1:size){

print (i)
i.pos<-match(row.names(dat)[i],tf.names)

if (is.na(i.pos)==TRUE){
  
  msgps_out<-msgps(tf.dat, dat[,i])
  coef<-coef(msgps_out)[,2]
  
  
  # summary(lm_out)
  
  for (j in 1:len.coef){
    reg.coef[i,j]=coef[j]}
  
}else{
  
  msgps_out<-msgps(tf.dat[,-i.pos], dat[,i])
  coef<-coef(msgps_out)[,2]
  
  if (j<i.pos){
    reg.coef[i,j]=coef[j]
  }else if (j==i.pos){
    reg.coef[i,j]<-0
  }else{
    reg.coef[i,j]=coef[j-1]
  }
  
}

}
return(reg.coef)
}

glmnet

GLMNET<-function(dat){

size<-dim(dat)[2]
reg.coef<-matrix(0,size,size+1)

for (i in 1:size){

print (i)
cv_output <- cv.glmnet(dat[,-i], dat[,i],
alpha = 1)

identifying best lamda

best_lam <- cv_output$lambda.min
best_lam

lasso_best <- glmnet(dat[,-i], dat[,i], alpha = 1, lambda = best_lam)
coef<-coef(lasso_best)

for (j in 1:(size+1)){

if (j<i){
  reg.coef[i,j]=coef[j]
}else if (j==i){
  reg.coef[i,j]<-0
}else{
  reg.coef[i,j]<-coef[j-1]
}

}
}
return(reg.coef)
}

GLMNET2<-function(dat, tf.names){
size<-dim(dat)[2]
len.coef<-length(tf.names)+1

reg.coef<-matrix(0,ncol(dat), length(tf.names)+1)

tf.pos<-match(tf.names, colnames(dat))
tf.dat<-dat[,tf.pos]

for (i in 1:size){

print (i)
i.pos<-match(row.names(dat)[i],tf.names)



if (is.na(i.pos)==TRUE){
  

  
  cv_output <- cv.glmnet(tf.dat, dat[,i],
                         alpha = 1)
  
  best_lam <- cv_output$lambda.min
  best_lam
  
  lasso_best <- glmnet(tf.dat, dat[,i], alpha = 1, lambda = best_lam)
  coef<-coef(lasso_best)
  
  # summary(lm_out)
  
  for (j in 1:len.coef){
    reg.coef[i,j]=coef[j]}
  
}else{

  # identifying best lamda
  
  cv_output <- cv.glmnet(tf.dat[,-i.pos], dat[,i],
                         alpha = 1)
  
  best_lam <- cv_output$lambda.min
  best_lam
  
  lasso_best <- glmnet(tf.dat[,-i.pos], dat[,i], alpha = 1, lambda = best_lam)
  coef<-coef(lasso_best)
  
  if (j<i.pos){
    reg.coef[i,j]=coef[j]
  }else if (j==i.pos){
    reg.coef[i,j]<-0
  }else{
    reg.coef[i,j]=coef[j-1]
  }
  
}

}
return(reg.coef)
}

######### get the rsq of test data set

rss <- sum((preds - actual) ^ 2)

tss <- sum((actual - mean(actual)) ^ 2)

rsq <- 1 - rss/tss

rsq

RSQ<-function(dat,reg.coef){

size<-dim(dat)[2]
dif<-matrix(0,dim(dat)[1],dim(dat)[2])

rsq<-rep(0,size)
rss<-rep(0,size)
tss<-rep(0,size)

predict.x<-dif
reg.coef.x<-reg.coef[,-1]

for (i in 1:dim(dat)[1]){
print (i)

for (j in 1:dim(dat)[2]){
  predict.x[i,j]<-reg.coef[j,1]+sum(reg.coef.x[j,]*dat[i,])
  dif[i,j]<-dat[i,j]-predict.x[i,j]
}

}

predict.x[predict.x<0]<-0

for (j in 1:size){
print (j)
rss[j]<-sum(dif[,j]^2)
tss[j]<-sum((dat[,j]-mean(predict.x[,j]))^2)
rsq[j]<-1-rss[j]/tss[j]
}

return (rsq)
}

RSQ2<-function(dat,tf.names, reg.coef){

size<-dim(dat)[2]
tf.pos<-match(tf.names, colnames(dat))
tf.dat<-dat[,tf.pos]

dif<-matrix(0,dim(dat)[1],dim(dat)[2])

rsq<-rep(0,size)
rss<-rep(0,size)
tss<-rep(0,size)

predict.x<-dif
reg.coef.x<-reg.coef[,-1]

for (i in 1:dim(dat)[1]){
print (i)

for (j in 1:dim(dat)[2]){
  predict.x[i,j]<-reg.coef[j,1]+sum(reg.coef.x[j,]*tf.dat[i,])
  dif[i,j]<-dat[i,j]-predict.x[i,j]
}

}

predict.x[predict.x<0]<-0

for (j in 1:size){
print (j)
rss[j]<-sum(dif[,j]^2)
tss[j]<-sum((dat[,j]-mean(predict.x[,j]))^2)
rsq[j]<-1-rss[j]/tss[j]
}

return (rsq)
}

########### v2.0 input [gene_expression_matrix, names of TFs]

########### input [gene_expression_matrix, names of TFs, reg network matrix]

相关文章

网友评论

      本文标题:regression network version 1.1

      本文链接:https://www.haomeiwen.com/subject/svntsktx.html