美文网首页
WGCNA衍生:python识别模块间基因互作

WGCNA衍生:python识别模块间基因互作

作者: 挽山 | 来源:发表于2020-06-30 08:01 被阅读0次

1. 构建随机互作网络

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 20 10:47:00 2018

@author: xllix
"""

#coding=gbk
#python -m pip install --upgrade pip 更新pip

import networkx
import random
import os

### String_human_PPIs作为背景集进行crosstalk分析
### 用python随机1000次,得到1000个随机网络 randomRRI1-1000
f_PPIs = open(r'C:\\Users\\xllix\\Documents\\9-Symbol_900_si.txt') #900分

random_path = 'C:\\Users\\xllix\\Documents\\9-randomNet'

error_path = 'C:\\Users\\xllix\\Documents\\9-randomError'

originalNet = networkx.Graph() #原始无向网络 空图
allGene = originalNet.nodes() #节点
netDegree = originalNet.degree() #度

for line in f_PPIs.readlines(): 
    gene = line.strip().split()
    if gene[0] != gene[1]:
        originalNet.add_edge(gene[0], gene[1]) #边
f_PPIs.close()

print (originalNet.number_of_nodes(), originalNet.number_of_edges())
print ('---------------infor of original net---------------')
print ('---------------infor of random PPIs----------------')


for i in range(1,1001):
    randomPPIsname = 'randomPPIs'+str(i)
    errorName = 'error' + str(i)
    randomPPIs = networkx.Graph()
    existedNode = []
    f_error_out = open(os.path.join(error_path, errorName+'.txt'), 'w')
    allGene = random.sample(allGene, len(allGene))
    for g1 in allGene:
        existedNode.append(g1)
        randomPPIs.add_node(g1)
        existedNode1 = randomPPIs.neighbors(g1)
        g1_degree = randomPPIs.degree(g1)
        remainGene = list(set(allGene).difference(set(existedNode)).difference(set(existedNode1)))
        try:
            randGene = random.sample(remainGene, netDegree[g1]-g1_degree)
        except:
            randGene = random.sample(remainGene, len(remainGene))
            f_error_out.write(g1 + '\t' + str(originalNet.degree(g1)) + '\t' + str(randomPPIs.degree(g1) + len(remainGene)) + '\n')
        for g2 in randGene:
            randomPPIs.add_edge(g1, g2)
            if randomPPIs.degree(g2) == netDegree[g2]:
                existedNode.append(g2)
    f_error_out.close()
    print (i,randomPPIs.number_of_nodes(), randomPPIs.number_of_edges())
    
    f_random_out = open(os.path.join(random_path, randomPPIsname+'.txt'), 'w')
    for p1 in randomPPIs.nodes():
        for p2 in randomPPIs.neighbors(p1):
            pair1 = p1 + '\t' + p2
            f_random_out.write(pair1 + '\n')
    f_random_out.close()

2. 用python寻找交互关系

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 20 21:44:33 2018

@author: xllix
"""

### 用python寻找crosstalk交互关系
import os
f_module = open(r'C:\\Users\\xllix\\Documents\\test\\Random_set_m2m4_2.txt') #同一模块的基因在一行
f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Crosstalk_Result.txt', 'w')
f_realNet = 'C:\\Users\\xllix\\Documents\\9-Symbol_900_si.txt'
path_randNet = 'C:\\Users\\xllix\\Documents\\9-randomNet'

module1 = {}
data_module = f_module.readlines()
f_module.close()

nModule = len(data_module)
for index, l_module in enumerate(data_module):#enumerate用于一个可遍历的数据对象,同时列出数据和数据下标
    #print(index,l_module)
    l_m = l_module.strip().split()
    moduleName = 'm' + str(index+1)
    print(moduleName)
    module1[moduleName] = l_m #{}字典,冒号':'分开键和值,逗号','隔开组。[]可变列表
    print(l_m)
f_module.close()
print ('---------------infor of module gene---------------')
print ('---------------infor of module crosstalk----------')

def calInteractionNu(m1, m2, net): 
    f_net = open(net) 
    countCross = 0
    L=[]
    R=[]
    for l_net in f_net.readlines():
        pair = l_net.strip().split()
        if pair[0] in m1 and pair[1] in m2:
            countCross += 1
            L.append(pair)
    f_net.close()
    R.append(countCross)
    R.append(L)
    return (R)

module2 = module1
f_crosstalk.write('module1  module2 num_of_crosstalk    p_value crosstalk\n')

print ("'''自定义函数(找互作次数)'''")
done = []
for m1k, m1v in module1.items():
    for m2k, m2v in module2.items():
        pairModule1 = m1k+'\t'+m2k
        pairModule2 = m2k+'\t'+m1k
        if pairModule1 not in done and pairModule2 not in done:
            done.append(pairModule1)
            if m1k != m2k:
                nRealPair = calInteractionNu(m1v, m2v, f_realNet)[0]
                crosstalk = calInteractionNu(m1v, m2v, f_realNet)[1]
                if nRealPair > 0:
                    for root, dirs, randFiles in os.walk(path_randNet):
                        countLagerRand = 0
                        for randF in randFiles:
                            f_randNet = os.path.join(path_randNet, randF)
                            nRandPair = calInteractionNu(m1v, m2v, f_randNet)[0]
                            if nRealPair < nRandPair:
                                countLagerRand += 1
                        p_value = float(countLagerRand)/len(randFiles)
                        print(float(countLagerRand))
                        f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(nRealPair)+'\t'+str(p_value)+'\t'+str(crosstalk)+'\n')
f_crosstalk.close()


3. 筛选显著crosstalk

### 用python筛选显著crosstalk
f_crosstalk = open(r'E:\\ASD-自噬毕业课题\\开学前\\821\\Crosstalk_Result.txt')
f_out = open(r'E:\\ASD-自噬毕业课题\\开学前\\821\\Crosstalk_significant_module.txt', 'w')

f_out.write('module1    module2 num_of_crosstalk    p_value\n')

f_crosstalk.readline()
for line in f_crosstalk.readlines():
    l = line.strip().split()
    if float(l[3]) < 0.05:
        f_out.write(line)
f_crosstalk.close()
f_out.close()

4. 以比较互作次数为主

# -*- coding: utf-8 -*-
"""
Created on Wed Dec 19 17:57:15 2018

@author: xllix
"""

import os
f_realNet = 'C:\\Users\\xllix\\Documents\\crosstalk_possess\\9-Symbol_900_si.txt' #真实PPI
f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\3\\Crosstalk_between_modules.txt', 'a+') 

#f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Rand_sets\\Crosstalk_Results_rand_m4m2.txt', 'a+') #随机
#f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Real_sets\\Crosstalk_Results_real.txt', 'a+') #真实

path="C:\\Users\\xllix\\Documents\\3" 

#path="C:\\Users\\xllix\\Documents\\Rand_sets\\rand_set_m4m2\\" #随机
#path="C:\\Users\\xllix\\Documents\\Real_sets\\" #真实

path_list=os.listdir(path)
print(len(path_list))

f_crosstalk.write('from to files num_of_crosstalk crosstalk\n')
        
for files in path_list:
    print(os.path.join(path,files))
    
    f_module= open(r'C:\\Users\\xllix\\Documents\\3\\'+files) 
    #f_module= open(r'C:\\Users\\xllix\\Documents\\Rand_sets\\rand_set_m4m2\\'+files) #随机
    #f_module= open(r'C:\\Users\\xllix\\Documents\\Real_sets\\'+files) #真实
    
    module1 = {}
    data_module = f_module.readlines()
    f_module.close()
    
    nModule = len(data_module)
    print ('---------------infor of module gene---------------')
    for index, l_module in enumerate(data_module):#enumerate用于一个可遍历的数据对象,同时列出数据和数据下标
        #print(index,l_module)
        l_m = l_module.strip().split()
        moduleName = 'm' + str(index+1)
        print(moduleName)
        module1[moduleName] = l_m #{}字典,冒号':'分开键和值,逗号','隔开组。[]可变列表
        print(l_m)
        f_module.close()
        
    print ('---------------infor of module crosstalk----------')
    def calInteractionNu(m1, m2, net): 
      f_net = open(net) 
      countCross = 0
      L=[]
      R=[]
      for l_net in f_net.readlines():
          pair = l_net.strip().split()
          if pair[0] in m1 and pair[1] in m2:
              countCross += 1
              L.append(pair)
      f_net.close()
      R.append(countCross)
      R.append(L)
      return (R)
  
    module2 = module1
    
    print ("'''自定义函数(找互作次数)'''")
    done = []
    for m1k, m1v in module1.items():
        for m2k, m2v in module2.items():
            pairModule1 = m1k+'\t'+m2k
            pairModule2 = m2k+'\t'+m1k
            if pairModule1 not in done and pairModule2 not in done:
                done.append(pairModule1)
                if m1k != m2k:
                    nRealPair = calInteractionNu(m1v, m2v, f_realNet)[0]
                    print(nRealPair)
                    crosstalk = calInteractionNu(m1v, m2v, f_realNet)[1]
                    f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(files)+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')
                    #f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')
                    #if nRealPair > 78:
                    #    print('yeah!!')
                    #    f_crosstalk.write(str(files)+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')

f_crosstalk.close()             

相关文章

网友评论

      本文标题:WGCNA衍生:python识别模块间基因互作

      本文链接:https://www.haomeiwen.com/subject/ycqvqhtx.html