美文网首页
WGCNA衍生:python识别模块间基因互作

WGCNA衍生:python识别模块间基因互作

作者: 挽山 | 来源:发表于2020-06-30 08:01 被阅读0次

    1. 构建随机互作网络

    # -*- coding: utf-8 -*-
    """
    Created on Mon Aug 20 10:47:00 2018
    
    @author: xllix
    """
    
    #coding=gbk
    #python -m pip install --upgrade pip 更新pip
    
    import networkx
    import random
    import os
    
    ### String_human_PPIs作为背景集进行crosstalk分析
    ### 用python随机1000次,得到1000个随机网络 randomRRI1-1000
    f_PPIs = open(r'C:\\Users\\xllix\\Documents\\9-Symbol_900_si.txt') #900分
    
    random_path = 'C:\\Users\\xllix\\Documents\\9-randomNet'
    
    error_path = 'C:\\Users\\xllix\\Documents\\9-randomError'
    
    originalNet = networkx.Graph() #原始无向网络 空图
    allGene = originalNet.nodes() #节点
    netDegree = originalNet.degree() #度
    
    for line in f_PPIs.readlines(): 
        gene = line.strip().split()
        if gene[0] != gene[1]:
            originalNet.add_edge(gene[0], gene[1]) #边
    f_PPIs.close()
    
    print (originalNet.number_of_nodes(), originalNet.number_of_edges())
    print ('---------------infor of original net---------------')
    print ('---------------infor of random PPIs----------------')
    
    
    for i in range(1,1001):
        randomPPIsname = 'randomPPIs'+str(i)
        errorName = 'error' + str(i)
        randomPPIs = networkx.Graph()
        existedNode = []
        f_error_out = open(os.path.join(error_path, errorName+'.txt'), 'w')
        allGene = random.sample(allGene, len(allGene))
        for g1 in allGene:
            existedNode.append(g1)
            randomPPIs.add_node(g1)
            existedNode1 = randomPPIs.neighbors(g1)
            g1_degree = randomPPIs.degree(g1)
            remainGene = list(set(allGene).difference(set(existedNode)).difference(set(existedNode1)))
            try:
                randGene = random.sample(remainGene, netDegree[g1]-g1_degree)
            except:
                randGene = random.sample(remainGene, len(remainGene))
                f_error_out.write(g1 + '\t' + str(originalNet.degree(g1)) + '\t' + str(randomPPIs.degree(g1) + len(remainGene)) + '\n')
            for g2 in randGene:
                randomPPIs.add_edge(g1, g2)
                if randomPPIs.degree(g2) == netDegree[g2]:
                    existedNode.append(g2)
        f_error_out.close()
        print (i,randomPPIs.number_of_nodes(), randomPPIs.number_of_edges())
        
        f_random_out = open(os.path.join(random_path, randomPPIsname+'.txt'), 'w')
        for p1 in randomPPIs.nodes():
            for p2 in randomPPIs.neighbors(p1):
                pair1 = p1 + '\t' + p2
                f_random_out.write(pair1 + '\n')
        f_random_out.close()
    
    

    2. 用python寻找交互关系

    # -*- coding: utf-8 -*-
    """
    Created on Mon Aug 20 21:44:33 2018
    
    @author: xllix
    """
    
    ### 用python寻找crosstalk交互关系
    import os
    f_module = open(r'C:\\Users\\xllix\\Documents\\test\\Random_set_m2m4_2.txt') #同一模块的基因在一行
    f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Crosstalk_Result.txt', 'w')
    f_realNet = 'C:\\Users\\xllix\\Documents\\9-Symbol_900_si.txt'
    path_randNet = 'C:\\Users\\xllix\\Documents\\9-randomNet'
    
    module1 = {}
    data_module = f_module.readlines()
    f_module.close()
    
    nModule = len(data_module)
    for index, l_module in enumerate(data_module):#enumerate用于一个可遍历的数据对象,同时列出数据和数据下标
        #print(index,l_module)
        l_m = l_module.strip().split()
        moduleName = 'm' + str(index+1)
        print(moduleName)
        module1[moduleName] = l_m #{}字典,冒号':'分开键和值,逗号','隔开组。[]可变列表
        print(l_m)
    f_module.close()
    print ('---------------infor of module gene---------------')
    print ('---------------infor of module crosstalk----------')
    
    def calInteractionNu(m1, m2, net): 
        f_net = open(net) 
        countCross = 0
        L=[]
        R=[]
        for l_net in f_net.readlines():
            pair = l_net.strip().split()
            if pair[0] in m1 and pair[1] in m2:
                countCross += 1
                L.append(pair)
        f_net.close()
        R.append(countCross)
        R.append(L)
        return (R)
    
    module2 = module1
    f_crosstalk.write('module1  module2 num_of_crosstalk    p_value crosstalk\n')
    
    print ("'''自定义函数(找互作次数)'''")
    done = []
    for m1k, m1v in module1.items():
        for m2k, m2v in module2.items():
            pairModule1 = m1k+'\t'+m2k
            pairModule2 = m2k+'\t'+m1k
            if pairModule1 not in done and pairModule2 not in done:
                done.append(pairModule1)
                if m1k != m2k:
                    nRealPair = calInteractionNu(m1v, m2v, f_realNet)[0]
                    crosstalk = calInteractionNu(m1v, m2v, f_realNet)[1]
                    if nRealPair > 0:
                        for root, dirs, randFiles in os.walk(path_randNet):
                            countLagerRand = 0
                            for randF in randFiles:
                                f_randNet = os.path.join(path_randNet, randF)
                                nRandPair = calInteractionNu(m1v, m2v, f_randNet)[0]
                                if nRealPair < nRandPair:
                                    countLagerRand += 1
                            p_value = float(countLagerRand)/len(randFiles)
                            print(float(countLagerRand))
                            f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(nRealPair)+'\t'+str(p_value)+'\t'+str(crosstalk)+'\n')
    f_crosstalk.close()
    
    
    

    3. 筛选显著crosstalk

    ### 用python筛选显著crosstalk
    f_crosstalk = open(r'E:\\ASD-自噬毕业课题\\开学前\\821\\Crosstalk_Result.txt')
    f_out = open(r'E:\\ASD-自噬毕业课题\\开学前\\821\\Crosstalk_significant_module.txt', 'w')
    
    f_out.write('module1    module2 num_of_crosstalk    p_value\n')
    
    f_crosstalk.readline()
    for line in f_crosstalk.readlines():
        l = line.strip().split()
        if float(l[3]) < 0.05:
            f_out.write(line)
    f_crosstalk.close()
    f_out.close()
    

    4. 以比较互作次数为主

    # -*- coding: utf-8 -*-
    """
    Created on Wed Dec 19 17:57:15 2018
    
    @author: xllix
    """
    
    import os
    f_realNet = 'C:\\Users\\xllix\\Documents\\crosstalk_possess\\9-Symbol_900_si.txt' #真实PPI
    f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\3\\Crosstalk_between_modules.txt', 'a+') 
    
    #f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Rand_sets\\Crosstalk_Results_rand_m4m2.txt', 'a+') #随机
    #f_crosstalk = open(r'C:\\Users\\xllix\\Documents\\Real_sets\\Crosstalk_Results_real.txt', 'a+') #真实
    
    path="C:\\Users\\xllix\\Documents\\3" 
    
    #path="C:\\Users\\xllix\\Documents\\Rand_sets\\rand_set_m4m2\\" #随机
    #path="C:\\Users\\xllix\\Documents\\Real_sets\\" #真实
    
    path_list=os.listdir(path)
    print(len(path_list))
    
    f_crosstalk.write('from to files num_of_crosstalk crosstalk\n')
            
    for files in path_list:
        print(os.path.join(path,files))
        
        f_module= open(r'C:\\Users\\xllix\\Documents\\3\\'+files) 
        #f_module= open(r'C:\\Users\\xllix\\Documents\\Rand_sets\\rand_set_m4m2\\'+files) #随机
        #f_module= open(r'C:\\Users\\xllix\\Documents\\Real_sets\\'+files) #真实
        
        module1 = {}
        data_module = f_module.readlines()
        f_module.close()
        
        nModule = len(data_module)
        print ('---------------infor of module gene---------------')
        for index, l_module in enumerate(data_module):#enumerate用于一个可遍历的数据对象,同时列出数据和数据下标
            #print(index,l_module)
            l_m = l_module.strip().split()
            moduleName = 'm' + str(index+1)
            print(moduleName)
            module1[moduleName] = l_m #{}字典,冒号':'分开键和值,逗号','隔开组。[]可变列表
            print(l_m)
            f_module.close()
            
        print ('---------------infor of module crosstalk----------')
        def calInteractionNu(m1, m2, net): 
          f_net = open(net) 
          countCross = 0
          L=[]
          R=[]
          for l_net in f_net.readlines():
              pair = l_net.strip().split()
              if pair[0] in m1 and pair[1] in m2:
                  countCross += 1
                  L.append(pair)
          f_net.close()
          R.append(countCross)
          R.append(L)
          return (R)
      
        module2 = module1
        
        print ("'''自定义函数(找互作次数)'''")
        done = []
        for m1k, m1v in module1.items():
            for m2k, m2v in module2.items():
                pairModule1 = m1k+'\t'+m2k
                pairModule2 = m2k+'\t'+m1k
                if pairModule1 not in done and pairModule2 not in done:
                    done.append(pairModule1)
                    if m1k != m2k:
                        nRealPair = calInteractionNu(m1v, m2v, f_realNet)[0]
                        print(nRealPair)
                        crosstalk = calInteractionNu(m1v, m2v, f_realNet)[1]
                        f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(files)+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')
                        #f_crosstalk.write(m1k+'\t'+m2k+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')
                        #if nRealPair > 78:
                        #    print('yeah!!')
                        #    f_crosstalk.write(str(files)+'\t'+str(nRealPair)+'\t'+str(crosstalk)+'\n')
    
    f_crosstalk.close()             
    
    

    相关文章

      网友评论

          本文标题:WGCNA衍生:python识别模块间基因互作

          本文链接:https://www.haomeiwen.com/subject/ycqvqhtx.html