美文网首页
python自学笔记

python自学笔记

作者: 夹竹桃的下午 | 来源:发表于2022-01-05 16:40 被阅读0次

    文件读取写出

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    import sys, os
    # Append, for every line of ./huyangzxz, the second tab-separated field
    # truncated at its first '.' to ./7.30test.txt (one value per line).
    #
    # Both files are opened once and are closed by the context manager even
    # when a malformed line raises part-way through the loop.
    with open("./huyangzxz", "r") as src, open("./7.30test.txt", "a") as dst:
        for line in src:
            # Strip the line terminator first: otherwise a value sitting in the
            # last column with no '.' keeps its own "\n" and a blank line
            # follows it in the output.
            fields = line.rstrip("\n").split("\t")
            # A line without any tab has no second field and raises IndexError.
            gene = fields[1].split('.')[0] + "\n"
            dst.write(gene)
    

    用pandas处理：按列名合并

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    import sys, os
    import pandas as pd
    import numpy as np
    # Load the 'SLG1' sheet of the map workbook and the tab-separated table.
    df1 = pd.read_excel('/home/zjp/1/aaxabPsimoniiMap.xlsx', sheet_name='SLG1')
    df2 = pd.read_csv('/home/zjp/1/aaxab.txt', sep='\t')

    # Select the 'Female ref' column (kept as a one-column DataFrame) and
    # rename it to 'SP'.
    data = df1.loc[:, ['Female ref']]
    data.columns = ['SP']

    # With no `on=` argument, merge joins on the column names the two frames
    # share and performs an inner join that follows the key order of `data`.
    # NOTE(review): presumably df2 has an 'SP' column to join on - confirm.
    a = pd.merge(data, df2)

    # axis=1 drops a column (axis=0 would drop rows); inplace=True modifies the
    # DataFrame `a` itself instead of returning a new one.
    a.drop(labels=['1'], axis=1, inplace=True)

    # Save the result as a tab-separated file.
    a.to_csv("aaxabSLG1", sep='\t')
    
    对列重命名
    
    # Usage: <script> INPUT_TABLE OUTPUT_PATH
    x = sys.argv[1]
    out = sys.argv[2]

    # Read the table without a header row, then rebuild the frame from plain
    # Python row lists.
    raw_table = pd.read_table(x, header=None)
    data = pd.DataFrame(raw_table.values.tolist())

    # Label the columns with the strings "1" .. "145" so that they can be
    # addressed by their 1-based position.
    data.columns = [str(number) for number in range(1, 146)]

    # Remove columns 55-60 and 101-106.
    unwanted = [str(number) for number in list(range(55, 61)) + list(range(101, 107))]
    data = data.drop(unwanted, axis=1)

    # Write the remaining columns tab-separated, without header or index.
    data.to_csv(out, sep='\t', header=False, index=False)
    

    字典

    #!/usr/bin/env python
    
    # Build a dictionary from two columns of the .fai index:
    # column 1 (name) -> column 2 (length, as int).
    with open('./p.simonii.fasta.fai') as fai:
        fai_rows = (row.strip().split('\t') for row in fai)
        chr_length = {cols[0]: int(cols[1]) for cols in fai_rows}

    # Print every BED record with its start clamped to 0 and its end clamped
    # to the length recorded for that name; unclamped values are printed
    # exactly as they were read.
    with open('./p.simonii.fasta.ssr.bed') as bed:
        for row in bed:
            cols = row.strip().split('\t')
            chr_name, start, end = cols[0], cols[1], cols[2]
            start = 0 if int(start) < 0 else start
            end = chr_length[chr_name] if int(end) > chr_length[chr_name] else end
            print(chr_name, start, end, sep='\t')
    
    import os,sys
    # For every line of the input file, print its first tab-separated field
    # followed by the two derived names <field>.R1.fastq_filtered and
    # <field>.R2.fastq_filtered, tab-separated.
    x = sys.argv[1]
    with open(x) as f:
        for line in f:
            sample = line.strip().split('\t')[0]
            read1 = sample + '.R1.fastq_filtered'
            read2 = sample + '.R2.fastq_filtered'
            print('\t'.join((sample, read1, read2)))
    import re,os,sys
    # First command-line argument: path of the tab-separated file that is read.
    x=sys.argv[1]
    # Second command-line argument: path of the file that lines are appended to.
    y=sys.argv[2]
    def mufun(a):
        """Return 1 if the two tracts described by *a* differ in length by more than 4, else 0.

        The first four word-character runs of *a* are read as
        (unit, count, unit, count); each tract length is len(unit) * count.
        Raises IndexError when *a* has fewer than four such runs and
        ValueError when a count is not an integer.
        """
        tokens = re.findall(r'\w+', a)
        first_length = len(tokens[0]) * int(tokens[1])
        second_length = len(tokens[2]) * int(tokens[3])
        return 1 if abs(first_length - second_length) > 4 else 0
    # Append to `y` every line of `x` whose 4th, 5th and 6th tab-separated
    # columns all contain "(" and for which mufun() returns 1 for at least one
    # of those three columns.
    #
    # The output file is opened once for the whole run instead of once per
    # matching line; as a consequence it is created (empty) even when no line
    # matches. Append mode never truncates existing content.
    with open(x) as f, open(y, "a+") as f2:
        for line in f:
            tmp = line.strip().split('\t')
            if "(" in tmp[3] and "(" in tmp[4] and "(" in tmp[5]:
                # Named `hits` rather than `sum` so the builtin is not shadowed.
                hits = mufun(tmp[3]) + mufun(tmp[4]) + mufun(tmp[5])
                if hits > 0:
                    # Write the original line unchanged, terminator included.
                    f2.write(line)
    
    

    相关文章

      网友评论

          本文标题:python自学笔记

          本文链接:https://www.haomeiwen.com/subject/rfckcrtx.html