美文网首页
python 读写excel (合并单元格)

python 读写excel (合并单元格)

作者: 树懒吃糖_ | 来源:发表于2020-12-18 15:45 被阅读0次

    第一次使用python来处理excel,主要是为了记录下合并单元格,单元格样式
    示例:从左边 变成右边,将第一列的 实现合并单元格


    图片.png

    工具:python3, xlrd ,xlwt

    import xlrd
    import sys
    import xlwt
    import os
    
    #根据化疗位点,来提取突变检出
    def Chemo25Sites(path):
        chemosites = dict()
        file = open(path)
        lines = file.readlines()
        rows = len(lines)
        for i in range(1, rows):
            aa = lines[i].strip().split('\t')
            chr, pos, rsid, ref, wild_type = aa
            chemosites[rsid] = wild_type+'\t'+chr+'\t'+pos+'\t'+ref
        file.close()
        return chemosites
    
    
    def getChemoVariants(path, vcf):
        #vcf : sample.vatriants_all.vcf
        chemosites = Chemo25Sites(path)
        outfile = open('./chemo_result_genotype.txt','w')
        chemovariants = dict()
        for key in chemosites.keys():
            cmd = 'grep -w {key} {infile}'.format(key=key, infile=vcf)
            res = os.popen(cmd).readlines()
    
            if len(res) == 0:   #该化疗位点没有检出
                value = chemosites[key].split('\t')
                genotype, chrom, pos, ref = value
                #print('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype]))
                chemovariants[key] = [key, chrom, pos, ref, "*", "*", "*", "*", genotype]
                outfile.write('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype])+'\n')
            elif len(res) == 1:  #该化疗位点有检出
                line = res[0].strip().split('\t')
                chr, start, _, ref, alt, depth, ad, freq = line[0:8]
                if float(freq) > 0.7:
                    if alt=="-":
                        genotype = "del/del"
                    else:
                        genotype = alt + alt  #纯和突变
                elif float(freq) < 0.2:   #野生型
                    if len(ref)>1:
                        genotype = ref+'/'+ref
                    else:
                        genotype = ref+ref
                else:
                    if alt=='-':
                        genotype = ref+'/del'
                    else:
                        genotype = ref+alt    #杂合突变
                #print('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype]))
                chemovariants[key] = [key, chr, start, ref, alt, depth, ad, freq, genotype]
                outfile.write('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype])+'\n')
            elif len(res) > 1:
                print("Warnning: {key} has more than one variant!!".format(key=key))
        return chemovariants
    
    ## 注释的信息表
    # 检索key: 药物+rsid+基因型
    def chemoAnnotationInfos(annofile):
        anno_dict = dict()
        with open(annofile, encoding='utf-8') as file:
            for line in file:
                aa = line.strip().split('\t')
                if len(aa) != 6:
                    print(line)
                _, drug, rsid, genotype, _, anno_chinese = aa
                key = drug+'\t'+rsid+'\t'+genotype
                anno_dict[key] = anno_chinese
        return anno_dict
    
    def GetChemoAnnotation(annofile, path, vcf, excelfile):
        anno_dict = chemoAnnotationInfos(annofile)  #key:  drug+'\t'+rsid+'\t'+genotype ; val :anno_chinese
        chemovariants = getChemoVariants(path, vcf)  #key: rsid ;   val:[key, chr, start, ref, alt, depth, ad, freq, genotype]
    
        #read excel.sheet
        data = xlrd.open_workbook(excelfile)
        table = data.sheet_by_name('sheet1')
        table2 = data.sheet_by_name('sheet2')
        table3 = data.sheet_by_name('sheet3')
        row_numb = table.nrows
        col_numb = table.ncols
    
        row_numb2 = table2.nrows
        col_numb2 = table2.ncols
    
        row_numb3 = table3.nrows
        col_numb3 = table3.ncols
    
        new_excel = xlwt.Workbook()
        sheet1 = new_excel.add_sheet('sheet1', cell_overwrite_ok=True)
        sheet2 = new_excel.add_sheet('sheet2', cell_overwrite_ok=True)
        sheet3 = new_excel.add_sheet('sheet3', cell_overwrite_ok=True)
    
        pattern = xlwt.Pattern()
        pattern.pattern = xlwt.Pattern.SOLID_PATTERN
        pattern.pattern_fore_colour = 12
        #单元格边框样式
        borders = xlwt.Borders()
        borders.left = 2
        borders.right = 2
        borders.top = 2
        borders.bottom = 2
        # 初始化样式1
        style1 = xlwt.XFStyle()
        style1.pattern = pattern
        style1.borders = borders
        # 初始化样式2
        style2 = xlwt.XFStyle()
        style2.borders = borders
    
    
        for st in [sheet1, sheet2, sheet3]:
            st.write(0, 0, "化疗药物", style1)
            st.write(0, 1, "检测基因", style1)
            st.write(0, 2, "检测位点", style1)
            st.write(0, 3, "基因型", style1)
            st.write(0, 4, "证据等级", style1)
            st.write(0, 5, "注释", style1)
    
        #或者 table 中的单元格值
        for i in range(1, row_numb):
            drug = table.cell(i, 0).value
            gene = table.cell(i, 1).value
            rsid = table.cell(i, 2).value
            genotype = table.cell(i, 3).value
            level = table.cell(i, 4).value
    
            key = drug+'\t'+rsid+'\t'+genotype
    
            cur_genotype = chemovariants.get(rsid, 'NA')
            if cur_genotype == "NA":
                cur_genotype = genotype
            else:
                cur_genotype = cur_genotype[-1]
            annotation_chinese = anno_dict.get(key, 'NA')
    
            sheet1.write(i, 0, drug, style2)
            sheet1.write(i, 1, gene, style2)
            sheet1.write(i, 2, rsid, style2)
            sheet1.write(i, 3, cur_genotype, style2)
            sheet1.write(i, 4, level, style2)
            sheet1.write(i, 5, annotation_chinese, style2)
        sheet1.write_merge(1,12,0,0, table.cell(1,0).value, style2)
        sheet1.write_merge(14,16,0,0, table.cell(14,0).value, style2)
        sheet1.write_merge(18,24,0,0, table.cell(18,0).value, style2)
        sheet1.write_merge(25,26,0,0, table.cell(25,0).value, style2)
        sheet1.write_merge(28,29,0,0, table.cell(28,0).value, style2)
        sheet1.write_merge(30,31,0,0, table.cell(30,0).value, style2)
        sheet1.write_merge(32,33,0,0, table.cell(32,0).value, style2)
    
        for i in range(1, row_numb2):
            drug = table2.cell(i, 0).value
            gene = table2.cell(i, 1).value
            rsid = table2.cell(i, 2).value
            genotype = table2.cell(i, 3).value
            level = table2.cell(i, 4).value
    
            key = drug+'\t'+rsid+'\t'+genotype
    
            cur_genotype = chemovariants.get(rsid, 'NA')
            if cur_genotype == "NA":
                cur_genotype = genotype
            else:
                cur_genotype = cur_genotype[-1]
            annotation_chinese = anno_dict.get(key, 'NA')
    
            sheet2.write(i, 0, drug, style2)
            sheet2.write(i, 1, gene, style2)
            sheet2.write(i, 2, rsid, style2)
            sheet2.write(i, 3, cur_genotype, style2)
            sheet2.write(i, 4, level, style2)
            sheet2.write(i, 5, annotation_chinese, style2)
        sheet2.write_merge(1,7,0,0,table.cell(1,0).value, style2)
        sheet2.write_merge(8,11,0,0,table.cell(8,0).value, style2)
        sheet2.write_merge(12,13,0,0,table.cell(12,0).value, style2)
        sheet2.write_merge(14,17,0,0,table.cell(14,0).value, style2)
        sheet2.write_merge(18,20,0,0,table.cell(18,0).value, style2)
        sheet2.write_merge(22,24,0,0,table.cell(22,0).value, style2)
        sheet2.write_merge(25,30,0,0,table.cell(25,0).value, style2)
        sheet2.write_merge(31,33,0,0,table.cell(31,0).value, style2)
    
    
        for i in range(1, row_numb3):
            drug = table3.cell(i, 0).value
            gene = table3.cell(i, 1).value
            rsid = table3.cell(i, 2).value
            genotype = table3.cell(i, 3).value
            level = table3.cell(i, 4).value
    
            key = drug+'\t'+rsid+'\t'+genotype
    
            cur_genotype = chemovariants.get(rsid, 'NA')
            if cur_genotype == "NA":
                cur_genotype = genotype
            else:
                cur_genotype = cur_genotype[-1]
            annotation_chinese = anno_dict.get(key, 'NA')
    
            sheet3.write(i, 0, drug, style2)
            sheet3.write(i, 1, gene, style2)
            sheet3.write(i, 2, rsid, style2)
            sheet3.write(i, 3, cur_genotype, style2)
            sheet3.write(i, 4, level, style2)
            sheet3.write(i, 5, annotation_chinese, style2)
        sheet3.write_merge(4,6,0,0,table.cell(4,0).value, style2)
        sheet3.write_merge(9,14,0,0,table.cell(9,0).value, style2)
        sheet3.write_merge(16,18,0,0,table.cell(16,0).value, style2)
    
        new_excel.save(r'./chemo_result.xls')
    
    annofile = '~/chemo/chemo_annotation_info.txt'
    path = '~/chemo/chemo_25sites_info.txt'
    #vcf = './BP20201204-2.vatriants_all.txt'
    excelfile = '~/chemo/tample.xls'
    vcf = sys.argv[1]
    GetChemoAnnotation(annofile, path, vcf, excelfile)
    
    

    相关文章

      网友评论

          本文标题:python 读写excel (合并单元格)

          本文链接:https://www.haomeiwen.com/subject/yttfnktx.html