1、文件1:样本突变文件out.csv
![](https://img.haomeiwen.com/i22798912/7fa235c44041c297.png)
2、cosmic注释文件: hg19_cosmic89_somatic.txt.gz
![](https://img.haomeiwen.com/i22798912/bf94786ab9b05510.png)
3、输出文件:out_9.csv
对样本突变位点进行cosmic注释后的结果(未匹配到cosmic记录的位点,注释列为空)
![](https://img.haomeiwen.com/i22798912/95b09daf90569d54.png)
二、脚本:
import gzip
import pandas as pd
# Default file locations used when this file is run as a script.
sample = 'out.csv'
output = 'out_9.csv'
cosmic_file = 'hg19_cosmic89_somatic.txt.gz'

# COSMIC columns appended to every sample variant.
COSMIC_COLUMNS = ['cosmic_id', 'cosmic_CDS', 'cosmic_pHGVs', 'cosmic_CNT']


def annotate_with_cosmic(sample_path, cosmic_path, output_path):
    """Annotate the variants in a sample CSV with COSMIC fields.

    Each variant is identified by the key ``CHROM-POS-REF-ALT``. The matching
    key on the COSMIC side is ``chr<#Chr>-Start-Ref-Alt``, i.e. a ``chr``
    prefix is added to the COSMIC chromosome before comparing.

    Args:
        sample_path: CSV file with at least the columns ``CHROM``, ``POS``,
            ``REF`` and ``ALT``.
        cosmic_path: Gzip-compressed, tab-separated COSMIC table with the
            columns ``#Chr``, ``Start``, ``Ref``, ``Alt`` and every column
            listed in ``COSMIC_COLUMNS``.
        output_path: Destination CSV. It holds all sample columns followed by
            the COSMIC columns; the join key itself is not written.

    Returns:
        The merged ``DataFrame`` (indexed by the join key) that was written
        to ``output_path``.
    """
    with gzip.open(cosmic_path, 'rt') as f:
        cosmic = pd.read_table(f)

    # Build the COSMIC join key without modifying the loaded table.
    cosmic_keys = (
        'chr' + cosmic['#Chr'].astype(str)
        + '-' + cosmic['Start'].astype(str)
        + '-' + cosmic['Ref']
        + '-' + cosmic['Alt']
    )
    cosmic_index = cosmic[COSMIC_COLUMNS].set_index(cosmic_keys)

    # NOTE(review): engine='python' is kept from the original script; the
    # default C engine is probably sufficient -- confirm before changing.
    data = pd.read_csv(sample_path, engine='python')
    sample_keys = (
        data['CHROM']
        + '-' + data['POS'].astype(str)
        + '-' + data['REF']
        + '-' + data['ALT']
    )
    # Index by the key directly instead of adding a temporary column, so an
    # existing sample column can never be overwritten by the helper key.
    data_index = data.set_index(sample_keys)

    # Left join: every sample variant is kept; variants without a COSMIC
    # record get empty annotation columns.
    res = pd.merge(
        data_index,
        cosmic_index,
        left_index=True,
        right_index=True,
        how='left',
    )
    res.to_csv(output_path, index=False)
    return res


if __name__ == '__main__':
    annotate_with_cosmic(sample, cosmic_file, output)
网友评论