数据准备
Circos 前期需要准备的文件很多,首先是 KARYOTYPE — BIOLOGY APPLICATIONS。
染色体长度统计(KARYOTYPE — BIOLOGY APPLICATIONS)
"""
@Description: Calculate the length of chromosomes and get it ready for the Circos.
@usage: python count_chr_length.py input_file output.txt
@File: count_chr_length.py
@Time: 2020/10/13
"""
import sys
import pandas as pd
# Column order of the output table, one row per FASTA record:
# the literal "chr", the literal "-", the record ID, its label (same as the
# ID), the start coordinate (always 0) and the end coordinate (the length).
KARYOTYPE_COLUMNS = ["chr", "-", "ID", "label", "start", "end"]


def count_chr_lengths(lines):
    """Return a dict mapping each FASTA record name to its sequence length.

    Args:
        lines: Iterable of text lines in FASTA format (an open file works).

    Returns:
        dict of ``name -> length`` in order of first appearance. The name is
        the header line without its leading ``>`` and without surrounding
        whitespace. A header that appears more than once restarts its count
        at zero, so only the last record with that name is kept.

    Raises:
        ValueError: If non-blank sequence data appears before any header.
    """
    lengths = {}
    current = None
    for line in lines:
        if line.startswith(">"):
            # Drop only the ">" marker. str.strip("[>\n]") would treat its
            # argument as a set of characters and also remove genuine
            # leading/trailing brackets from the name, while leaving "\r"
            # behind on CRLF files.
            current = line[1:].strip()
            lengths[current] = 0
            continue
        residues = line.strip()
        if not residues:
            # Blank lines contribute nothing to the length.
            continue
        if current is None:
            raise ValueError(
                "sequence data found before the first FASTA header (>)"
            )
        lengths[current] += len(residues)
    return lengths


def karyotype_rows(lengths):
    """Build one ``(chr, -, ID, label, start, end)`` tuple per record."""
    return [("chr", "-", name, name, 0, size) for name, size in lengths.items()]


def write_karyotype(lengths, output_path):
    """Write the karyotype table as tab-separated text.

    No header row is written: column names would otherwise end up as a
    first data line, with strings sitting in the start/end columns.
    """
    table = pd.DataFrame(karyotype_rows(lengths), columns=KARYOTYPE_COLUMNS)
    table.to_csv(path_or_buf=output_path, sep="\t", index=False, header=False)


def main(argv):
    """Read the FASTA file at ``argv[1]`` and write the table to ``argv[2]``."""
    if len(argv) < 3:
        sys.exit("usage: python count_chr_length.py input_file output.txt")
    input_file, output_file = argv[1], argv[2]
    with open(input_file, "r") as read_fa:
        lengths = count_chr_lengths(read_fa)
    write_karyotype(lengths, output_file)


if __name__ == "__main__":
    main(sys.argv)
网友评论