import os
import pandas as pd
# load configfile
# global vals
metadata = pd.read_csv("id", sep="\t").set_index("SampleID", drop=False)
te = pd.read_csv("id.txt", sep="\t").set_index("Te", drop=False)
samples = metadata["SampleID"]
samples_num = len(samples)
te=te["Te"]
# includes
def get_fastq(wildcards):
return metadata.loc[wildcards.samples, ["fq1"]].dropna()
# map trimmed fastq to genome
for i in samples:{
print(i)
}
for id in te:{
print(id)
}
rule all:
input:
expand( "fa/{te}/{samples}-{te}.fa",te=te,samples=samples),
# expand( "out/{te}/{samples}-{te}.bam",te=te,samples=samples),
# expand( "fa/{te}/{samples}-{te}.fa",te=te,samples=samples),
rule minimap2:
input:
fastq=("raw_data/{samples}.fq.gz"),
output:
# bam = ("out/{te}/{samples}-{te}.bam"),
fa = ("fa/{te}/{samples}-{te}.fa"),
# fa = ("fa/{te}/{samples}-{te}.fa"),
shell:
"""minimap2 -ax map-pb mini/{wildcards.te}.fa {input} |awk -F "\\t" '{{if ( ($1!~/^@/) && (($2==16) ) ) {{print ">"$1"\\n"$10}}}}' > {output.fa} """
##注意shell脚本
# """minimap2 -ax map-pb mini/{wildcards.te}.fa {input} | samtools view -bS -@ 22 - > {output.bam} && samtools view {output.bam} | awk -F "\t" '{if ( ($1!~/^@/) && (($2==16) ) ) {print ">"$1"\n"$10}}' > {output.fa} """
#rule bam2fq:
# input:
# bam = ("out/{wildcards.te}/{wildcards.samples}-{wildcards.te}.bam"),
# output:
# fa = ("fa/{wildcards.te}/{wildcards.samples}-{wildcards.te}.fa"),
# shell:
# """samtools view {input} | awk -F "\t" '{if ( ($1!~/^@/) && (($2==16) ) ) {print ">"$1"\n"$10}}' > {output}"""
网友评论