view get_hrun.py @ 0:84f70ce0b830 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
author iuc
date Tue, 02 Mar 2021 21:35:40 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python
import argparse

import vcf
from pyfaidx import Fasta
from vcf.parser import _Info as VcfInfo


parser = argparse.ArgumentParser(description='Generate report tables')
parser.add_argument("--reference",
                    required=True,
                    help="Filepath to reference FASTA file")
parser.add_argument("--in-vcf",
                    required=True,
                    help="Filepath to vcf file to be analyzed")
parser.add_argument("--out-vcf",
                    required=True,
                    help="Filepath to vcf file to be output")

args = parser.parse_args()
ref_path = args.reference
reference = Fasta(ref_path, sequence_always_upper=True, read_ahead=1000)
in_vcf_path = args.in_vcf
in_vcf_handle = open(in_vcf_path)
in_vcf = vcf.Reader(in_vcf_handle)
in_vcf.infos['HRUN'] = VcfInfo(
    'HRUN', 1, 'Integer',
    'Homopolymer length to the right of report indel position',
    "get_hrun", "1.0")
out_vcf_path = args.out_vcf
out_vcf_handle = open(out_vcf_path, 'w')
out_vcf = vcf.Writer(out_vcf_handle, in_vcf)
for record in in_vcf:
    chrom = record.CHROM
    pos = record.POS - 1
    ref = record.REF
    calc_hrun = False
    for alt in record.ALT:
        if len(ref) != len(alt):
            calc_hrun = True
    if calc_hrun:
        window = 50
        hrun = 1
        start = pos + 2
        end = start + window
        base = reference[chrom][pos + 1]
        seq_len = len(reference[chrom])
        for i in range(start, len(reference)):
            base2 = reference[chrom][i]
            if base == base2:
                hrun += 1
            else:
                break
        # Extend to left in case not left aligned
        for i in range(pos, -1, -1):
            if reference[chrom][i] == base:
                hrun += 1
            else:
                break
        record.add_info('HRUN', [hrun])
    out_vcf.write_record(record)
in_vcf_handle.close()
out_vcf.close()
out_vcf_handle.close()