Mercurial > repos > iuc > get_hrun
comparison get_hrun.py @ 0:84f70ce0b830 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
| author | iuc |
|---|---|
| date | Tue, 02 Mar 2021 21:35:40 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:84f70ce0b830 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 | |
| 4 import vcf | |
| 5 from pyfaidx import Fasta | |
| 6 from vcf.parser import _Info as VcfInfo | |
| 7 | |
| 8 | |
| 9 parser = argparse.ArgumentParser(description='Generate report tables') | |
| 10 parser.add_argument("--reference", | |
| 11 required=True, | |
| 12 help="Filepath to reference FASTA file") | |
| 13 parser.add_argument("--in-vcf", | |
| 14 required=True, | |
| 15 help="Filepath to vcf file to be analyzed") | |
| 16 parser.add_argument("--out-vcf", | |
| 17 required=True, | |
| 18 help="Filepath to vcf file to be output") | |
| 19 | |
| 20 args = parser.parse_args() | |
| 21 ref_path = args.reference | |
| 22 reference = Fasta(ref_path, sequence_always_upper=True, read_ahead=1000) | |
| 23 in_vcf_path = args.in_vcf | |
| 24 in_vcf_handle = open(in_vcf_path) | |
| 25 in_vcf = vcf.Reader(in_vcf_handle) | |
| 26 in_vcf.infos['HRUN'] = VcfInfo( | |
| 27 'HRUN', 1, 'Integer', | |
| 28 'Homopolymer length to the right of report indel position', | |
| 29 "get_hrun", "1.0") | |
| 30 out_vcf_path = args.out_vcf | |
| 31 out_vcf_handle = open(out_vcf_path, 'w') | |
| 32 out_vcf = vcf.Writer(out_vcf_handle, in_vcf) | |
| 33 for record in in_vcf: | |
| 34 chrom = record.CHROM | |
| 35 pos = record.POS - 1 | |
| 36 ref = record.REF | |
| 37 calc_hrun = False | |
| 38 for alt in record.ALT: | |
| 39 if len(ref) != len(alt): | |
| 40 calc_hrun = True | |
| 41 if calc_hrun: | |
| 42 window = 50 | |
| 43 hrun = 1 | |
| 44 start = pos + 2 | |
| 45 end = start + window | |
| 46 base = reference[chrom][pos + 1] | |
| 47 seq_len = len(reference[chrom]) | |
| 48 for i in range(start, len(reference)): | |
| 49 base2 = reference[chrom][i] | |
| 50 if base == base2: | |
| 51 hrun += 1 | |
| 52 else: | |
| 53 break | |
| 54 # Extend to left in case not left aligned | |
| 55 for i in range(pos, -1, -1): | |
| 56 if reference[chrom][i] == base: | |
| 57 hrun += 1 | |
| 58 else: | |
| 59 break | |
| 60 record.add_info('HRUN', [hrun]) | |
| 61 out_vcf.write_record(record) | |
| 62 in_vcf_handle.close() | |
| 63 out_vcf.close() | |
| 64 out_vcf_handle.close() |
