Mercurial > repos > nick > allele_counts
diff allele-counts.py @ 8:411adeff1eec draft
Handle "." sample columns, update tests to work with BIAS column.
author | nick |
---|---|
date | Tue, 23 Aug 2016 02:30:56 -0400 |
parents | df3b28364cd2 |
children | 6cc488e11544 |
line wrap: on
line diff
```diff
--- a/allele-counts.py Wed Dec 09 11:37:02 2015 -0500
+++ b/allele-counts.py Tue Aug 23 02:30:56 2016 -0400
@@ -238,7 +238,7 @@
 
   if len(fields) < 9:
     fail("Error in input VCF: wrong number of fields in data line. "
-      +"Failed on line:\n"+line)
+      "Failed on line:\n"+line)
 
   site['chr'] = fields[0]
   site['pos'] = fields[1]
@@ -246,35 +246,38 @@
 
   if len(samples) < len(sample_names):
     fail("Error in input VCF: missing sample fields in data line. "
-      +"Failed on line:\n"+line)
+      "Failed on line:\n"+line)
   elif len(samples) > len(sample_names):
     fail("Error in input VCF: more sample fields in data line than in header. "
-      +"Failed on line:\n"+line)
+      "Failed on line:\n"+line)
 
   sample_counts = {}
   for i in range(len(samples)):
-    
+
     variant_counts = {}
     counts = samples[i].split(':')[-1]
     counts = counts.split(',')
 
     for count in counts:
-      if not count:
+      if not count or count == '.':
         continue
       fields = count.split('=')
       if len(fields) != 2:
         fail("Error in input VCF: Incorrect variant data format (must contain "
-          +"a single '='). Failed on line:\n"+line)
+             "a single '='). Failed on data \"{}\" in line:\n{}"
+             .format(count, line))
       (variant, reads) = fields
       if variant[1:] not in canonical:
         continue
-      if variant[0] != '-' and variant[0] != '+':
-        fail("Error in input VCF: variant data not strand-specific. "
-          +"Failed on line:\n"+line)
+      if not variant.startswith('-') and not variant.startswith('+'):
+        fail("Error in input VCF: variant data not strand-specific. Failed on "
+             "data \"{}\" on line:\n{}".format(variant, line))
       try:
         variant_counts[variant] = int(float(reads))
       except ValueError:
-        fail("Error in input VCF: Variant count not a valid number. Failed on variant count string '"+reads+"'\nIn the following line:\n"+line)
+        fail("Error in input VCF: Variant count not a valid number. Failed on "
+             "variant count string \"{}\"\nIn the following line:\n{}"
+             .format(reads, line))
 
     sample_counts[sample_names[i]] = variant_counts
 
```