Mercurial > repos > public-health-bioinformatics > micall_lite
comparison amino2consensus.py @ 0:023064145bea draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
author | public-health-bioinformatics |
---|---|
date | Mon, 06 Jan 2020 19:10:15 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:023064145bea |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import csv | |
7 | |
8 | |
9 AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*'] | |
10 | |
11 | |
12 def determine_amino(amino_counts, threshold): | |
13 amino = "" | |
14 total_count = sum(amino_counts.values()) | |
15 amino_with_max_counts = sorted(amino_counts, key=amino_counts.get, reverse=True)[0] | |
16 if total_count == 0: | |
17 amino = "#" | |
18 elif (amino_counts[amino_with_max_counts] / float(total_count)) > threshold: | |
19 amino = amino_with_max_counts | |
20 else: | |
21 amino = "@" | |
22 return amino | |
23 | |
24 | |
25 def determine_first_region(amino_file): | |
26 with open(amino_file) as f: | |
27 reader = csv.DictReader(f) | |
28 row = next(reader) | |
29 region = row['region'] | |
30 return region | |
31 | |
32 | |
33 def main(args): | |
34 current_region = determine_first_region(args.amino) | |
35 seq = [] | |
36 with open(args.amino) as f: | |
37 reader = csv.DictReader(f) | |
38 for row in reader: | |
39 if row['region'] == current_region: | |
40 amino_counts = {} | |
41 for amino_acid in AMINO_ACIDS: | |
42 amino_counts[amino_acid] = int(row[amino_acid]) | |
43 amino = determine_amino(amino_counts, args.threshold) | |
44 seq.append(amino) | |
45 else: | |
46 print(">" + current_region) | |
47 print(''.join(seq)) | |
48 current_region = row['region'] | |
49 seq = [] | |
50 amino_counts = {} | |
51 for amino_acid in AMINO_ACIDS: | |
52 amino_counts[amino_acid] = int(row[amino_acid]) | |
53 amino = determine_amino(amino_counts, args.threshold) | |
54 seq.append(amino) | |
55 print(">" + current_region) | |
56 print(''.join(seq)) | |
57 | |
58 | |
59 if __name__ == '__main__': | |
60 parser = argparse.ArgumentParser() | |
61 parser.add_argument("amino", help="MiCall amino.csv output file") | |
62 parser.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling") | |
63 args = parser.parse_args() | |
64 main(args) |