Mercurial > repos > public-health-bioinformatics > adjust_bracken_for_unclassified_reads
comparison adjust_bracken_for_unclassified_reads.py @ 0:3ab9d37e547e draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
author | public-health-bioinformatics |
---|---|
date | Thu, 10 Mar 2022 21:35:14 +0000 |
parents | |
children | 87459bd1615a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3ab9d37e547e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import csv | |
5 import json | |
6 import sys | |
7 | |
def parse_bracken_abundances(bracken_abundances_path):
    """Parse a Bracken abundance table into a list of dicts.

    The file is read as a tab-delimited table whose header row must provide
    the columns ``name``, ``taxonomy_id``, ``taxonomy_lvl``,
    ``kraken_assigned_reads``, ``new_est_reads`` and
    ``fraction_total_reads``. Any other columns are ignored.

    Args:
        bracken_abundances_path: Path of the tab-delimited abundance table.

    Returns:
        A list with one dict per data row, in file order, with the keys
        ``name``, ``taxonomy_id`` and ``taxonomy_lvl`` (strings, copied
        as-is), ``kraken_assigned_seqs`` and ``bracken_assigned_seqs``
        (ints) and ``bracken_fraction_total_seqs`` (float).

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If a count or fraction cannot be converted to a number.
    """
    bracken_abundances = []
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for files passed to a reader.
    with open(bracken_abundances_path, 'r', newline='') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            bracken_abundances.append({
                'name': row['name'],
                'taxonomy_id': row['taxonomy_id'],
                'taxonomy_lvl': row['taxonomy_lvl'],
                # Input columns are named "*_reads"; output keys use "*_seqs".
                'kraken_assigned_seqs': int(row['kraken_assigned_reads']),
                'bracken_assigned_seqs': int(row['new_est_reads']),
                'bracken_fraction_total_seqs': float(row['fraction_total_reads']),
            })

    return bracken_abundances
23 | |
24 | |
def parse_kraken_report(kraken_report_path):
    """Parse a Kraken report into a list of dicts, one per report line.

    Each non-blank line is stripped and split on whitespace into six
    fields. The sixth field is the remainder of the line, so taxon names
    that contain spaces are kept whole while the indentation in front of
    the name is dropped. Blank lines are skipped.

    Args:
        kraken_report_path: Path of the Kraken report file.

    Returns:
        A list of dicts, in file order, with the keys ``percentage``
        (float), ``seqs_total`` and ``seqs_this_level`` (ints), and
        ``taxonomic_level``, ``ncbi_taxid`` and ``taxon_name`` (strings).

    Raises:
        ValueError: If a non-blank line has fewer than six fields, or a
            numeric field cannot be converted.
    """
    kraken_report = []
    with open(kraken_report_path, 'r') as f:
        for line in f:
            fields = line.strip().split(None, 5)
            if not fields:
                # A blank line (e.g. a trailing empty line) is not a record;
                # unpacking it below would raise ValueError.
                continue
            (percentage, seqs_total, seqs_this_level,
             taxonomic_level, ncbi_taxid, taxon_name) = fields
            kraken_report.append({
                'percentage': float(percentage),
                'seqs_total': int(seqs_total),
                'seqs_this_level': int(seqs_this_level),
                'taxonomic_level': taxonomic_level,
                'ncbi_taxid': ncbi_taxid,
                'taxon_name': taxon_name,
            })

    return kraken_report
40 | |
41 | |
def main(args):
    """Re-scale Bracken abundances so that they account for unclassified reads.

    Reads the Kraken report and the Bracken abundance table named by
    ``args``, adds an ``unclassified`` row built from the Kraken report,
    recomputes every row's Kraken and Bracken fractions against the total
    number of sequences (classified + unclassified), and writes the result
    to standard output as a tab-delimited table sorted by descending
    Bracken fraction.

    Args:
        args: Object with the attributes ``kraken_report`` and
            ``bracken_abundances``, both file paths.

    Returns:
        None. The table is written to ``sys.stdout``.
    """
    kraken_report = parse_kraken_report(args.kraken_report)
    bracken_abundances = parse_bracken_abundances(args.bracken_abundances)

    # A report may lack the 'unclassified' line (nothing unclassified) or the
    # 'root' line (nothing classified); a missing line counts as zero
    # sequences instead of raising IndexError.
    kraken_report_unclassified_seqs = next(
        (x['seqs_this_level'] for x in kraken_report if x['taxon_name'] == 'unclassified'),
        0,
    )
    kraken_report_classified_seqs = next(
        (x['seqs_total'] for x in kraken_report if x['taxon_name'] == 'root'),
        0,
    )

    total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs

    def fraction_of_total(seqs):
        # With no sequences at all, report 0 rather than dividing by zero.
        if not total_seqs:
            return 0.0
        return float(seqs) / float(total_seqs)

    # The fraction columns of this row are filled in by the loop below,
    # exactly as for the rows that came from Bracken.
    bracken_unclassified_entry = {
        'name': 'unclassified',
        'taxonomy_id': 0,
        'taxonomy_lvl': 'U',
        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
    }

    bracken_abundances = [bracken_unclassified_entry] + bracken_abundances

    output_fieldnames = [
        'name',
        'taxonomy_id',
        'taxonomy_lvl',
        'kraken_assigned_seqs',
        'bracken_assigned_seqs',
        'total_seqs',
        'kraken_fraction_total_seqs',
        'bracken_fraction_total_seqs',
    ]

    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='excel-tab')
    writer.writeheader()

    for b in bracken_abundances:
        b['total_seqs'] = total_seqs
        b['kraken_fraction_total_seqs'] = '{:.6f}'.format(
            fraction_of_total(b['kraken_assigned_seqs']))
        # Overwrites the fraction parsed from the Bracken table, which does
        # not include the unclassified sequences in its denominator.
        b['bracken_fraction_total_seqs'] = '{:.6f}'.format(
            fraction_of_total(b['bracken_assigned_seqs']))

    # Sort on the numeric value of the reported fraction; comparing the
    # formatted strings directly would order them lexicographically.
    ordered = sorted(
        bracken_abundances,
        key=lambda x: float(x['bracken_fraction_total_seqs']),
        reverse=True,
    )
    for b in ordered:
        writer.writerow(b)
87 | |
88 | |
if __name__ == '__main__':
    # Command-line entry point. Both inputs are mandatory: without
    # required=True a missing option reaches open() as None and fails with
    # a TypeError traceback instead of a usage message.
    parser = argparse.ArgumentParser(
        description='Adjust Bracken abundance estimates to account for unclassified reads.'
    )
    parser.add_argument(
        '-k', '--kraken-report',
        required=True,
        help='Path to the Kraken report file',
    )
    parser.add_argument(
        '-a', '--bracken-abundances',
        required=True,
        help='Path to the tab-delimited Bracken abundances file',
    )
    args = parser.parse_args()
    main(args)