Mercurial > repos > public-health-bioinformatics > adjust_bracken_for_unclassified_reads
comparison adjust_bracken_kreport_for_unclassified_reads.py @ 3:899a650587ed draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 24535690aedb81353cf5e036dc4577022d9604ad
author | public-health-bioinformatics |
---|---|
date | Thu, 27 Oct 2022 19:13:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:87459bd1615a | 3:899a650587ed |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import csv | |
5 import json | |
6 import sys | |
7 | |
8 | |
def parse_kraken_report(kraken_report_path):
    """Parse a tab-separated Kraken-style report into a list of row dicts.

    Every non-blank line must contain exactly six tab-separated fields, in
    this order: percentage, seqs_total, seqs_this_level, taxonomic_level,
    ncbi_taxid, taxon_name. Blank (or whitespace-only) lines are skipped.

    :param kraken_report_path: Path of the report file to read.
    :return: A list with one dict per report line, in file order. The keys
        are 'percentage' (float), 'seqs_total' (int), 'seqs_this_level'
        (int), 'taxonomic_level', 'ncbi_taxid' and 'taxon_name' (all str).
    :raises ValueError: If a non-blank line does not split into exactly six
        fields, or a numeric field cannot be converted.
    """
    kraken_report = []
    with open(kraken_report_path, 'r') as f:
        for line in f:
            # strip() trims the line ends only; whitespace that follows a
            # tab (e.g. leading spaces inside taxon_name) is preserved.
            stripped_line = line.strip()
            if not stripped_line:
                # A blank line (such as a trailing empty line at the end of
                # the file) carries no record and cannot be unpacked.
                continue
            [
                percentage,
                seqs_total,
                seqs_this_level,
                taxonomic_level,
                ncbi_taxid,
                taxon_name,
            ] = stripped_line.split('\t')
            kraken_report.append({
                'percentage': float(percentage),
                'seqs_total': int(seqs_total),
                'seqs_this_level': int(seqs_this_level),
                'taxonomic_level': taxonomic_level,
                'ncbi_taxid': ncbi_taxid,
                'taxon_name': taxon_name,
            })

    return kraken_report
24 | |
25 | |
def _find_row_by_taxon_name(report, taxon_name):
    """Return the first row of *report* whose 'taxon_name' equals *taxon_name*, or None."""
    return next((row for row in report if row['taxon_name'] == taxon_name), None)


def _format_percentage(seqs, total_seqs):
    """Format *seqs* as a percentage of *total_seqs* with two decimals ('0.00' if the total is zero)."""
    if total_seqs == 0:
        return '{:.2f}'.format(0.0)
    # Explicit float() keeps true division regardless of interpreter version.
    return '{:.2f}'.format(float(seqs) / float(total_seqs) * 100)


def main(args):
    """Write a Kraken-style Bracken report that includes unclassified reads.

    The number of unclassified reads is taken from the Kraken report and
    added to the 'root' total of the Kraken-style Bracken report. Every
    Bracken row's percentage is then recomputed against that combined total,
    and an 'unclassified' row is written ahead of the Bracken rows.

    Output is tab-separated, without a header, on standard output.

    :param args: Parsed command-line arguments providing ``kraken_report``
        and ``kraken_style_bracken_report`` (paths to the two input files).
    :raises IndexError: If the Kraken-style Bracken report has no 'root' row.
    """
    kraken_report = parse_kraken_report(args.kraken_report)
    kraken_style_bracken_report = parse_kraken_report(args.kraken_style_bracken_report)

    # Only the unclassified count is needed from the Kraken report; a report
    # without an 'unclassified' row is treated as having zero such reads.
    unclassified_row = _find_row_by_taxon_name(kraken_report, 'unclassified')
    if unclassified_row is None:
        kraken_report_unclassified_seqs = 0
    else:
        kraken_report_unclassified_seqs = unclassified_row['seqs_this_level']

    bracken_root_row = _find_row_by_taxon_name(kraken_style_bracken_report, 'root')
    if bracken_root_row is None:
        raise IndexError(
            "no 'root' row found in kraken-style bracken report: {}".format(
                args.kraken_style_bracken_report
            )
        )
    kraken_style_bracken_report_classified_seqs = bracken_root_row['seqs_total']

    total_seqs = kraken_style_bracken_report_classified_seqs + kraken_report_unclassified_seqs

    output_fieldnames = [
        'percentage',
        'seqs_total',
        'seqs_this_level',
        'taxonomic_level',
        'ncbi_taxid',
        'taxon_name',
    ]

    # NOTE(review): the csv 'excel-tab' dialect terminates rows with '\r\n';
    # confirm that downstream consumers of this report accept CRLF.
    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='excel-tab')

    bracken_unclassified_entry = {
        'percentage': _format_percentage(kraken_report_unclassified_seqs, total_seqs),
        'seqs_total': kraken_report_unclassified_seqs,
        'seqs_this_level': kraken_report_unclassified_seqs,
        'taxonomic_level': 'U',
        'ncbi_taxid': 0,
        'taxon_name': 'unclassified',
    }

    # Rescale each Bracken row so percentages are relative to all reads,
    # classified and unclassified together.
    for row in kraken_style_bracken_report:
        row['percentage'] = _format_percentage(row['seqs_total'], total_seqs)

    writer.writerow(bracken_unclassified_entry)
    for row in kraken_style_bracken_report:
        writer.writerow(row)
66 | |
67 | |
if __name__ == '__main__':
    # Both inputs are mandatory: main() reads each path unconditionally, so
    # a missing option is rejected here with a usage message instead of
    # failing later when None is passed to open().
    parser = argparse.ArgumentParser(
        description='Add unclassified reads from a Kraken report to a '
                    'Kraken-style Bracken report and rescale its percentages.'
    )
    parser.add_argument(
        '-k', '--kraken-report',
        required=True,
        help='Path to the Kraken report (source of the unclassified read count)',
    )
    parser.add_argument(
        '-b', '--kraken-style-bracken-report',
        required=True,
        help='Path to the Kraken-style Bracken report to adjust',
    )
    args = parser.parse_args()
    main(args)