# HG changeset patch # User public-health-bioinformatics # Date 1666831047 0 # Node ID 87459bd1615a3fdd50417f4466357b79ee7f76f8 # Parent 3cde438eb222a165acf53d0608ae14165cc80e3b planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 4ebe8216c423e2d66be92247f273df21cb5852f1 diff -r 3cde438eb222 -r 87459bd1615a adjust_bracken_for_unclassified_reads.py --- a/adjust_bracken_for_unclassified_reads.py Thu Mar 10 21:39:43 2022 +0000 +++ b/adjust_bracken_for_unclassified_reads.py Thu Oct 27 00:37:27 2022 +0000 @@ -43,23 +43,14 @@ kraken_report = parse_kraken_report(args.kraken_report) bracken_abundances = parse_bracken_abundances(args.bracken_abundances) - kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level'] + try: + kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level'] + except IndexError as e: + kraken_report_unclassified_seqs = 0 kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total'] total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs - percent_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) - - bracken_unclassified_entry = { - 'name': 'unclassified', - 'taxonomy_id': 0, - 'taxonomy_lvl': 'U', - 'kraken_assigned_seqs': kraken_report_unclassified_seqs, - 'bracken_assigned_seqs': kraken_report_unclassified_seqs, - 'kraken_fraction_total_seqs': percent_unclassified, - 'bracken_fraction_total_seqs': 0.0, - } - - bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + fraction_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) output_fieldnames = [ 'name', @@ -82,7 +73,20 @@ bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs) b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs) - for b in sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True): + bracken_unclassified_entry = { + 'name': 'unclassified', + 'taxonomy_id': 0, + 'taxonomy_lvl': 'U', + 'kraken_assigned_seqs': kraken_report_unclassified_seqs, + 'bracken_assigned_seqs': kraken_report_unclassified_seqs, + 'total_seqs': total_seqs, + 'kraken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + 'bracken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + } + + bracken_abundances = sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True) + bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + for b in bracken_abundances: writer.writerow(b) diff -r 3cde438eb222 -r 87459bd1615a adjust_bracken_for_unclassified_reads.xml --- a/adjust_bracken_for_unclassified_reads.xml Thu Mar 10 21:39:43 2022 +0000 +++ b/adjust_bracken_for_unclassified_reads.xml Thu Oct 27 00:37:27 2022 +0000 @@ -1,5 +1,5 @@ - - Adjust bracken report to account for unclassified reads. + + Adjust bracken report to account for unclassified reads + + + + + diff -r 3cde438eb222 -r 87459bd1615a test-data/input/zero_unclassified_bracken_abundances.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input/zero_unclassified_bracken_abundances.tsv Thu Oct 27 00:37:27 2022 +0000 @@ -0,0 +1,2 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads +Klebsiella pneumoniae 573 S 25 70 95 0.95 diff -r 3cde438eb222 -r 87459bd1615a test-data/input/zero_unclassified_kraken2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input/zero_unclassified_kraken2.txt Thu Oct 27 00:37:27 2022 +0000 @@ -0,0 +1,9 @@ + 100.0 100 5 R 1 root + 95.0 95 10 R1 131567 cellular organisms + 85.0 85 10 D 2 Bacteria + 75.0 75 20 P 1224 Proteobacteria + 65.0 65 10 C 1236 Gammaproteobacteria + 55.0 55 10 O 91347 Enterobacterales + 45.0 45 10 F 543 Enterobacteriaceae + 35.0 35 10 G 570 Klebsiella + 25.0 25 10 S 573 Klebsiella pneumoniae diff -r 3cde438eb222 -r 87459bd1615a test-data/output/SRR17907745_bracken_abundances_adjusted.tsv --- a/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Mar 10 21:39:43 2022 +0000 +++ b/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Oct 27 00:37:27 2022 +0000 @@ -1,9 +1,9 @@ name taxonomy_id taxonomy_lvl kraken_assigned_seqs bracken_assigned_seqs total_seqs kraken_fraction_total_seqs bracken_fraction_total_seqs +unclassified 0 U 110613 110613 2570868 0.043026 0.043026 Klebsiella quasipneumoniae 1463165 S 484958 1017029 2570868 0.188636 0.395598 Escherichia coli 562 S 181539 751229 2570868 0.070614 0.292208 Klebsiella pneumoniae 573 S 94362 315713 2570868 0.036704 0.122804 Citrobacter freundii 546 S 116592 132960 2570868 0.045351 0.051718 -unclassified 0 U 110613 110613 2570868 0.043026 0.043026 Enterobacter hormaechei 158836 S 74706 85010 2570868 0.029059 0.033067 Enterobacter cloacae 550 S 73130 79124 2570868 0.028446 0.030777 Klebsiella variicola 244366 S 4879 10752 2570868 0.001898 0.004182 diff -r 3cde438eb222 -r 87459bd1615a test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv Thu Oct 27 00:37:27 2022 +0000 @@ -0,0 +1,3 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_seqs bracken_assigned_seqs total_seqs kraken_fraction_total_seqs bracken_fraction_total_seqs +unclassified 0 U 0 0 100 0.000000 0.000000 +Klebsiella pneumoniae 573 S 25 95 100 0.250000 0.950000