changeset 2:87459bd1615a draft

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 4ebe8216c423e2d66be92247f273df21cb5852f1
author public-health-bioinformatics
date Thu, 27 Oct 2022 00:37:27 +0000
parents 3cde438eb222
children 899a650587ed
files adjust_bracken_for_unclassified_reads.py adjust_bracken_for_unclassified_reads.xml test-data/input/zero_unclassified_bracken_abundances.tsv test-data/input/zero_unclassified_kraken2.txt test-data/output/SRR17907745_bracken_abundances_adjusted.tsv test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv
diffstat 6 files changed, 41 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/adjust_bracken_for_unclassified_reads.py	Thu Mar 10 21:39:43 2022 +0000
+++ b/adjust_bracken_for_unclassified_reads.py	Thu Oct 27 00:37:27 2022 +0000
@@ -43,23 +43,14 @@
     kraken_report = parse_kraken_report(args.kraken_report)
     bracken_abundances = parse_bracken_abundances(args.bracken_abundances)
 
-    kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level']
+    try:
+        kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level']
+    except IndexError as e:
+        kraken_report_unclassified_seqs = 0
     kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total']
 
     total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs
-    percent_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs)
-
-    bracken_unclassified_entry = {
-        'name': 'unclassified',
-        'taxonomy_id': 0,
-        'taxonomy_lvl': 'U',
-        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
-        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
-        'kraken_fraction_total_seqs': percent_unclassified,
-        'bracken_fraction_total_seqs': 0.0,
-    }
-
-    bracken_abundances = [bracken_unclassified_entry] + bracken_abundances
+    fraction_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs)
 
     output_fieldnames = [
         'name',
@@ -82,7 +73,20 @@
         bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs)
         b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs)
 
-    for b in sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True):
+    bracken_unclassified_entry = {
+        'name': 'unclassified',
+        'taxonomy_id': 0,
+        'taxonomy_lvl': 'U',
+        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
+        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
+        'total_seqs': total_seqs,
+        'kraken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified),
+        'bracken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified),
+    }
+    
+    bracken_abundances = sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True)
+    bracken_abundances = [bracken_unclassified_entry] + bracken_abundances
+    for b in bracken_abundances:
         writer.writerow(b)
 
 
--- a/adjust_bracken_for_unclassified_reads.xml	Thu Mar 10 21:39:43 2022 +0000
+++ b/adjust_bracken_for_unclassified_reads.xml	Thu Oct 27 00:37:27 2022 +0000
@@ -1,5 +1,5 @@
-<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.1.0">
-    <description>Adjust bracken report to account for unclassified reads.</description>
+<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.2.0+galaxy0">
+    <description>Adjust bracken report to account for unclassified reads</description>
     <requirements>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
@@ -26,6 +26,11 @@
             <param name="bracken_abundances" value="input/SRR17907745_bracken_abundances.tsv"/>
             <output name="adjusted_bracken_report" file="output/SRR17907745_bracken_abundances_adjusted.tsv" ftype="tabular"/>
         </test>
+        <test>
+            <param name="kraken_report" value="input/zero_unclassified_kraken2.txt"/>
+            <param name="bracken_abundances" value="input/zero_unclassified_bracken_abundances.tsv"/>
+            <output name="adjusted_bracken_report" file="output/zero_unclassified_bracken_abundances_adjusted.tsv" ftype="tabular"/>
+        </test>
     </tests>
     <help><![CDATA[
     ]]></help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/zero_unclassified_bracken_abundances.tsv	Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,2 @@
+name	taxonomy_id	taxonomy_lvl	kraken_assigned_reads	added_reads	new_est_reads	fraction_total_reads
+Klebsiella pneumoniae	573	S	25	70	95	0.95
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/zero_unclassified_kraken2.txt	Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,9 @@
+ 100.0	100	5	R	1	root
+  95.0	95	10	R1	131567	  cellular organisms
+  85.0	85	10	D	2	    Bacteria
+  75.0	75	20	P	1224	      Proteobacteria
+  65.0	65	10	C	1236	        Gammaproteobacteria
+  55.0	55	10	O	91347	          Enterobacterales
+  45.0	45	10	F	543	            Enterobacteriaceae
+  35.0	35	10	G	570	              Klebsiella
+  25.0	25	10	S	573	                Klebsiella pneumoniae
--- a/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv	Thu Mar 10 21:39:43 2022 +0000
+++ b/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv	Thu Oct 27 00:37:27 2022 +0000
@@ -1,9 +1,9 @@
 name	taxonomy_id	taxonomy_lvl	kraken_assigned_seqs	bracken_assigned_seqs	total_seqs	kraken_fraction_total_seqs	bracken_fraction_total_seqs
+unclassified	0	U	110613	110613	2570868	0.043026	0.043026
 Klebsiella quasipneumoniae	1463165	S	484958	1017029	2570868	0.188636	0.395598
 Escherichia coli	562	S	181539	751229	2570868	0.070614	0.292208
 Klebsiella pneumoniae	573	S	94362	315713	2570868	0.036704	0.122804
 Citrobacter freundii	546	S	116592	132960	2570868	0.045351	0.051718
-unclassified	0	U	110613	110613	2570868	0.043026	0.043026
 Enterobacter hormaechei	158836	S	74706	85010	2570868	0.029059	0.033067
 Enterobacter cloacae	550	S	73130	79124	2570868	0.028446	0.030777
 Klebsiella variicola	244366	S	4879	10752	2570868	0.001898	0.004182
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv	Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,3 @@
+name	taxonomy_id	taxonomy_lvl	kraken_assigned_seqs	bracken_assigned_seqs	total_seqs	kraken_fraction_total_seqs	bracken_fraction_total_seqs
+unclassified	0	U	0	0	100	0.000000	0.000000
+Klebsiella pneumoniae	573	S	25	95	100	0.250000	0.950000