changeset 5:a3c4fe6f49ab draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanoplot/ commit 52e462654477b6acf9a087063d3ddb19f66f8b38
author iuc
date Thu, 16 Jun 2022 20:08:15 +0000
parents edbb6c5028f5
children 0f1c34698076
files nanoplot.xml test-data/HistogramReadlength.png test-data/NanoStats.txt test-data/NanoStats_post_filtering.txt
diffstat 4 files changed, 126 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/nanoplot.xml	Sun Jan 19 19:03:02 2020 -0500
+++ b/nanoplot.xml	Thu Jun 16 20:08:15 2022 +0000
@@ -1,38 +1,61 @@
-<tool id="nanoplot" name="NanoPlot" version="@TOOL_VERSION@+galaxy1">
+<tool id="nanoplot" name="NanoPlot" version="@TOOL_VERSION@+galaxy1" profile="20.05">
     <description>Plotting suite for Oxford Nanopore sequencing data and alignments</description>
     <macros>
-        <token name="@TOOL_VERSION@">1.28.2</token>
+        <token name="@TOOL_VERSION@">1.36.2</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">nanoplot</requirement>
     </requirements>
+    <stdio>
+        <regex match="kaleido problem" source="stderr" level="fatal" description="No static plots are saved due to some kaleido problem" />
+    </stdio>
     <version_command>NanoPlot --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
+## set TMPDIR if not already set by admin
+## otherwise kaleido fails with `Less than 64MB of free space in temporary directory for shared memory files: 0`
+## export TMPDIR=\${TMPDIR:-\$_GALAXY_JOB_TMP_DIR};
+
 #set $myfiles = $mode.reads.files
-
 #set reads_temp = []
 #if $mode.choice == 'combined':
     #for $i, $f in enumerate($myfiles)
-        #if $f.ext == "fastqsanger":
+        #if $f.ext.startswith("fastq"):
             #set $extension = 'fastq'
         #else
             #set $extension = $f.ext
         #end if
+        #if $f.ext.endswith(".gz"):
+            #set $extension = $extension + ".gz"
+        #else if $f.ext.endswith(".bz2"):
+            #set $extension = $extension + ".bz2"
+        #end if
         ln -s '$f' './read_${i}.$extension' &&
+        #if "bam" in $extension
+            ln -s '$f.metadata.bam_index' './read_${i}.${extension}.bai' &&
+        #end if
         $reads_temp.append("read_" + str($i) + "." + str($extension))
     #end for
 #else
-    #if $myfiles.ext == "fastqsanger":
+    #if $myfiles.ext.startswith("fastq"):
         #set $extension = 'fastq'
     #else
         #set $extension = $myfiles.ext
     #end if
-    ln -s '$mode.reads.files' './read.$extension' &&
+    #if $myfiles.ext.endswith(".gz"):
+        #set $extension = $extension + ".gz"
+    #else if $myfiles.ext.endswith(".bz2"):
+        #set $extension = $extension + ".bz2"
+    #end if
+    ln -s '$myfiles' './read.$extension' &&
+    #if "bam" in $extension
+        ln -s '$myfiles.metadata.bam_index' './read.${extension}.bai' &&
+    #end if
     $reads_temp.append("read." + str($extension))
 #end if
 
 NanoPlot
     --threads \${GALAXY_SLOTS:-4}
+    --tsv_stats
     --$mode.reads.type ${' '.join($reads_temp)}
     #if $filter.maxlength
         --maxlength $filter.maxlength
@@ -46,15 +69,15 @@
     #if $filter.minqual
         --minqual $filter.minqual
     #end if
+    #if $filter.runtime_until
+        --runtime_until $filter.runtime_until
+    #end if
     #if $filter.readtype
         --readtype $filter.readtype
     #end if
     #if $customization.color
         --color $customization.color
     #end if
-    #if $customization.format
-        --format $customization.format
-    #end if
     #if $customization.plots
         --plots ${str($customization.plots).replace(',', ' ')}
     #end if
@@ -63,8 +86,10 @@
     $filter.percentqual
     $filter.alength
     $filter.barcoded
+    $filter.no_supplementary
     $customization.N50
     -o '.'
+    && >&2 cat *log
     ]]></command>
     <inputs>
         <conditional name="mode">
@@ -190,6 +215,11 @@
                 optional="true"
                 label="Drop reads with an average quality lower than specified."/>
             <param
+                type="integer"
+                argument="--runtime_until"
+                optional="true"
+                label="Only take the N first hours of a run"/>
+            <param
                 type="select"
                 argument="--readtype"
                 optional="true"
@@ -204,6 +234,12 @@
                 truevalue="--barcoded"
                 falsevalue=""
                 label="Use if you want to split the summary file by barcode."/>
+            <param
+                type="boolean"
+                argument="--no_supplementary"
+                truevalue="--no_supplementary"
+                falsevalue=""
+                label="Use if you want to remove supplementary alignments"/>
         </section>
         <section
             name="customization"
@@ -365,23 +401,13 @@
             </param>
             <param
                 type="select"
-                argument="--format"
-                optional="true"
-                label="Specify the output format of the plots.">
-                <option selected="True" value="png">png</option>
-                <option value="svg">svg</option>
-            </param>
-            <param
-                type="select"
                 argument="--plots"
                 optional="true"
                 multiple="true"
                 display="checkboxes"
                 label="Specify the bivariate format of the plots.">
-                <option value="kde">kde</option>
-                <option value="hex">hex</option>
-                <option value="dot">dot</option>
-                <option value="pauvre">pauvre</option>
+                <option value="kde">kernel density estimation (kde)</option>
+                <option value="dot">dots (dot)</option>
             </param>
             <param
                 type="boolean"
@@ -393,18 +419,10 @@
     </inputs>
     <outputs>
         <data name="output_html" format="html" from_work_dir="NanoPlot-report.html"  label="${tool.name} on ${on_string}: HTML report"/>
-        <data name="nanostats" format="txt" from_work_dir="NanoStats.txt" label="${tool.name} on ${on_string}: NanoStats"/>
-        <data name="nanostats_post_filtering" format="txt" from_work_dir="NanoStats_post_filtering.txt" label="${tool.name} on ${on_string}: NanoStats post filtering"/>
-        <data name="read_length" format="png" from_work_dir="HistogramReadlength.*" label="${tool.name} on ${on_string}: Histogram Read Length ">
-            <change_format>
-                <when input="customization.format" value="svg" format="svg"/>
-            </change_format>
-        </data>
-        <data name="log_read_length" format="png" from_work_dir="LogTransformed_HistogramReadlength.*" label="${tool.name} on ${on_string}: Log Transformed Histogram Read Length ">
-            <change_format>
-                <when input="customization.format" value="svg" format="svg"/>
-            </change_format>
-        </data>
+        <data name="nanostats" format="tabular" from_work_dir="NanoStats.txt" label="${tool.name} on ${on_string}: NanoStats"/>
+        <data name="nanostats_post_filtering" format="tabular" from_work_dir="NanoStats_post_filtering.txt" label="${tool.name} on ${on_string}: NanoStats post filtering" />
+        <data name="read_length" format="png" from_work_dir="Non_weightedHistogramReadlength.png" label="${tool.name} on ${on_string}: Histogram Read Length"/>
+        <data name="log_read_length" format="png" from_work_dir="Non_weightedLogTransformed_HistogramReadlength.png" label="${tool.name} on ${on_string}: Log Transformed Histogram Read Length"/>
     </outputs>
     <tests>
         <test>
@@ -412,18 +430,30 @@
                 <param name="choice" value="batch"/>
                 <conditional name="reads">
                     <param name="type" value="fastq_rich"/>
-                    <param name="files" value="reads.fastq.gz" ftype="fastq.gz"/>
+                    <param name="files" value="reads.fastq.gz" ftype="fastqsanger.gz"/>
                 </conditional>
             </conditional>
             <section name="filter">
                 <param name="downsample" value="800"/>
             </section>
             <section name="customization">
-                <param name="plots" value="hex,kde"/>
+                <param name="plots" value="kde"/>
             </section>
-            <output name="output_html" file="NanoPlot-report.html" ftype="html" lines_diff="8" />
-            <output name="nanostats_post_filtering" file="NanoStats_post_filtering.txt" ftype="txt"/>
-            <output name="read_length" file="HistogramReadlength.png" ftype="png" compare="sim_size" delta="3000"/>
+            <output name="output_html" ftype="html">
+                <assert_contents>
+                    <has_text text="html"/>
+                    <has_text text="mean_qual"/>
+                </assert_contents>
+            </output>
+            <output name="nanostats" file="NanoStats.txt" ftype="tabular"/>
+            <output name="nanostats_post_filtering" file="NanoStats_post_filtering.txt" ftype="tabular"/>
+            <output name="read_length" ftype="png" file="HistogramReadlength.png" compare="sim_size">
+            </output>
+            <output name="log_read_length" ftype="png">
+                <assert_contents>
+                    <has_text text="PNG"/>
+                </assert_contents>
+            </output>
         </test>
         <test>
             <conditional name="mode">
@@ -439,15 +469,18 @@
             </section>
             <section name="customization">
                 <param name="color" value="yellow"/>
-                <param name="format" value="svg"/>
             </section>
-            <output name="output_html" file="bam-report.html" ftype="html" lines_diff="16">
+            <output name="output_html" ftype="html">
                 <assert_contents>
                     <has_text text="html"/>
                     <has_text text="Aligned read length vs Percent identity plot using dots"/> <!-- bam report specific -->
                 </assert_contents>
             </output>
-            <output name="read_length" file="bam-LogTransformed_HistogramReadlength.svg" ftype="svg" compare="sim_size"/>
+            <output name="read_length" ftype="png">
+                <assert_contents>
+                    <has_text text="PNG"/>
+                </assert_contents>
+            </output>
         </test>
         <test><!-- test with multiple input files -->
              <conditional name="mode">
@@ -460,8 +493,8 @@
             <output name="output_html" ftype="html">
                 <assert_contents>
                     <has_text text="html"/>
-                    <not_has_text text="Aligned read length vs Percent identity plot using dots"/> <!-- bam report specific -->
-                    <has_text text="&lt;td&gt;9.0&lt;/td&gt;"/> <!--check both files were used 4+5 reads -->
+                    <not_has_text text="Aligned read length vs Percent identity plot using dots"/>
+                    <has_text text="&lt;td&gt;9&lt;/td&gt;"/> <!--check both files were used 4+5 reads -->
                 </assert_contents>
             </output>
         </test>
Binary file test-data/HistogramReadlength.png has changed
--- a/test-data/NanoStats.txt	Sun Jan 19 19:03:02 2020 -0500
+++ b/test-data/NanoStats.txt	Thu Jun 16 20:08:15 2022 +0000
@@ -1,6 +1,25 @@
-General summary:        
-Mean read length:           2,564.9
-Median read length:         1,910.0
-Number of reads:                9.0
-Read length N50:            4,275.0
-Total bases:               23,084.0
+Metrics	dataset
+number_of_reads	371
+number_of_bases	8611871.0
+median_read_length	9390.0
+mean_read_length	23212.6
+read_length_stdev	42729.8
+n50	60395.0
+active_channels	169
+mean_qual	10.1
+median_qual	10.3
+longest_read_(with_Q):1	393431 (10.6)
+longest_read_(with_Q):2	341725 (11.8)
+longest_read_(with_Q):3	320662 (8.0)
+longest_read_(with_Q):4	226843 (9.1)
+longest_read_(with_Q):5	221901 (10.0)
+highest_Q_read_(with_length):1	13.3 (6333)
+highest_Q_read_(with_length):2	13.1 (5448)
+highest_Q_read_(with_length):3	12.9 (27834)
+highest_Q_read_(with_length):4	12.8 (20351)
+highest_Q_read_(with_length):5	12.8 (97730)
+Reads >Q5:	371 (100.0%) 8.6Mb
+Reads >Q7:	371 (100.0%) 8.6Mb
+Reads >Q10:	207 (55.8%) 4.7Mb
+Reads >Q12:	49 (13.2%) 0.9Mb
+Reads >Q15:	0 (0.0%) 0.0Mb
--- a/test-data/NanoStats_post_filtering.txt	Sun Jan 19 19:03:02 2020 -0500
+++ b/test-data/NanoStats_post_filtering.txt	Thu Jun 16 20:08:15 2022 +0000
@@ -1,27 +1,25 @@
-General summary:        
-Active channels:                169.0
-Mean read length:            23,212.6
-Mean read quality:               10.1
-Median read length:           9,390.0
-Median read quality:             10.3
-Number of reads:                371.0
-Read length N50:             60,395.0
-Total bases:              8,611,871.0
-Number, percentage and megabases of reads above quality cutoffs
->Q5:	371 (100.0%) 8.6Mb
->Q7:	371 (100.0%) 8.6Mb
->Q10:	207 (55.8%) 4.7Mb
->Q12:	49 (13.2%) 0.9Mb
->Q15:	0 (0.0%) 0.0Mb
-Top 5 highest mean basecall quality scores and their read lengths
-1:	13.3 (6333)
-2:	13.1 (5448)
-3:	12.9 (27834)
-4:	12.8 (20351)
-5:	12.8 (97730)
-Top 5 longest reads and their mean basecall quality score
-1:	393431 (10.6)
-2:	341725 (11.8)
-3:	320662 (8.0)
-4:	226843 (9.1)
-5:	221901 (10.0)
+Metrics	dataset
+number_of_reads	371
+number_of_bases	8611871.0
+median_read_length	9390.0
+mean_read_length	23212.6
+read_length_stdev	42729.8
+n50	60395.0
+active_channels	169
+mean_qual	10.1
+median_qual	10.3
+longest_read_(with_Q):1	393431 (10.6)
+longest_read_(with_Q):2	341725 (11.8)
+longest_read_(with_Q):3	320662 (8.0)
+longest_read_(with_Q):4	226843 (9.1)
+longest_read_(with_Q):5	221901 (10.0)
+highest_Q_read_(with_length):1	13.3 (6333)
+highest_Q_read_(with_length):2	13.1 (5448)
+highest_Q_read_(with_length):3	12.9 (27834)
+highest_Q_read_(with_length):4	12.8 (20351)
+highest_Q_read_(with_length):5	12.8 (97730)
+Reads >Q5:	371 (100.0%) 8.6Mb
+Reads >Q7:	371 (100.0%) 8.6Mb
+Reads >Q10:	207 (55.8%) 4.7Mb
+Reads >Q12:	49 (13.2%) 0.9Mb
+Reads >Q15:	0 (0.0%) 0.0Mb