changeset 4:cc0366f0bdf7 draft

Uploaded
author lehmanju
date Fri, 16 Oct 2020 06:20:35 +0000
parents bf3dc4cae5bf
children 3a150fca6d60
files rna_quast.xml
diffstat 1 files changed, 99 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/rna_quast.xml	Wed Oct 14 07:03:06 2020 +0000
+++ b/rna_quast.xml	Fri Oct 16 06:20:35 2020 +0000
@@ -9,9 +9,65 @@
         </xml>
         <xml name="element_has_text" token_name="" token_text="">
             <element name="@NAME@">
-                <assert_contents><has_text text="@TEXXT@"/></assert_contents>
+                <assert_contents><has_text text="@TEXT@"/></assert_contents>
+            </element>
+        </xml>
+
+        <xml name="details_output_test" token_assembler="">
+            <element name="@ASSEMBLER@">
+                <element name="5000%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
+                <element name="9500%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
+                <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*"/>
+                <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*"/>
+                <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*"/>
+                <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*"/>
+                <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*"/>
+                <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*"/>
+                <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*"/>
             </element>
         </xml>
+
+        <xml name="txt_output_test" token_assembler="">
+            <output name="short_report_txt">
+                <assert_contents>
+                    <has_text text="SHORT SUMMARY REPORT"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="tex_output_test" token_assembler="">
+            <output name="short_report_tex">
+                <assert_contents>
+                    <has_text text="Short summary report"/>
+                    <has_text text="end{document}"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="tsv_output_test" token_assembler="">
+            <output name="short_report_tsv">
+                <assert_contents>
+                    <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="pdf_output_test" token_assembler="">
+            <output name="short_report_pdf">
+                <assert_contents>
+                    <has_text text="rnaQUAST short report"/>
+                </assert_contents>
+            </output>
+        </xml>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
@@ -21,7 +77,7 @@
     </stdio>
     <command detect_errors="exit_code"><![CDATA[
     #import re
-    #for $i in $input
+    #for $i in $in_fasta
         ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
     #end for
     #if $r
@@ -38,7 +94,7 @@
     rnaQUAST.py
     --threads \${GALAXY_SLOTS:-1}
     --transcripts
-    #for $i in $input
+    #for $i in $in_fasta
          '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
     #end for
     $strand_specific
@@ -63,18 +119,22 @@
     #end if
     $blat
     $busco_lineage
-    $gene_mark
+    ##GeneMarkS-T is not available in conda $gene_mark
+    $meta
     --lower_threshold $lower_threshold
     --upper_threshold $upper_threshold
     -o outputdir
-     && mkdir details 
-     #for $i in $input
+    && mkdir details
+    ## move the outputs that are generated for each input (outputdir/ASSEMBLER_output)
+    ## to a joint dir (details) to make them discoverable
+    ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
+    #for $i in $in_fasta
         #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
         &&
         (for f in \$(find 'outputdir/'$basename'_output' -type f);
         do
             d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
-            mv \$f details/"\$d"_____"\$(basename \$f)";
+            mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
         done)
     #end for
     ## rename .list files to .txt files to make them detectable (format detection by extension)
@@ -83,7 +143,7 @@
     && true
     ]]></command>
     <inputs>
-        <param name="input" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
+        <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
         <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
         <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
         <conditional name="gene_coordinates">
@@ -103,7 +163,8 @@
         <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
         <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
         <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
-        <param argument="--gene_mark" type="boolean" truevalue="--gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>
+        <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>-->
+        <param argument="--meta"  type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for Meta Transcriptome"/>
         <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
         <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
         <param name="out_sr" type="select" multiple="true" label="Short report formats">
@@ -138,13 +199,15 @@
             <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log"  directory="outputdir/logs/" visible="false" />
             <filter>"logs" in out_add</filter>
         </collection>
+        <!-- note the output filter of the next two outputs checks if there is
+             more than 1 input for in_fasta (for 1 it is an HDA, for more a list of HDAs) -->
         <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" >
             <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png"  directory="outputdir/comparison_output/" visible="false" recurse="true"/>
-            <filter> len(input)>1 and "plots" in out_add</filter>
+            <filter> isinstance(in_fasta, list) and "plots" in out_add</filter>
         </collection>
         <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" >
             <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt"  directory="outputdir/comparison_output/" visible="false" recurse="true" />
-            <filter> len(input)>1 and "comparison" in out_add</filter>
+            <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter>
         </collection>
         <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
@@ -157,7 +220,7 @@
     </outputs>
     <tests>
         <test expect_num_outputs="7">
-            <param name="input" value="idba.fasta,Trinity.fasta" ftype="fasta" />
+            <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta" />
             <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
             <conditional name="gene_coordinates">
                 <param name="use_gtf" value="true" />
@@ -167,32 +230,19 @@
             </conditional>
             <param name="out_sr" value="txt,tex,tsv" />
             <param name="out_add" value="logs,comparison,plots,details" />
-            <output name="short_report_txt">
-                <assert_contents>
-                    <has_text text="SHORT SUMMARY REPORT"/>
-                </assert_contents>
-            </output>
-            <output name="short_report_tex">
-                <assert_contents>
-                    <has_text text="Short summary report"/>
-                    <has_text text="end{document}"/>
-                </assert_contents>
-            </output>
-            <output name="short_report_tsv">
-                <assert_contents>
-                    <has_line_matching expression="^METRICS/TRANSCRIPTS\tidba\tTrinity$"/>
-                </assert_contents>
-            </output>
+            <expand macro="txt_output_test"/>
+            <expand macro="tex_output_test"/>
+            <expand macro="tsv_output_test"/>
             <output_collection name="comparison_png" type="list" count="15"/>
             <output_collection name="comparison" type="list" count="19"/>
             <output_collection name="list_logs" type="list" count="8"/>
             <output_collection name="details" type="list:list" count="2">
-                <output_collection name="Trinity" type="list" count="21"/>
-		        <output_collection name="idba" type="list" count="21"/>
+                <expand macro="details_output_test" assembler="Trinity"/>
+                <expand macro="details_output_test" assembler="idba"/>
             </output_collection>
         </test>
-        <test expect_num_outputs="8">
-            <param name="input" value="Trinity.fasta" ftype="fasta" />
+        <test expect_num_outputs="6">
+            <param name="in_fasta" value="Trinity.fasta" ftype="fasta" />
             <conditional name="gene_coordinates">
                 <param name="use_gtf" value="false" />
             </conditional>
@@ -201,16 +251,20 @@
             <param name="upper_threshold" value="95"/>
             <param name="out_sr" value="txt,tex,tsv,pdf" />
             <param name="out_add" value="logs,details_plots" />
-            <output name="short_report_pdf" file="short_report.pdf" compare="sim_size"/>
-            <output name="short_report_txt" file="short_report.txt" compare="sim_size"/>
-            <output name="short_report_tex" file="short_report.tex" compare="sim_size"/>
-            <output name="short_report_tsv" file="short_report.tsv" compare="sim_size"/>
+
+            <expand macro="pdf_output_test"/>
+            <expand macro="tex_output_test"/>
+            <expand macro="tsv_output_test"/>
+            <expand macro="txt_output_test"/>
             <output_collection name="list_logs" type="list">
-                <element name="rnaQUAST" file="rnaQUAST"/>
-                <element name="Trinity.GeneMarkS_T.err" file="spades.311.GeneMarkS_T.err"/>
+                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text=""/>
+                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!"/>
             </output_collection>
             <output_collection name="details_png" type="list:list" count="1">
-                <output_collection name="Trinity" type="list" count="11"/>
+                <element name="Trinity">
+                    <expand macro="element_has_text" name="Nx" text="PNG"/>
+                    <expand macro="element_has_text" name="transcript_length" text="PNG"/>
+                </element>
             </output_collection>
         </test>
     </tests>
@@ -219,29 +273,25 @@
 - a quality assessment tool for de novo transcriptome assemblies
 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
-    
-**Using rnaQuast without reference** you wont get:  
-  
-- x-assembled (Exons) 
-- Alignments per Isoform 
+**Using rnaQuast without reference** you won't get:
+- x-assembled (Exons)
+- Alignments per Isoform
 - x-covered (Exons)
 - x-matched (Blocks)
 - gmap build logs
-    
 **Using rnaQuast with reference** you will get:
 - Reports
 - Logs
 - Alignement/Basic Metrics
 - Misassemblies/ Specificity/ Sensitivity
 - Alignment multiplicity
-- Block/ Transcript Lentgh 
+- Block/ Transcript Length
 - Blocks per alignment
 - Mismatch rate
 - x-aligned
-- Nx 
+- Nx
 - Blocks per alignment
 - gmap build logs
-    
 **Using rnaQuast without gene coordinates** you wont get:
 - x-assembled (Exons)
 - Alignments per Isoform
@@ -252,7 +302,7 @@
 - Alignment multiplicity
 - Mismatch rate
 - NAx
-- x-aligned 
+- x-aligned
 **Using rnaQuast with gene coordinates** you will get:
 - Reports
 - Logs