Mercurial > repos > iuc > rnaquast

diff rna_quast.xml @ 5:f89e3c318453 draft
planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit c633f5c634128e3c81ab48e94df6f703dd005c46
author: iuc
date: Wed, 07 Jun 2023 12:02:03 +0000
parents: f9f2ad782d8f
children: 8e66f695d859
--- a/rna_quast.xml	Thu Jan 20 21:09:47 2022 +0000
+++ b/rna_quast.xml	Wed Jun 07 12:02:03 2023 +0000
@@ -1,125 +1,54 @@
-<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>A quality assessment tool for De Novo transcriptome assemblies</description>
-    <xrefs>
-        <xref type="bio.tools">rnaQUAST</xref>
-    </xrefs>
     <macros>
-        <token name="@TOOL_VERSION@">2.2.1</token>
-        <token name="@VERSION_SUFFIX@">1</token>
-        <xml name="element_matching_line" token_name="" token_expression="">
-            <element name="@NAME@">
-                <assert_contents>
-                    <has_line_matching expression="@EXPRESSION@" />
-                </assert_contents>
-            </element>
-        </xml>
-        <xml name="element_has_text" token_name="" token_text="">
-            <element name="@NAME@">
-                <assert_contents>
-                    <has_text text="@TEXT@" />
-                </assert_contents>
-            </element>
-        </xml>
-        <xml name="details_output_test" token_assembler="">
-            <element name="@ASSEMBLER@">
-                <element name="5000%-assembled.list">
-                    <assert_contents>
-                        <has_n_lines n="0" />
-                    </assert_contents>
-                </element>
-                <element name="9500%-assembled.list">
-                    <assert_contents>
-                        <has_n_lines n="0" />
-                    </assert_contents>
-                </element>
-                <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" />
-                <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" />
-                <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" />
-                <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" />
-                <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" />
-                <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" />
-                <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" />
-                <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" />
-                <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" />
-                <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" />
-                <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" />
-            </element>
-        </xml>
-
-        <xml name="txt_output_test" token_assembler="">
-            <output name="short_report_txt">
-                <assert_contents>
-                    <has_text text="SHORT SUMMARY REPORT" />
-                </assert_contents>
-            </output>
-        </xml>
-        <xml name="tex_output_test" token_assembler="">
-            <output name="short_report_tex">
-                <assert_contents>
-                    <has_text text="Short summary report" />
-                    <has_text text="end{document}" />
-                </assert_contents>
-            </output>
-        </xml>
-        <xml name="tsv_output_test" token_assembler="">
-            <output name="short_report_tsv">
-                <assert_contents>
-                    <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" />
-                </assert_contents>
-            </output>
-        </xml>
-        <xml name="pdf_output_test" token_assembler="">
-            <output name="short_report_pdf">
-                <assert_contents>
-                    <has_text text="rnaQUAST short report" />
-                </assert_contents>
-            </output>
-        </xml>
+        <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
-    </requirements>
+    <expand macro='xrefs'/>
+    <expand macro='requirements'/>
     <stdio>
         <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
     </stdio>
     <command detect_errors="exit_code"><![CDATA[
-    #import re
-    #for $i in $transcripts
+    mkdir -p './complete_reports/' &&
+    mkdir -p './fasta_files/' &&
+    #import os, re, glob
+    #for $i in $transcripts        
         ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
     #end for
-    #if $r
-        #for $rf in $r
+    #if $reference
+        #for $rf in $reference
             ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
         #end for
     #end if
-    #if $gene_coordinates.use_gtf == "true"
+    #if $gene_coordinates.selector == "true"
         #for $g in $gene_coordinates.gtf
             ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
         #end for
-    #end if
+    #end if    
     mkdir outputdir &&
     rnaQUAST.py
-    --threads \${GALAXY_SLOTS:-1}
+    --threads \${GALAXY_SLOTS:-8}
     --transcripts
     #for $i in $transcripts
          '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
     #end for
-    $strand_specific
-    #if $r
+    #if $reads_option.selector == 'paired'
+        --left_reads '${reads_option.forward_reads}'
+        --right_reads '${reads_option.reverse_reads}'
+    #else if $reads_option.selector == 'single'
+        --single_reads '${reads_option.single_reads}'
+    #end if
+    $advanced_options.strand_specific
+    #if $reads_alignment
+        --reads_alignment '${reads_alignment}'
+    #end if
+    #if $reference
         -r
-        #for $rf in $r
+        #for $rf in $reference
             '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
         #end for
     #end if
-    #if $gene_coordinates.use_gtf == "true"
+    #if $gene_coordinates.selector == "true"
         --gtf
         #for $g in $gene_coordinates.gtf
             '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
@@ -127,271 +56,428 @@
         $gene_coordinates.disable_infer_genes
         $gene_coordinates.disable_infer_transcripts
     #end if
-    $prokaryote
-    --min_alignment '$min_alignment'
-    #if "pdf" not in $out_sr and "plots" not in $out_add
+    $advanced_options.prokaryote
+    --min_alignment $advanced_options.min_alignment
+    $advanced_options.blat
+
+    #if "pdf" not in $output_options.out_sr
         --no_plots
     #end if
-    $blat
-    #if $busco_option.busco == 'true'
-        --busco $busco_option.lineage
+    #if $use_busco.selector == 'true'
+        --busco
+        #if $use_busco.lineage_conditional.selector == 'cached'
+            '${use_busco.lineage_conditional.cached_db.fields.path}'
+        #else
+            $use_busco.lineage_conditional.lineage
+        #end if
     #end if
-    ##$gene_mark
-    $meta
-    --lower_threshold $lower_threshold
-    --upper_threshold $upper_threshold
+    ## $advanced_options.gene_mark
+    $advanced_options.meta
+    --lower_threshold $advanced_options.lower_threshold
+    --upper_threshold $advanced_options.upper_threshold
     -o outputdir
 
-    && mkdir details
-
-    ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output)
-    ## to a joint dir (details) to make them discoverable
-    ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
-    #for $i in $transcripts
-        #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
-        &&
-        (for f in \$(find 'outputdir/'$basename'_output' -type f);
-        do
-            d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
-            mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
-        done)
-    #end for
-
-    ## rename .list files to .txt files to make them detectable (format detection by extension)
-    ## the final `true` seems needed since otherwise the `;` at the end is swallowed
-    && find details/ -name "*.list" -exec mv {} {}.txt \;
-    && true
+    #if 'gz' in $output_options.out_add
+        && tar -czvf results.tar.gz './outputdir'
+    #end if
+    
+    #if len($transcripts) == 1
+        #set $path = 'outputdir/' + re.sub('[^\w\-.]', '_', $transcripts[0].element_identifier).split(".")[0] + '_output'
+        && mv '${path}' './results'
+        ## rename .list files to .txt files to make them detectable (the trailing `true` keeps the `\;` from being swallowed)
+        && find './results/' -name "*.list" -exec mv {} {}.txt \;
+        && true
+        && printf "************ METRICS/TRANSCRIPTS ***************\n" > stats.txt
+        && for file_name in ./results/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
+        && tail -q -n +2 "\$file_name" >> stats.txt;
+        done
+        && cat stats.txt > '$stats'
+        #if $gene_coordinates.selector == 'true' and $reference
+            && mv ./results/*fasta ./fasta_files/
+        #end if
+    #else
+        && mkdir -p './results/'
+        #if $gene_coordinates.selector == 'true' and $reference
+            #for $transcript in $transcripts
+                #set $path = 'outputdir/' + re.sub('[^\w\-.]', '_', $transcript.element_identifier).split(".")[0] + '_output'
+                && rm -r ./results
+                && cp -r '${path}' './results'
+                && mv ./results/*fasta './fasta_files/'
+            #end for
+        #end if
+        && find './outputdir/comparison_output' -name "*.list" -exec mv {} {}.txt \;
+        && true
+        && printf "************ COMPARISON METRICS ***************\n" > stats.txt
+        && for file_name in ./outputdir/comparison_output/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
+        && tail -q -n +2 "\$file_name" >> stats.txt; done
+        && cat stats.txt > '$stats'
+    #end if
     ]]>    </command>
     <inputs>
         <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/>
-        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" 
-            help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
-        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA forma." />
+        <conditional name="reads_option">
+            <param name="selector" type="select" label="Single-end or paired-end reads">
+                <option value="" selected="true">Disabled</option>
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end (as individual datasets)</option>
+            </param>
+            <when value=""/>
+            <when value="single">
+                <param name="single_reads" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="RNA-Seq FASTQ file"/>
+            </when>
+            <when value="paired">
+                <param name="forward_reads" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="RNA-Seq FASTQ file, forward reads"/>
+                <param name="reverse_reads" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="RNA-Seq FASTQ file, reverse reads"/>
+            </when>
+        </conditional>
+        <param argument="--reference" type="data" format="fasta" label="Reference genome" multiple="true" optional="true" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." />
         <conditional name="gene_coordinates">
-            <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
-                <option value="true" selected="true">Yes</option>
-                <option value="false">No</option>
+            <param name="selector" type="select" label="Genome annotation" help="Genome annotation file. We recommend to use files downloaded from GENCODE or Ensembl.">
+                <option value="true">Enabled</option>
+                <option value="false" selected="true">Disabled</option>
             </param>
             <when value="true">
-                <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
-                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?" 
+                <param argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
+                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" Disable infer genes" 
                     help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/>
-                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" help="Is option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
+                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="Disable infer transcripts" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
             </when>
             <when value="false">
             </when>
         </conditional>
-        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?" help="Use this option if the genome is prokaryotic."/>
-        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
-        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " />
-        <conditional name="busco_option">
-          <param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows to detect core genes in the assembled transcripts">
+        <param argument="--reads_alignment" type="data" format="sam" label="Aligned reads to reference genome" optional="true" help="File with read alignments to the reference genome" />
+        <conditional name="use_busco">
+          <param name="selector" type="select" label="Run BUSCO" help="BUSCO allows to detect core genes in the assembled transcripts">
               <option value="false">Disabled</option>
               <option value="true">Enabled</option>
           </param>
           <when value="false"/>
           <when value="true">
-            <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
-                <option value="metazoa">Metazoa</option>
-                <option value="eukaryota">Eukaryota</option>
-                <option value="arthropoda">Arthropoda</option>
-                <option value="vertebrata">Vertebrata</option>
-                <option value="fungi">Fungi</option>
-                <option value="bacteria">Bacteria</option>
-            </param>
+            <conditional name="lineage_conditional">
+                <param name="selector" type="select" label="Lineage data source">
+                    <option value="download">Download lineage data</option>
+                    <option value="cached" selected="true">Use cached lineage data</option>
+                </param>
+                <when value="cached">
+                    <param name="cached_db" label="Cached database with lineage" type="select">
+                        <options from_data_table="busco_database">
+                            <validator message="No BUSCO database is available" type="no_options" />
+                        </options>
+                    </param>
+                </when>
+                <when value="download">
+                    <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
+                        <option value="metazoa">Metazoa</option>
+                        <option value="eukaryota">Eukaryota</option>
+                        <option value="arthropoda">Arthropoda</option>
+                        <option value="vertebrata">Vertebrata</option>
+                        <option value="fungi">Fungi</option>
+                        <option value="bacteria">Bacteria</option>
+                    </param>
+                </when>
+            </conditional>
           </when>
         </conditional>
-        <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
-        <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for meta-transcriptome assemblies" />
-        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
-        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
-        <param name="out_sr" type="select" multiple="true" label="Short report formats">
-            <option value="tsv" selected="true">tabular</option>
-            <option value="txt">txt</option>
-            <option value="tex">tex</option>
-            <option value="pdf" selected="true">pdf</option>
-        </param>
-        <param name="out_add" type="select" multiple="true" label="Additional outputs">
-            <option value="logs">Logs</option>
-            <option value="plots" selected="true">Plots (only for n>1)</option>
-            <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
-            <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
-            <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
-        </param>
+        <section name="advanced_options" title="Advanced options">
+            <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific RNA-seq data"
+                help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
+            <param argument="--min_alignment" type="integer" min="0" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
+            <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT instead of GMAP" help="BLAT is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads." />
+            <!-- GeneMarkS-T is not available in Bioconda, so the option below stays disabled -->
+            <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"
+                help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
+            <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" />
+            <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
+            <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
+            <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Prokaryotic organism(s)" help="Use this option if the genome is prokaryotic"/>
+        </section>
+        <section name="output_options" title="Output options" expanded="true">
+            <param name="out_sr" type="select" multiple="true"  display="checkboxes" label="Short report formats">
+                <option value="tabular">Tabular</option>
+                <option value="tex">TeX</option>
+                <option value="pdf" selected="true">PDF</option>
+            </param>
+            <param name="out_add" type="select" label="Additional outputs" multiple="true" display="checkboxes">
+                <option value="complete">Complete report</option>
+                <option value="fasta" >FASTA files</option>
+                <option value="logs">Logs</option>
+                <option value="gz">Compressed output folder</option>
+            </param>
+        </section>
     </inputs>
-
     <outputs>
-        <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
-            <filter>"pdf" in out_sr</filter>
-        </data>
-        <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
-            <filter>"txt" in out_sr</filter>
-        </data>
-        <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
-            <filter>"tex" in out_sr</filter>
-        </data>
-        <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
-            <filter>"tsv" in out_sr</filter>
+        <data name="stats" format="txt" label="${tool.name} on ${on_string}: complete report">
+            <filter>output_options['out_add'] and "complete" in  output_options['out_add']</filter>
         </data>
         <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs">
-            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" />
-            <filter>"logs" in out_add</filter>
+            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs" visible="false" />
+            <filter>output_options['out_add'] and "logs" in  output_options['out_add']</filter>
         </collection>
-        <!-- note the output filter of the next two outputs checks if there is
-             more than 1 input for transcripts (for 1 its a HDA, for more list or HDAs) -->
-        <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots">
-            <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" />
-            <filter> isinstance(transcripts, list) and "plots" in out_add</filter>
+        <collection name="fasta_files" type="list" label="${tool.name} on ${on_string}: FASTA files">
+            <discover_datasets ext="fasta" pattern="(?P&lt;name&gt;.+)\.fasta" directory="fasta_files" visible="false" />
+            <filter>output_options['out_add'] and "fasta" in  output_options['out_add']</filter>
+            <filter>gene_coordinates['selector'] == 'true'</filter>
+            <filter>reference</filter>
         </collection>
-        <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison">
-            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
-            <filter> isinstance(transcripts, list) and "comparison" in out_add</filter>
-        </collection>
-        <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false" />
-            <filter>"details" in out_add</filter>
-        </collection>
-        <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false" />
-            <filter>"details_plots" in out_add</filter>
-        </collection>
+        <data name="compressed_files" format="tgz" label="${tool.name} on ${on_string}: compressed results folder" from_work_dir="results.tar.gz">
+            <filter>output_options['out_add'] and "gz" in output_options['out_add']</filter>
+        </data>
+        <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: short report (pdf)" from_work_dir="outputdir/short_report.pdf">
+            <filter>output_options['out_sr'] and "pdf" in output_options['out_sr']</filter>
+        </data>
+        <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: short report (tex)" from_work_dir="outputdir/short_report.tex">
+            <filter>output_options['out_sr'] and "tex" in output_options['out_sr']</filter>
+        </data>
+        <data name="short_report_tabular" format="tabular" label="${tool.name} on ${on_string}: short report (tabular)" from_work_dir="outputdir/short_report.tsv">
+            <filter>output_options['out_sr'] and "tabular" in output_options['out_sr']</filter>
+        </data>
     </outputs>
     <tests>
-        <test expect_num_outputs="7">
-            <param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" />
-            <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
+        <!-- Test 01: Minimum input txt output-->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <output name="short_report_tabular" file="test_01_short_report.tab"/>
+        </test>
+        <!-- Test 02: Transcriptome reference,single read,  txt output-->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <conditional name="reads_option">
+                <param name="selector" value="single"/>
+                <param name="single_reads" value="single_end.fastq.gz"/>
+            </conditional>
+            <output name="short_report_tabular">
+                <assert_contents>
+                    <has_text text="Transcripts" />
+                    <has_size value="95" delta="5"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 03: Transcriptome reference and annotation, txt output-->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
             <conditional name="gene_coordinates">
-                <param name="use_gtf" value="true" />
-                <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
-                <param name="disable_infer_genes" value="true" />
-                <param name="disable_infer_transcripts" value="true" />
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
+            </conditional>
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <conditional name="reads_option">
+                <param name="selector" value="single"/>
+                <param name="single_reads" value=""/>
+            </conditional>
+            <output name="short_report_tabular" file="test_03_short_report.tab"/>
+        </test>
+        <!-- Test 04: Transcriptome reference and annotation, txt output-->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <conditional name="gene_coordinates">
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
+            </conditional>
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <conditional name="reads_option">
+                <param name="selector" value="single"/>
+                <param name="single_reads" value="single_end.fastq.gz"/>
+            </conditional>
+            <output name="short_report_tabular">
+                <assert_contents>
+                    <has_text text="Transcripts" />
+                    <has_size value="140" delta="5"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 05: Transcriptome reference, annotation and mapping, txt output-->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <conditional name="gene_coordinates">
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
             </conditional>
-            <param name="out_sr" value="txt,tex,tsv" />
-            <param name="out_add" value="logs,comparison,plots,details" />
-            <expand macro="txt_output_test" />
-            <expand macro="tex_output_test" />
-            <expand macro="tsv_output_test" />
-            <output_collection name="comparison_png" type="list" count="15" />
-            <output_collection name="comparison" type="list" count="19" />
-            <output_collection name="list_logs" type="list" count="8" />
-            <output_collection name="details" type="list:list" count="2">
-                <expand macro="details_output_test" assembler="Trinity" />
-                <expand macro="details_output_test" assembler="idba" />
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <conditional name="reads_option">
+                <param name="selector" value="paired"/>
+                <!-- Paired-end input: the forward and reverse mates come from two different files -->
+                <param name="forward_reads" value="input_F.fastqsanger"/>
+                <param name="reverse_reads" value="input_R.fastqsanger"/>
+            </conditional>
+            <output name="short_report_tabular">
+                <assert_contents>
+                    <has_text text="Transcripts" />
+                    <has_size value="140" delta="5"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 06: Transcripts, gene annotation (GTF), paired-end reads and BUSCO with a cached lineage; tabular short report -->
+        <test expect_num_outputs="1">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <conditional name="gene_coordinates">
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
+            </conditional>
+            <conditional name="reads_option">
+                <param name="selector" value="paired"/>
+                <param name="forward_reads" value="input_F.fastqsanger"/>
+                <param name="reverse_reads" value="input_R.fastqsanger"/>
+            </conditional>
+            <section name="output_options">
+                <param name="out_sr" value="tabular"/>
+            </section>
+            <conditional name="use_busco">
+                <param name="selector" value="true"/>
+                <conditional name="lineage_conditional">
+                    <param name="selector" value="cached"/>
+                    <param name="cached_db" value="busco-demo-db-20230328"/>
+                </conditional>
+            </conditional>
+            <!-- The report is checked only for the "Transcripts" text and an approximate size (140, delta 5), not against a reference file -->
+            <output name="short_report_tabular">
+                <assert_contents>
+                    <has_text text="Transcripts" />
+                    <has_size value="140" delta="5"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 07: Transcriptome reference, annotation, mapping and BUSCO, additional outputs-->
+        <test expect_num_outputs="4">
+            <param name="transcripts" value="transcriptome01.fasta"/>
+            <conditional name="gene_coordinates">
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
+            </conditional>
+            <param name="reference" value="reference.fasta"/>
+            <conditional name="reads_option">
+                <param name="selector" value="paired"/>
+                <param name="forward_reads" value="input_F.fastqsanger"/>
+                <param name="reverse_reads" value="input_R.fastqsanger"/>
+            </conditional>
+            <conditional name="use_busco">
+                <param name="selector" value="true"/>
+                <conditional name="lineage_conditional">
+                    <param name="selector" value="cached"/>
+                    <param name="cached_db" value="busco-demo-db-20230328"/>
+                </conditional>
+            </conditional>
+            <!-- Two short-report formats plus the FASTA collection and the compressed archive: four outputs in total -->
+            <section name="output_options">
+                <param name="out_sr" value="pdf,tabular"/>
+                <param name="out_add" value="fasta,gz"/>
+            </section>
+            <output_collection name="fasta_files" type="list" count="7">
+                <element name="transcriptome01.paralogs" file="test_07_paralogs.fasta" ftype="fasta"/>
             </output_collection>
+            <!-- The PDF is compared by size only (sim_size, delta 1000) rather than by content -->
+            <output name="short_report_pdf" file="test_07_short_report.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
+            <output name="short_report_tabular" file="test_07_short_report.tab" ftype="tabular"/>
+            <!-- The archive is checked by size only (281260, delta 250) -->
+            <output name="compressed_files" ftype="tgz">
+                <assert_contents>
+                    <has_size value="281260" delta="250"/>
+                </assert_contents>
+            </output>
         </test>
+        <!-- Test 08: Multiple inputs-->
         <test expect_num_outputs="6">
-            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
+            <param name="transcripts" value="transcriptome01.fasta,transcriptome02.fasta"/>
+            <param name="reference" value="reference.fasta"/>
             <conditional name="gene_coordinates">
-                <param name="use_gtf" value="false" />
+                <param name="selector" value="true"/>
+                <param name="gtf" value="reference.gtf"/>
+            </conditional>
+            <section name="output_options">
+                <param name="out_sr" value="tabular,pdf"/>
+                <!-- out_add is nested alongside out_sr, matching how Test 07 supplies it inside output_options -->
+                <param name="out_add" value="complete,fasta,logs,gz"/>
+            </section>
+            <conditional name="use_busco">
+                <param name="selector" value="true"/>
+                <conditional name="lineage_conditional">
+                    <param name="selector" value="cached"/>
+                    <param name="cached_db" value="busco-demo-db-20230328"/>
+                </conditional>
+            </conditional>
+            <conditional name="reads_option">
+                <param name="selector" value="single"/>
+                <param name="single_reads" value="single_end.fastq.gz"/>
             </conditional>
-            <param name="min_alignment" value="30" />
-            <param name="lower_threshold" value="45" />
-            <param name="upper_threshold" value="95" />
-            <param name="out_sr" value="txt,tex,tsv,pdf" />
-            <param name="out_add" value="logs,details_plots" />
-
-            <expand macro="pdf_output_test" />
-            <expand macro="tex_output_test" />
-            <expand macro="tsv_output_test" />
-            <expand macro="txt_output_test" />
-            <output_collection name="list_logs" type="list">
-                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
-                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
+            <output name="short_report_tabular" value="test_08_short_report.tab" ftype="tabular"/>
+            <output name="short_report_pdf" value="test_08_short_report.pdf" ftype="pdf"/>
+            <output name="stats" value="test_08_complete_report.tab" ftype="txt"  lines_diff="6" />
+            <output_collection name="fasta_files" type="list" count="14">
+                <element name="transcriptome01.paralogs" file="test_08_paralogs.fasta" ftype="fasta"/>
             </output_collection>
-            <output_collection name="details_png" type="list:list" count="1">
-                <element name="Trinity">
-                    <expand macro="element_has_text" name="Nx" text="PNG" />
-                    <expand macro="element_has_text" name="transcript_length" text="PNG" />
+            <output_collection name="list_logs" type="list" count="14">
+                <element name="STAR.out" ftype="txt">
+                    <assert_contents>
+                        <has_text text="STAR --runThreadN"/>
+                        <has_text text="finished successfully"/>
+                    </assert_contents>
+                </element>
+                <element name="gmap_build.out" ftype="txt">
+                    <assert_contents>
+                        <has_text text="No alternate scaffolds observed"/>
+                    </assert_contents>
+                </element>
+                <element name="rnaQUAST" ftype="txt">
+                    <assert_contents>
+                        <has_text text="THE QUALITY OF TRANSCRIPTOME ASSEMBLY DONE"/>
+                        <has_text text="Thank you for using rnaQUAST!"/>
+                    </assert_contents>
                 </element>
             </output_collection>
         </test>
-        <test expect_num_outputs="6">
-            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
-            <conditional name="gene_coordinates">
-                <param name="use_gtf" value="false" />
-            </conditional>
-            <param name="min_alignment" value="30" />
-            <param name="lower_threshold" value="45" />
-            <param name="upper_threshold" value="95" />
-            <param name="out_sr" value="txt,tex,tsv,pdf" />
-            <param name="out_add" value="logs,details_plots" />
-            <conditional name="busco_option">
-                <param name="busco" value="true"/>
-                <param name="lineage" value="metazoa"/>
-            </conditional>
-            <expand macro="pdf_output_test" />
-            <expand macro="tex_output_test" />
-            <expand macro="tsv_output_test" />
-            <expand macro="txt_output_test" />
-            <output_collection name="list_logs" type="list">
-                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
-                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
-            </output_collection>
-            <output_collection name="details_png" type="list:list" count="1">
-                <element name="Trinity">
-                    <expand macro="element_has_text" name="Nx" text="PNG" />
-                    <expand macro="element_has_text" name="transcript_length" text="PNG" />
-                </element>
-            </output_collection>
-            <assert_command>
-                <has_text text="--busco metazoa"/>
-            </assert_command>
-        </test>
+
     </tests>
     <help><![CDATA[
-**What is rnaQUAST**
-- a quality assessment tool for de novo transcriptome assemblies
-- evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
-- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
+
+.. class:: infomark
+
+**Purpose**
 
-**Using rnaQuast without reference** you wont get:
+rnaQUAST is a tool for evaluating RNA-Seq assemblies using a reference genome and a gene database. In addition, rnaQUAST is capable
+of estimating gene database coverage by raw reads and of performing de novo quality assessment.
 
-- x-assembled (Exons)
-- Alignments per Isoform
-- x-covered (Exons)
-- x-matched (Blocks)
-- gmap build logs
+.. class:: infomark
 
-**Using rnaQuast with reference** you will get:
-- Reports
-- Logs
-- Alignement/Basic Metrics
-- Misassemblies/ Specificity/ Sensitivity
-- Alignment multiplicity
-- Block/ Transcript Lentgh
-- Blocks per alignment
-- Mismatch rate
-- x-aligned
-- Nx
-- Blocks per alignment
-- gmap build logs
+**rnaQUAST pipeline**
+
+To evaluate the quality of the assembled transcripts, rnaQUAST takes a reference genome in FASTA format and, optionally, its gene database in
+GFF/GTF format. The user provides a FASTA file with transcripts, which will be aligned to the given reference genome using GMAP
+or BLAT. The alignments are analyzed to calculate simple metrics and are then matched against the isoforms from the gene database in order
+to obtain statistics that represent the completeness and correctness levels of the assembly. In addition, rnaQUAST is capable of estimating
+gene database coverage by raw reads using STAR or TopHat2. For de novo quality assessment, when a reference genome and gene database are
+unavailable, the transcripts are analyzed using BUSCO.
+
+.. class:: infomark
 
-**Using rnaQuast without gene coordinates** you wont get:
-- x-assembled (Exons)
-- Alignments per Isoform
-- x-covered (Exons)
-- x-matched (Blocks)
-- gmap build logs
-- Database Metrics
-- Alignment multiplicity
-- Mismatch rate
-- NAx
-- x-aligned
-**Using rnaQuast with gene coordinates** you will get:
-- Reports
-- Logs
-- Alignement/Basic Metrics
-- Misassemblies/Specificity/Sensitivity
-- Alignment multiplicity
-- Block/Transcript length
-- Blocks per alignment
-- Mismatch rate
-- x-aligned
-- Nx/NAx
-- gmap build logs
-- Database Metrics
-- Alignment multiplicity
-More informations, see citations.
+**Metrics and alignment analysis**
+
+rnaQUAST calculates various metrics without using alignment information, e.g. length distribution and N50 of the assembled transcripts. 
+Additionally, rnaQUAST computes the following statistics for the gene database: the total number of genes and isoforms, isoform and exon 
+length distribution, average number of exons per gene, etc.
+
+To analyze transcripts' alignments, rnaQUAST first filters out short partial alignments (shorter than a user-defined threshold; the default
+value is 50 bp). Such short alignments are typically caused by genomic repeats and thus are ignored. Afterwards, rnaQUAST selects the
+best-scored spliced alignment for each transcript. If a transcript has more than one alignment with the highest score, it is reported
+as multiply aligned. Otherwise, it is considered to be uniquely aligned. If the best-scored alignment is discordant (e.g. the transcript
+has partial alignments that are mapped either to different strands or to different chromosomes), the transcript is classified as misassembled.
+Transcripts without misassemblies are analyzed to calculate metrics such as the average transcript alignment fraction and the mismatch rate.
+
+For simplicity of explanation, a transcript hereafter refers to a sequence generated by the assembler, and an isoform denotes a sequence
+from the gene database. rnaQUAST matches best-scored alignments of non-misassembled transcripts to the isoforms' coordinates and analyzes
+them to estimate how well the isoforms are covered by the assembly. rnaQUAST computes metrics such as database coverage (the total number
+of covered bases of all isoforms divided by the total length of all isoforms) and the number of 50%/95%-assembled isoforms. An isoform is
+considered to be x%-assembled if at least x% of it is covered by a single transcript. Conversely, to evaluate how well the assembled
+transcripts are covered by the isoforms, rnaQUAST estimates the number of unannotated transcripts (those that align to the genome but do not
+match any isoform) and the number of 50%/95%-matched transcripts (those that have the corresponding fraction mapped to an isoform). Note that the
+thresholds described above (50% and 95%) can be adjusted by the user.
+
+
     ]]>    </help>
     <citations>
         <citation type="doi">10.1093/bioinformatics/btw218 </citation>
author	iuc
date	Wed, 07 Jun 2023 12:02:03 +0000
parents	f9f2ad782d8f
children	8e66f695d859