rnaquast: rna_quast.xml comparison

comparison rna_quast.xml @ 5:f89e3c318453 draft

planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit c633f5c634128e3c81ab48e94df6f703dd005c46

author	iuc
date	Wed, 07 Jun 2023 12:02:03 +0000
parents	f9f2ad782d8f
children	8e66f695d859

comparison

equal deleted inserted replaced

-:f9f2ad782d8f
+:f89e3c318453
-<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
 <description>A quality assessment tool for De Novo transcriptome assemblies</description>
-<xrefs>
-<xref type="bio.tools">rnaQUAST</xref>
-</xrefs>
 <macros>
-<token name="@TOOL_VERSION@">2.2.1</token>
+<import>macros.xml</import>
-<token name="@VERSION_SUFFIX@">1</token>
-<xml name="element_matching_line" token_name="" token_expression="">
-<element name="@NAME@">
-<assert_contents>
-<has_line_matching expression="@EXPRESSION@" />
-</assert_contents>
-</element>
-</xml>
-<xml name="element_has_text" token_name="" token_text="">
-<element name="@NAME@">
-<assert_contents>
-<has_text text="@TEXT@" />
-</assert_contents>
-</element>
-</xml>
-<xml name="details_output_test" token_assembler="">
-<element name="@ASSEMBLER@">
-<element name="5000%-assembled.list">
-<assert_contents>
-<has_n_lines n="0" />
-</assert_contents>
-</element>
-<element name="9500%-assembled.list">
-<assert_contents>
-<has_n_lines n="0" />
-</assert_contents>
-</element>
-<expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" />
-<expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" />
-<expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" />
-<expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" />
-<expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" />
-<expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" />
-<expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" />
-<expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" />
-<expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" />
-<expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" />
-<expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" />
-</element>
-</xml>
-<xml name="txt_output_test" token_assembler="">
-<output name="short_report_txt">
-<assert_contents>
-<has_text text="SHORT SUMMARY REPORT" />
-</assert_contents>
-</output>
-</xml>
-<xml name="tex_output_test" token_assembler="">
-<output name="short_report_tex">
-<assert_contents>
-<has_text text="Short summary report" />
-<has_text text="end{document}" />
-</assert_contents>
-</output>
-</xml>
-<xml name="tsv_output_test" token_assembler="">
-<output name="short_report_tsv">
-<assert_contents>
-<has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" />
-</assert_contents>
-</output>
-</xml>
-<xml name="pdf_output_test" token_assembler="">
-<output name="short_report_pdf">
-<assert_contents>
-<has_text text="rnaQUAST short report" />
-</assert_contents>
-</output>
-</xml>
 </macros>
-<requirements>
+<expand macro='xrefs'/>
-<requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
+<expand macro='requirements'/>
-</requirements>
 <stdio>
 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
 </stdio>
 <command detect_errors="exit_code"><![CDATA[
-#import re
+mkdir -p './complete_reports/' &&
-#for $i in $transcripts
+mkdir -p './fasta_files/' &&
+#import os, re, glob
+#for $i in $transcripts
 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
 #end for
-#if $r
+#if $reference
-#for $rf in $r
+#for $rf in $reference
 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
 #end for
 #end if
-#if $gene_coordinates.use_gtf == "true"
+#if $gene_coordinates.selector == "true"
 #for $g in $gene_coordinates.gtf
 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
 #end for
 #end if
 mkdir outputdir &&
 rnaQUAST.py
---threads \${GALAXY_SLOTS:-1}
+--threads \${GALAXY_SLOTS:-8}
 --transcripts
 #for $i in $transcripts
 '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
 #end for
-$strand_specific
+#if $reads_option.selector == 'paired'
-#if $r
+--left_reads '${reads_option.forward_reads}'
+--right_reads '${reads_option.reverse_reads}'
+#else if $reads_option.selector == 'single'
+--single_reads '${reads_option.single_reads}'
+#end if
+$advanced_options.strand_specific
+#if $reads_alignment
+--reads_alignment '${reads_alignment}'
+#end if
+#if $reference
 -r
-#for $rf in $r
+#for $rf in $reference
 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
 #end for
 #end if
-#if $gene_coordinates.use_gtf == "true"
+#if $gene_coordinates.selector == "true"
 --gtf
 #for $g in $gene_coordinates.gtf
 '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
 #end for
 $gene_coordinates.disable_infer_genes
 $gene_coordinates.disable_infer_transcripts
 #end if
-$prokaryote
+$advanced_options.prokaryote
---min_alignment '$min_alignment'
+--min_alignment $advanced_options.min_alignment
-#if "pdf" not in $out_sr and "plots" not in $out_add
+$advanced_options.blat
+#if "pdf" not in $output_options.out_sr
 --no_plots
 #end if
-$blat
+#if $use_busco.selector == 'true'
-#if $busco_option.busco == 'true'
+--busco
---busco $busco_option.lineage
+## BUSCO lineage argument: path of the cached data-table entry, or the lineage selected for download
+#if $use_busco.lineage_conditional.selector == 'cached'
-#end if
+'${use_busco.lineage_conditional.cached_db.fields.path}'
-##$gene_mark
+#else
-$meta
+$use_busco.lineage_conditional.lineage
---lower_threshold $lower_threshold
+#end if
---upper_threshold $upper_threshold
+#end if
+## $advanced_options.gene_mark
+$advanced_options.meta
+--lower_threshold $advanced_options.lower_threshold
+--upper_threshold $advanced_options.upper_threshold
 -o outputdir
-&& mkdir details
+#if 'gz' in $output_options.out_add
+&& tar -czvf results.tar.gz './outputdir'
-## move per outputs that are generated for each input (outputdir/ASSEMBLER_output)
+#end if
-## to a joint dir (details) to make them discoverable
-## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
+#if len($transcripts) == 1
-#for $i in $transcripts
+## build the folder name from the same sanitized file name the transcript was symlinked under, minus its extension
+#set $path = "outputdir/" + os.path.splitext(re.sub('[^\w\-.]', '_', $transcripts[0].element_identifier))[0] + "_output"
-#set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
+&& mv '${path}' './results'
-&&
+## rename .list files to .txt files to make them detectable
-(for f in \$(find 'outputdir/'$basename'_output' -type f);
+&& find './results/' -name "*.list" -exec mv {} {}.txt \;
-do
+&& true
-d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
+&& printf "************ METRICS/TRANSCRIPTS ***************\n" > stats.txt
-mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
+&& for file_name in ./results/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
-done)
+&& tail -q -n +2 "\$file_name" >> stats.txt;
-#end for
+done
+&& cat stats.txt > '$stats'
-## rename .list files to .txt files to make them detectable (format detection by extension)
+#if $gene_coordinates.selector == 'true' and $reference
-## the final `true` seems needed since otherwise the `;` at the end is swallowed
+&& mv ./results/*fasta ./fasta_files/
-&& find details/ -name "*.list" -exec mv {} {}.txt \;
+#end if
-&& true
+#else
+&& mkdir -p './results/'
+#if $gene_coordinates.selector == 'true' and $reference
+#for $i, $transcript in enumerate($transcripts)
+#set $path = "outputdir/" + os.path.splitext(re.sub('[^\w\-.]', '_', $transcript.element_identifier))[0] + "_output"
+&& rm -r ./results
+&& cp -r '$path' './results'
+&& mv ./results/*fasta './fasta_files/'
+#end for
+#end if
+&& find './outputdir/comparison_output' -name "*.list" -exec mv {} {}.txt \;
+&& true
+&& printf "************ COMPARISON METRICS ***************\n" > stats.txt
+&& for file_name in ./outputdir/comparison_output/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
+&& tail -q -n +2 "\$file_name" >> stats.txt; done
+&& cat stats.txt > '$stats'
+#end if
 ]]>    </command>
 <inputs>
 <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/>
-<param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"
+<conditional name="reads_option">
-help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
+<param name="selector" type="select" label="Single-end or paired-end reads">
-<param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA forma." />
+<option value="" selected="true">Disabled</option>
+<option value="single">Single-end</option>
+<option value="paired">Paired-end (as individual datasets)</option>
+</param>
+<when value=""/>
+<when value="single">
+<param format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" name="single_reads" type="data" label="RNA-Seq FASTQ/FASTA file"/>
+</when>
+<when value="paired">
+<param name="forward_reads" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/>
+<param name="reverse_reads" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/>
+</when>
+</conditional>
+<param argument="--reference" type="data" format="fasta" label="Reference genome"  multiple="true" optional="true" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." />
 <conditional name="gene_coordinates">
-<param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
+<param name="selector" type="select" label="Genome annotation" help="Genome annotation file. We recommend to use files downloaded from GENCODE or Ensembl.">
-<option value="true" selected="true">Yes</option>
+<option value="true">Enabled</option>
-<option value="false">No</option>
+<option value="false" selected="true">Disabled</option>
 </param>
 <when value="true">
-<param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
+<param argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
-<param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?"
+<param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" Disable infer genes"
 help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/>
-<param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" help="Is option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
+<param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="Disable infer transcripts" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
 </when>
 <when value="false">
 </when>
 </conditional>
-<param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?" help="Use this option if the genome is prokaryotic."/>
+<param argument="--reads_alignment" type="data" format="sam" label="Aligned reads to reference genome" optional="true" help="File with read alignments to the reference genome" />
-<param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
+<conditional name="use_busco">
-<param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " />
+<param name="selector" type="select" label="Run BUSCO" help="BUSCO allows to detect core genes in the assembled transcripts">
-<conditional name="busco_option">
-<param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows to detect core genes in the assembled transcripts">
 <option value="false">Disabled</option>
 <option value="true">Enabled</option>
 </param>
 <when value="false"/>
 <when value="true">
-<param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
+<conditional name="lineage_conditional">
-<option value="metazoa">Metazoa</option>
+<param name="selector" type="select" label="Lineage data source">
-<option value="eukaryota">Eukaryota</option>
+<option value="download">Download lineage data</option>
-<option value="arthropoda">Arthropoda</option>
+<option value="cached" selected="true">Use cached lineage data</option>
-<option value="vertebrata">Vertebrata</option>
+</param>
-<option value="fungi">Fungi</option>
+<when value="cached">
-<option value="bacteria">Bacteria</option>
+<param name="cached_db" label="Cached database with lineage" type="select">
-</param>
+<options from_data_table="busco_database">
+<validator message="No BUSCO database is available" type="no_options" />
+</options>
+</param>
+</when>
+<when value="download">
+<param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
+<option value="metazoa">Metazoa</option>
+<option value="eukaryota">Eukaryota</option>
+<option value="arthropoda">Arthropoda</option>
+<option value="vertebrata">Vertebrata</option>
+<option value="fungi">Fungi</option>
+<option value="bacteria">Bacteria</option>
+</param>
+</when>
+</conditional>
 </when>
 </conditional>
-<!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
+<section name="advanced_options" title="Advanced options" >
-<param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for meta-transcriptome assemblies" />
+<param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific RNA-seq data"
-<param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
+help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
-<param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
+<param argument="--min_alignment" type="integer" min="0" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
-<param name="out_sr" type="select" multiple="true" label="Short report formats">
+<param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT instead of GMAP" help="BLAT is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " />
-<option value="tsv" selected="true">tabular</option>
+<!-- GeneMarkST is not in Bioconda -->
-<option value="txt">txt</option>
+<!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"
-<option value="tex">tex</option>
+help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
-<option value="pdf" selected="true">pdf</option>
+<param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" />
-</param>
+<param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
-<param name="out_add" type="select" multiple="true" label="Additional outputs">
+<param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
-<option value="logs">Logs</option>
+<param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Prokaryotic organism(s)" help="Use this option if the genome is prokaryotic"/>
-<option value="plots" selected="true">Plots (only for n>1)</option>
+</section>
-<option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
+<section name="output_options" title="Output options" expanded="true">
-<option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
+<param name="out_sr" type="select" multiple="true"  display="checkboxes" label="Short report formats">
-<option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
+<option value="tabular">Tabular</option>
-</param>
+<option value="tex">TeX</option>
+<option value="pdf" selected="true">PDF</option>
+</param>
+<param name="out_add" type="select" label="Additional outputs" multiple="true" display="checkboxes">
+<option value="complete">Complete report</option>
+<option value="fasta" >FASTA files</option>
+<option value="logs">Logs</option>
+<option value="gz">Compressed output folder</option>
+</param>
+</section>
 </inputs>
 <outputs>
-<data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
+<data name="stats" format="txt" label="${tool.name} on ${on_string}: complete report">
-<filter>"pdf" in out_sr</filter>
+<filter>output_options['out_add'] and "complete" in  output_options['out_add']</filter>
-</data>
-<data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
-<filter>"txt" in out_sr</filter>
-</data>
-<data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
-<filter>"tex" in out_sr</filter>
-</data>
-<data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
-<filter>"tsv" in out_sr</filter>
 </data>
 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs">
-<discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" />
+<discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs" visible="false" />
-<filter>"logs" in out_add</filter>
+<filter>output_options['out_add'] and "logs" in  output_options['out_add']</filter>
 </collection>
-<!-- note the output filter of the next two outputs checks if there is
+<collection name="fasta_files" type="list" label="${tool.name} on ${on_string}: FASTA files">
-more than 1 input for transcripts (for 1 its a HDA, for more list or HDAs) -->
+<discover_datasets ext="fasta" pattern="(?P&lt;name&gt;.+)\.fasta" directory="fasta_files" visible="false" />
-<collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots">
+<filter>output_options['out_add'] and "fasta" in  output_options['out_add']</filter>
-<discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" />
+<filter>gene_coordinates['selector'] == 'true'</filter>
-<filter> isinstance(transcripts, list) and "plots" in out_add</filter>
+<filter>reference</filter>
 </collection>
-<collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison">
+<data name="compressed_files" format="tgz" label="${tool.name} on ${on_string}: compressed results folder" from_work_dir="results.tar.gz">
-<discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
+<filter>output_options['out_add'] and "gz" in output_options['out_add']</filter>
-<filter> isinstance(transcripts, list) and "comparison" in out_add</filter>
+</data>
-</collection>
+<data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: short report (pdf)" from_work_dir="outputdir/short_report.pdf">
-<collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
+<filter>output_options['out_sr'] and "pdf" in output_options['out_sr']</filter>
-<discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false" />
+</data>
-<filter>"details" in out_add</filter>
+<data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: short report (tex)" from_work_dir="outputdir/short_report.tex">
-</collection>
+<filter>output_options['out_sr'] and "tex" in output_options['out_sr']</filter>
-<collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
+</data>
-<discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false" />
+<data name="short_report_tabular" format="tabular" label="${tool.name} on ${on_string}: short report (tabular)" from_work_dir="outputdir/short_report.tsv">
-<filter>"details_plots" in out_add</filter>
+<filter>output_options['out_sr'] and "tabular" in output_options['out_sr']</filter>
-</collection>
+</data>
 </outputs>
 <tests>
-<test expect_num_outputs="7">
+<!-- Test 01: Minimum input txt output-->
-<param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" />
+<test expect_num_outputs="1">
-<param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
+<param name="transcripts" value="transcriptome01.fasta"/>
-<conditional name="gene_coordinates">
+<section name="output_options">
-<param name="use_gtf" value="true" />
+<param name="out_sr" value="tabular"/>
-<param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
+</section>
-<param name="disable_infer_genes" value="true" />
+<output name="short_report_tabular" file="test_01_short_report.tab"/>
-<param name="disable_infer_transcripts" value="true" />
+</test>
-</conditional>
+<!-- Test 02: Transcriptome reference,single read,  txt output-->
-<param name="out_sr" value="txt,tex,tsv" />
+<test expect_num_outputs="1">
-<param name="out_add" value="logs,comparison,plots,details" />
+<param name="transcripts" value="transcriptome01.fasta"/>
-<expand macro="txt_output_test" />
+<section name="output_options">
-<expand macro="tex_output_test" />
+<param name="out_sr" value="tabular"/>
-<expand macro="tsv_output_test" />
+</section>
-<output_collection name="comparison_png" type="list" count="15" />
+<conditional name="reads_option">
-<output_collection name="comparison" type="list" count="19" />
+<param name="selector" value="single"/>
-<output_collection name="list_logs" type="list" count="8" />
+<param name="single_reads" value="single_end.fastq.gz"/>
-<output_collection name="details" type="list:list" count="2">
+</conditional>
-<expand macro="details_output_test" assembler="Trinity" />
+<output name="short_report_tabular">
-<expand macro="details_output_test" assembler="idba" />
+<assert_contents>
+<has_text text="Transcripts" />
+<has_size value="95" delta="5"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 03: Transcriptome reference and annotation, txt output-->
+<test expect_num_outputs="1">
+<param name="transcripts" value="transcriptome01.fasta"/>
+<conditional name="gene_coordinates">
+<param name="selector" value="true"/>
+<param name="gtf" value="reference.gtf"/>
+</conditional>
+<section name="output_options">
+<param name="out_sr" value="tabular"/>
+</section>
+<conditional name="reads_option">
+<param name="selector" value="single"/>
+<param name="single_reads" value=""/>
+</conditional>
+<output name="short_report_tabular" file="test_03_short_report.tab"/>
+</test>
+<!-- Test 04: Transcriptome reference and annotation, txt output-->
+<test expect_num_outputs="1">
+<param name="transcripts" value="transcriptome01.fasta"/>
+<conditional name="gene_coordinates">
+<param name="selector" value="true"/>
+<param name="gtf" value="reference.gtf"/>
+</conditional>
+<section name="output_options">
+<param name="out_sr" value="tabular"/>
+</section>
+<conditional name="reads_option">
+<param name="selector" value="single"/>
+<param name="single_reads" value="single_end.fastq.gz"/>
+</conditional>
+<output name="short_report_tabular">
+<assert_contents>
+<has_text text="Transcripts" />
+<has_size value="140" delta="5"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 05: Transcriptome reference, annotation and mapping, txt output-->
+<test expect_num_outputs="1">
+<param name="transcripts" value="transcriptome01.fasta"/>
+<conditional name="gene_coordinates">
+<param name="selector" value="true"/>
+<param name="gtf" value="reference.gtf"/>
+</conditional>
+<section name="output_options">
+<param name="out_sr" value="tabular"/>
+</section>
+<conditional name="reads_option">
+<param name="selector" value='paired'/>
+<param name="forward_reads" value="input_F.fastqsanger"/>
+<param name="reverse_reads" value="input_R.fastqsanger"/>
+</conditional>
+<output name="short_report_tabular">
+<assert_contents>
+<has_text text="Transcripts" />
+<has_size value="140" delta="5"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 06: Transcriptome reference, annotation, mapping and BUSCO, txt output-->
+<test expect_num_outputs="1">
+<param name="transcripts" value="transcriptome01.fasta"/>
+<conditional name="gene_coordinates">
+<param name="selector" value="true"/>
+<param name="gtf" value="reference.gtf"/>
+</conditional>
+<conditional name="reads_option">
+<param name="selector" value='paired'/>
+<param name="forward_reads" value="input_F.fastqsanger"/>
+<param name="reverse_reads" value="input_R.fastqsanger"/>
+</conditional>
+<section name="output_options">
+<param name="out_sr" value="tabular"/>
+</section>
+<conditional name="use_busco">
+<param name="selector" value="true"/>
+<conditional name="lineage_conditional">
+<param name="selector" value="cached"/>
+<param name="cached_db" value="busco-demo-db-20230328"/>
+</conditional>
+</conditional>
+<output name="short_report_tabular">
+<assert_contents>
+<has_text text="Transcripts" />
+<has_size value="140" delta="5"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 07: Transcriptome reference, annotation, mapping and BUSCO, additional outputs-->
+<test expect_num_outputs="4">
+<param name="transcripts" value="transcriptome01.fasta"/>
+<conditional name="gene_coordinates">
+<param name="selector" value="true"/>
+<param name="gtf" value="reference.gtf"/>
+</conditional>
+<param name="reference" value="reference.fasta"/>
+<conditional name="reads_option">
+<param name="selector" value='paired'/>
+<param name="forward_reads" value="input_F.fastqsanger"/>
+<param name="reverse_reads" value="input_R.fastqsanger"/>
+</conditional>
+<conditional name="use_busco">
+<param name="selector" value="true"/>
+<conditional name="lineage_conditional">
+<param name="selector" value="cached"/>
+<param name="cached_db" value="busco-demo-db-20230328"/>
+</conditional>
+</conditional>
+<section name="output_options">
+<param name="out_sr" value="pdf,tabular"/>
+<param name="out_add" value="fasta,gz"/>
+</section>
+<output_collection name="fasta_files" type="list" count="7">
+<element name="transcriptome01.paralogs" file="test_07_paralogs.fasta" ftype="fasta"/>
 </output_collection>
-</test>
+<output name="short_report_pdf" file="test_07_short_report.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
+<output name="short_report_tabular" file="test_07_short_report.tab" ftype="tabular"/>
+<output name="compressed_files" ftype="tgz">
+<assert_contents>
+<has_size value="281260" delta="250"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 08: Multiple inputs-->
 <test expect_num_outputs="6">
-<param name="transcripts" value="Trinity.fasta" ftype="fasta" />
+<param name="transcripts" value="transcriptome01.fasta,transcriptome02.fasta"/>
-<conditional name="gene_coordinates">
+<param name="reference" value="reference.fasta"/>
-<param name="use_gtf" value="false" />
+<conditional name="gene_coordinates">
-</conditional>
+<param name="selector" value="true"/>
-<param name="min_alignment" value="30" />
+<param name="gtf" value="reference.gtf"/>
-<param name="lower_threshold" value="45" />
+</conditional>
-<param name="upper_threshold" value="95" />
+<section name="output_options">
-<param name="out_sr" value="txt,tex,tsv,pdf" />
+<param name="out_sr" value="tabular,pdf"/>
-<param name="out_add" value="logs,details_plots" />
+</section>
+<conditional name="use_busco">
-<expand macro="pdf_output_test" />
+<param name="selector" value="true"/>
-<expand macro="tex_output_test" />
+<conditional name="lineage_conditional">
-<expand macro="tsv_output_test" />
+<param name="selector" value="cached"/>
-<expand macro="txt_output_test" />
+<param name="cached_db" value="busco-demo-db-20230328"/>
-<output_collection name="list_logs" type="list">
+</conditional>
-<expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
+</conditional>
-<expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
+<param name="out_add" value="complete,fasta,logs,gz"/>
+<conditional name="reads_option">
+<param name="selector" value="single"/>
+<param name="single_reads" value="single_end.fastq.gz"/>
+</conditional>
+<output name="short_report_tabular" value="test_08_short_report.tab" ftype="tabular"/>
+<output name="short_report_pdf" value="test_08_short_report.pdf" ftype="pdf"/>
+<output name="stats" value="test_08_complete_report.tab" ftype="txt"  lines_diff="6" />
+<output_collection name="fasta_files" type="list" count="14">
+<element name="transcriptome01.paralogs" file="test_08_paralogs.fasta" ftype="fasta"/>
 </output_collection>
-<output_collection name="details_png" type="list:list" count="1">
+<output_collection name="list_logs" type="list" count="14">
-<element name="Trinity">
+<element name="STAR.out" ftype="txt">
-<expand macro="element_has_text" name="Nx" text="PNG" />
+<assert_contents>
-<expand macro="element_has_text" name="transcript_length" text="PNG" />
+<has_text text="STAR --runThreadN"/>
+<has_text text="finished successfully"/>
+</assert_contents>
+</element>
+<element name="gmap_build.out" ftype="txt">
+<assert_contents>
+<has_text text="No alternate scaffolds observed"/>
+</assert_contents>
+</element>
+<element name="rnaQUAST" ftype="txt">
+<assert_contents>
+<has_text text="THE QUALITY OF TRANSCRIPTOME ASSEMBLY DONE"/>
+<has_text text="Thank you for using rnaQUAST!"/>
+</assert_contents>
 </element>
 </output_collection>
 </test>
-<test expect_num_outputs="6">
-<param name="transcripts" value="Trinity.fasta" ftype="fasta" />
-<conditional name="gene_coordinates">
-<param name="use_gtf" value="false" />
-</conditional>
-<param name="min_alignment" value="30" />
-<param name="lower_threshold" value="45" />
-<param name="upper_threshold" value="95" />
-<param name="out_sr" value="txt,tex,tsv,pdf" />
-<param name="out_add" value="logs,details_plots" />
-<conditional name="busco_option">
-<param name="busco" value="true"/>
-<param name="lineage" value="metazoa"/>
-</conditional>
-<expand macro="pdf_output_test" />
-<expand macro="tex_output_test" />
-<expand macro="tsv_output_test" />
-<expand macro="txt_output_test" />
-<output_collection name="list_logs" type="list">
-<expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
-<expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
-</output_collection>
-<output_collection name="details_png" type="list:list" count="1">
-<element name="Trinity">
-<expand macro="element_has_text" name="Nx" text="PNG" />
-<expand macro="element_has_text" name="transcript_length" text="PNG" />
-</element>
-</output_collection>
-<assert_command>
-<has_text text="--busco metazoa"/>
-</assert_command>
-</test>
 </tests>
 <help><![CDATA[
-**What is rnaQUAST**
-- a quality assessment tool for de novo transcriptome assemblies
+.. class:: infomark
-- evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
-- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
+**Purpose**
-**Using rnaQuast without reference** you wont get:
+rnaQUAST is a tool for evaluating RNA-Seq assemblies using a reference genome and gene database. In addition, rnaQUAST is capable
+of estimating gene database coverage by raw reads and of performing de novo quality assessment.
-- x-assembled (Exons)
-- Alignments per Isoform
+.. class:: infomark
-- x-covered (Exons)
-- x-matched (Blocks)
+**rnaQUAST pipeline**
-- gmap build logs
+To evaluate quality of the assembled transcripts, rnaQUAST takes a reference genome in FASTA format and optionally its gene database in
-**Using rnaQuast with reference** you will get:
+GFF/GTF format. A user can provide a FASTA file with transcripts, which will be aligned to the given reference genome using GMAP
-- Reports
+or BLAT. The alignments are analyzed to calculate simple metrics and then are matched against the isoforms from the gene database in order
-- Logs
+to obtain statistics that represent completeness and correctness levels of the assembly. In addition, rnaQUAST is capable of estimating
-- Alignement/Basic Metrics
+gene database coverage by raw reads using STAR or TopHat2. For de novo quality assessment when reference genome and gene database are
-- Misassemblies/ Specificity/ Sensitivity
+unavailable, the transcripts are analyzed using BUSCO.
-- Alignment multiplicity
-- Block/ Transcript Lentgh
+.. class:: infomark
-- Blocks per alignment
-- Mismatch rate
+**Metrics and alignment analysis**
-- x-aligned
-- Nx
+rnaQUAST calculates various metrics without using alignment information, e.g. length distribution and N50 of the assembled transcripts.
-- Blocks per alignment
+Additionally, rnaQUAST computes the following statistics for the gene database: the total number of genes and isoforms, isoform and exon
-- gmap build logs
+length distribution, average number of exons per gene, etc.
-**Using rnaQuast without gene coordinates** you wont get:
+To analyze transcripts' alignments, rnaQUAST first filters out short partial alignments (shorter than a user-defined threshold, default
-- x-assembled (Exons)
+value is 50 bp). Such short alignments are typically caused by genomic repeats and thus are ignored. Afterwards, rnaQUAST selects the
-- Alignments per Isoform
+best-scored spliced alignment for each transcript. If a transcript has more than one alignment with the highest score, it is reported
-- x-covered (Exons)
+as multiply aligned. Otherwise, it is considered to be uniquely aligned. If the best-scored alignment is discordant (e.g. the transcript
-- x-matched (Blocks)
+has partial alignments that are either mapped to different strands or to different chromosomes) the transcript is classified as misassembled.
-- gmap build logs
+Transcripts without misassemblies are analyzed to calculate such metrics as average transcript alignment fraction and mismatch rate.
-- Database Metrics
-- Alignment multiplicity
+For simplicity of explanation, a transcript hereafter refers to a sequence generated by the assembler, and an isoform denotes a sequence
-- Mismatch rate
+from the gene database. rnaQUAST matches best-scored alignments of non-misassembled transcripts to the isoforms' coordinates and analyzes
-- NAx
+them to estimate how well the isoforms are covered by the assembly. rnaQUAST computes such metrics as database coverage (the total number
-- x-aligned
+of covered bases of all isoforms divided by the total length of all isoforms) and the number of 50%/95%-assembled isoforms. An isoform is
-**Using rnaQuast with gene coordinates** you will get:
+considered to be x%-assembled if at least x% of it is covered by a single transcript. Conversely, to evaluate how well the assembled
-- Reports
+transcripts are covered by the isoforms, rnaQUAST estimates the number of unannotated transcripts (that align to the genome, but do not
-- Logs
+match to any isoform) and the number of 50%/95%-matched transcripts (that have the corresponding fraction mapped to an isoform). Note that the
-- Alignement/Basic Metrics
+thresholds described above (50% and 95%) can be varied by the user.
-- Misassemblies/Specificity/Sensitivity
-- Alignment multiplicity
-- Block/Transcript length
-- Blocks per alignment
-- Mismatch rate
-- x-aligned
-- Nx/NAx
-- gmap build logs
-- Database Metrics
-- Alignment multiplicity
-More informations, see citations.
 ]]>    </help>
 <citations>
 <citation type="doi">10.1093/bioinformatics/btw218 </citation>
 </citations>
 </tool>

Mercurial > repos > iuc > rnaquast

comparison rna_quast.xml @ 5:f89e3c318453 draft