Mercurial > repos > iuc > rnaquast
diff rna_quast.xml @ 5:f89e3c318453 draft
planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit c633f5c634128e3c81ab48e94df6f703dd005c46
author | iuc |
---|---|
date | Wed, 07 Jun 2023 12:02:03 +0000 |
parents | f9f2ad782d8f |
children | 8e66f695d859 |
line wrap: on
line diff
--- a/rna_quast.xml Thu Jan 20 21:09:47 2022 +0000 +++ b/rna_quast.xml Wed Jun 07 12:02:03 2023 +0000 @@ -1,125 +1,54 @@ -<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> +<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>A quality assessment tool for De Novo transcriptome assemblies</description> - <xrefs> - <xref type="bio.tools">rnaQUAST</xref> - </xrefs> <macros> - <token name="@TOOL_VERSION@">2.2.1</token> - <token name="@VERSION_SUFFIX@">1</token> - <xml name="element_matching_line" token_name="" token_expression=""> - <element name="@NAME@"> - <assert_contents> - <has_line_matching expression="@EXPRESSION@" /> - </assert_contents> - </element> - </xml> - <xml name="element_has_text" token_name="" token_text=""> - <element name="@NAME@"> - <assert_contents> - <has_text text="@TEXT@" /> - </assert_contents> - </element> - </xml> - <xml name="details_output_test" token_assembler=""> - <element name="@ASSEMBLER@"> - <element name="5000%-assembled.list"> - <assert_contents> - <has_n_lines n="0" /> - </assert_contents> - </element> - <element name="9500%-assembled.list"> - <assert_contents> - <has_n_lines n="0" /> - </assert_contents> - </element> - <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" /> - <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" /> - <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" /> - <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" 
name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" /> - <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" /> - <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" /> - <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" /> - <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" /> - <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" /> - <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" /> - <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" /> - </element> - </xml> - - <xml name="txt_output_test" token_assembler=""> - <output name="short_report_txt"> - <assert_contents> - <has_text text="SHORT SUMMARY REPORT" /> - </assert_contents> - </output> - </xml> - <xml name="tex_output_test" token_assembler=""> - <output name="short_report_tex"> - <assert_contents> - <has_text text="Short summary report" /> - <has_text text="end{document}" /> - </assert_contents> - </output> - </xml> - <xml name="tsv_output_test" token_assembler=""> - <output name="short_report_tsv"> - <assert_contents> - 
<has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" /> - </assert_contents> - </output> - </xml> - <xml name="pdf_output_test" token_assembler=""> - <output name="short_report_pdf"> - <assert_contents> - <has_text text="rnaQUAST short report" /> - </assert_contents> - </output> - </xml> + <import>macros.xml</import> </macros> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement> - </requirements> + <expand macro='xrefs'/> + <expand macro='requirements'/> <stdio> <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" /> </stdio> <command detect_errors="exit_code"><![CDATA[ - #import re - #for $i in $transcripts + mkdir -p './complete_reports/' && + mkdir -p './fasta_files/' && + #import os, re, glob + #for $i in $transcripts ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && #end for - #if $r - #for $rf in $r + #if $reference + #for $rf in $reference ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' && #end for #end if - #if $gene_coordinates.use_gtf == "true" + #if $gene_coordinates.selector == "true" #for $g in $gene_coordinates.gtf ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' && #end for - #end if + #end if mkdir outputdir && rnaQUAST.py - --threads \${GALAXY_SLOTS:-1} + --threads \${GALAXY_SLOTS:-8} --transcripts #for $i in $transcripts '${re.sub('[^\w\-.]', '_', i.element_identifier)}' #end for - $strand_specific - #if $r + #if $reads_option.selector == 'paired' + --left_reads '${reads_option.forward_reads}' + --right_reads '${reads_option.reverse_reads}' + #else if $reads_option.selector == 'single' + --single_reads '${reads_option.single_reads}' + #end if + $advanced_options.strand_specific + #if $reads_alignment + --reads_alignment '${reads_alignment}' + #end if + #if $reference -r - #for $rf in $r + #for $rf in $reference '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' #end for #end if - #if $gene_coordinates.use_gtf == "true" + #if 
$gene_coordinates.selector == "true" --gtf #for $g in $gene_coordinates.gtf '${re.sub('[^\w\-.]', '_', g.element_identifier)}' @@ -127,271 +56,428 @@ $gene_coordinates.disable_infer_genes $gene_coordinates.disable_infer_transcripts #end if - $prokaryote - --min_alignment '$min_alignment' - #if "pdf" not in $out_sr and "plots" not in $out_add + $advanced_options.prokaryote + --min_alignment $advanced_options.min_alignment + $advanced_options.blat + + #if "pdf" not in $output_options.out_sr --no_plots #end if - $blat - #if $busco_option.busco == 'true' - --busco $busco_option.lineage + #if $use_busco.selector == 'true' + --busco + #if $use_busco.lineage_conditional.selector == 'cached': + '${use_busco.lineage_conditional.cached_db.fields.path}' + #else + $use_busco.lineage + #end if #end if - ##$gene_mark - $meta - --lower_threshold $lower_threshold - --upper_threshold $upper_threshold + ## $advanced_options.gene_mark + $advanced_options.meta + --lower_threshold $advanced_options.lower_threshold + --upper_threshold $advanced_options.upper_threshold -o outputdir - && mkdir details - - ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output) - ## to a joint dir (details) to make them discoverable - ## also remove "ASSEMBLER." 
prefixes from files (otherwise the test macros don't work) - #for $i in $transcripts - #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0] - && - (for f in \$(find 'outputdir/'$basename'_output' -type f); - do - d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) && - mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')"; - done) - #end for - - ## rename .list files to .txt files to make them detectable (format detection by extension) - ## the final `true` seems needed since otherwise the `;` at the end is swallowed - && find details/ -name "*.list" -exec mv {} {}.txt \; - && true + #if 'gz' in $output_options.out_add + && tar -czvf results.tar.gz './outputdir' + #end if + + #if len($transcripts) == 1 + #set $path = "/".join(['outputdir',($transcripts[0].element_identifier).split(".")[0]]) + "_output" + && mv '${path}' './results' + ## rename .list files to .txt files to make them detectable + && find './results/' -name "*.list" -exec mv {} {}.txt \; + && true + && printf "************ METRICS/TRANSCRIPTS ***************\n" > stats.txt + && for file_name in ./results/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt + && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt; + done + && cat stats.txt > $stats + #if $gene_coordinates.selector == 'true' and $reference + && mv ./results/*fasta ./fasta_files/ + #end if + #else + && mkdir -p './results/' + #if $gene_coordinates.selector == 'true' and $reference + #for $i, $transcript in enumerate($transcripts) + #set $path = "/".join(['outputdir',($transcripts[$i].element_identifier).split(".")[0]]) + "_output" + && rm -r ./results + && cp -r $path './results' + && mv ./results/*fasta './fasta_files/' + #end for + #end if + && find './outputdir/comparison_output' -name "*.list" -exec mv {} {}.txt \; + && true + && printf "************ COMPARISON METRICS ***************\n" > stats.txt + && for file_name in 
./outputdir/comparison_output/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt + && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt; done + && cat stats.txt > $stats + #end if ]]> </command> <inputs> <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/> - <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" - help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/> - <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA forma." /> + <conditional name="reads_option"> + <param name="selector" type="select" label="Single-end or paired-end reads"> + <option value="" selected="true">Disabled-end</option> + <option value="single" selected="true">Single-end</option> + <option value="paired">Paired-end (as individual datasets)</option> + </param> + <when value=""/> + <when value="single"> + <param format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" name="single_reads" type="data" label="RNA-Seq FASTQ/FASTA file"/> + </when> + <when value="paired"> + <param name="forward_reads" format="fastq,fastq.gz,fastqsanger ,fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/> + <param name="reverse_reads" format="fastq,fastq.gz,fastqsanger, fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/> + </when> + </conditional> + <param argument="--reference" type="data" format="fasta" label="Reference genome" multiple="true" optional="true" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." 
/> <conditional name="gene_coordinates"> - <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl."> - <option value="true" selected="true">Yes</option> - <option value="false">No</option> + <param name="selector" type="select" label="Genome annotation" help="Genome annotation file. We recommend to use files downloaded from GENCODE or Ensembl."> + <option value="true">Enabled</option> + <option value="false" selected="true">Disabled</option> </param> <when value="true"> - <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" /> - <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?" + <param argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" /> + <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" Disable infer genes" help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/> - <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" 
help="Is option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/> + <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="Disable infer transcripts" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/> </when> <when value="false"> </when> </conditional> - <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?" help="Use this option if the genome is prokaryotic."/> - <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/> - <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " /> - <conditional name="busco_option"> - <param argument="--busco" type="select" label="Run BUSCO tool?" 
help="BUSCO allows to detect core genes in the assembled transcripts"> + <param argument="--reads_alignment" type="data" format="sam" label="Aligned reads to reference genome" optional="true" help="File with read alignments to the reference genome" /> + <conditional name="use_busco"> + <param argument="selector" type="select" label="Run BUSCO" help="BUSCO allows to detect core genes in the assembled transcripts"> <option value="false">Disabled</option> <option value="true">Enabled</option> </param> <when value="false"/> <when value="true"> - <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO"> - <option value="metazoa">Metazoa</option> - <option value="eukaryota">Eukaryota</option> - <option value="arthropoda">Arthropoda</option> - <option value="vertebrata">Vertebrata</option> - <option value="fungi">Fungi</option> - <option value="bacteria">Bacteria</option> - </param> + <conditional name="lineage_conditional"> + <param name="selector" type="select" label="Lineage data source"> + <option value="download">Download lineage data</option> + <option value="cached" selected="true">Use cached lineage data</option> + </param> + <when value="cached"> + <param name="cached_db" label="Cached database with lineage" type="select"> + <options from_data_table="busco_database"> + <validator message="No BUSCO database is available" type="no_options" /> + </options> + </param> + </when> + <when value="download"> + <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO"> + <option value="metazoa">Metazoa</option> + <option value="eukaryota">Eukaryota</option> + <option value="arthropoda">Arthropoda</option> + <option value="vertebrata">Vertebrata</option> + <option value="fungi">Fungi</option> + <option value="bacteria">Bacteria</option> + </param> + </when> + </conditional> </when> </conditional> - <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" 
label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/--> - <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for meta-transcriptome assemblies" /> - <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." /> - <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." /> - <param name="out_sr" type="select" multiple="true" label="Short report formats"> - <option value="tsv" selected="true">tabular</option> - <option value="txt">txt</option> - <option value="tex">tex</option> - <option value="pdf" selected="true">pdf</option> - </param> - <param name="out_add" type="select" multiple="true" label="Additional outputs"> - <option value="logs">Logs</option> - <option value="plots" selected="true">Plots (only for n>1)</option> - <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option> - <option value="details" selected="true">Details per Chromosomes/scaffolds file</option> - <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option> - </param> + <section name="advanced_options" title="Advanced options" > + <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific RNA-seq data" + help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/> + <param argument="--min_alignment" type="integer" min="0" value="50" label="Minimal alignment length to be used" help="Default value is 50"/> + <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run 
with BLAT instead of GMAP" help="BLAT is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " /> + <!-- GeneMarkST is not in Bioconda --> + <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" + help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/--> + <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" /> + <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." /> + <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." /> + <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Prokaryotic organism(s)" help="Use this option if the genome is prokaryotic"/> + </section> + <section name="output_options" title="Output options" expanded="true"> + <param name="out_sr" type="select" multiple="true" display="checkboxes" label="Short report formats"> + <option value="tabular">Tabular</option> + <option value="tex">TeX</option> + <option value="pdf" selected="true">PDF</option> + </param> + <param name="out_add" type="select" label="Additional outputs" multiple="true" display="checkboxes"> + <option value="complete">Complete report</option> + <option value="fasta" >FASTA files</option> + <option value="logs">Logs</option> + <option value="gz">Compressed output folder</option> + </param> + </section> </inputs> - <outputs> - <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf"> - <filter>"pdf" in out_sr</filter> - </data> - 
<data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt"> - <filter>"txt" in out_sr</filter> - </data> - <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex"> - <filter>"tex" in out_sr</filter> - </data> - <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv"> - <filter>"tsv" in out_sr</filter> + <data name="stats" format="txt" label="${tool.name} on ${on_string}: complete report"> + <filter>output_options['out_add'] and "complete" in output_options['out_add']</filter> </data> <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs"> - <discover_datasets ext="txt" pattern="(?P<name>.+)\.log" directory="outputdir/logs/" visible="false" /> - <filter>"logs" in out_add</filter> + <discover_datasets ext="txt" pattern="(?P<name>.+)\.log" directory="outputdir/logs" visible="false" /> + <filter>output_options['out_add'] and "logs" in output_options['out_add']</filter> </collection> - <!-- note the output filter of the next two outputs checks if there is - more than 1 input for transcripts (for 1 its a HDA, for more list or HDAs) --> - <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots"> - <discover_datasets ext="png" pattern="(?P<name>.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" /> - <filter> isinstance(transcripts, list) and "plots" in out_add</filter> + <collection name="fasta_files" type="list" label="${tool.name} on ${on_string}: FASTA files"> + <discover_datasets ext="fasta" pattern="(?P<name>.+)\.fasta" directory="fasta_files" visible="false" /> + <filter>output_options['out_add'] and "fasta" in output_options['out_add']</filter> + <filter>gene_coordinates['selector'] == 'true'</filter> + <filter>reference</filter> 
</collection> - <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison"> - <discover_datasets ext="txt" pattern="(?P<name>.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" /> - <filter> isinstance(transcripts, list) and "comparison" in out_add</filter> - </collection> - <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output"> - <discover_datasets pattern="(?P<identifier_0>.+)_____(?P<identifier_1>.+)\.(?P<ext>txt)" directory="details/" visible="false" /> - <filter>"details" in out_add</filter> - </collection> - <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots"> - <discover_datasets pattern="(?P<identifier_0>.+)_____(?P<identifier_1>.+)\.(?P<ext>png)" directory="details/" visible="false" /> - <filter>"details_plots" in out_add</filter> - </collection> + <data name="compressed_files" format="tgz" label="${tool.name} on ${on_string}: compressed results folder" from_work_dir="results.tar.gz"> + <filter>output_options['out_add'] and "gz" in output_options['out_add']</filter> + </data> + <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: short report (pdf)" from_work_dir="outputdir/short_report.pdf"> + <filter>output_options['out_sr'] and "pdf" in output_options['out_sr']</filter> + </data> + <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: short report (tex)" from_work_dir="outputdir/short_report.tex"> + <filter>output_options['out_sr'] and "tex" in output_options['out_sr']</filter> + </data> + <data name="short_report_tabular" format="tabular" label="${tool.name} on ${on_string}: short report (tabular)" from_work_dir="outputdir/short_report.tsv"> + <filter>output_options['out_sr'] and "tabular" in output_options['out_sr']</filter> + </data> </outputs> <tests> - <test expect_num_outputs="7"> - <param name="transcripts" 
value="idba.fasta,Trinity.fasta" ftype="fasta" /> - <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" /> + <!-- Test 01: Minimum input txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" value="transcriptome01.fasta"/> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <output name="short_report_tabular" file="test_01_short_report.tab"/> + </test> + <!-- Test 02: Transcriptome reference,single read, txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" value="transcriptome01.fasta"/> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <conditional name="reads_option"> + <param name="selector" value="single"/> + <param name="single_reads" value="single_end.fastq.gz"/> + </conditional> + <output name="short_report_tabular"> + <assert_contents> + <has_text text="Transcripts" /> + <has_size value="95" delta="5"/> + </assert_contents> + </output> + </test> + <!-- Test 03: Transcriptome reference and annotation, txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" value="transcriptome01.fasta"/> <conditional name="gene_coordinates"> - <param name="use_gtf" value="true" /> - <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" /> - <param name="disable_infer_genes" value="true" /> - <param name="disable_infer_transcripts" value="true" /> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> + </conditional> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <conditional name="reads_option"> + <param name="selector" value="single"/> + <param name="single_reads" value=""/> + </conditional> + <output name="short_report_tabular" file="test_03_short_report.tab"/> + </test> + <!-- Test 04: Transcriptome reference and annotation, txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" 
value="transcriptome01.fasta"/> + <conditional name="gene_coordinates"> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> + </conditional> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <conditional name="reads_option"> + <param name="selector" value="single"/> + <param name="single_reads" value="single_end.fastq.gz"/> + </conditional> + <output name="short_report_tabular"> + <assert_contents> + <has_text text="Transcripts" /> + <has_size value="140" delta="5"/> + </assert_contents> + </output> + </test> + <!-- Test 05: Transcriptome reference, annotation and mapping, txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" value="transcriptome01.fasta"/> + <conditional name="gene_coordinates"> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> </conditional> - <param name="out_sr" value="txt,tex,tsv" /> - <param name="out_add" value="logs,comparison,plots,details" /> - <expand macro="txt_output_test" /> - <expand macro="tex_output_test" /> - <expand macro="tsv_output_test" /> - <output_collection name="comparison_png" type="list" count="15" /> - <output_collection name="comparison" type="list" count="19" /> - <output_collection name="list_logs" type="list" count="8" /> - <output_collection name="details" type="list:list" count="2"> - <expand macro="details_output_test" assembler="Trinity" /> - <expand macro="details_output_test" assembler="idba" /> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <conditional name="reads_option"> + <param name="selector" value='paired'/> + <param name="forward_reads" value="input_F.fastqsanger"/> + <param name="reverse_reads" value="input_F.fastqsanger"/> + </conditional> + <output name="short_report_tabular"> + <assert_contents> + <has_text text="Transcripts" /> + <has_size value="140" delta="5"/> + </assert_contents> + </output> + </test> + <!-- Test 06: 
Transcriptome reference, annotation, mapping and BUSCO, txt output--> + <test expect_num_outputs="1"> + <param name="transcripts" value="transcriptome01.fasta"/> + <conditional name="gene_coordinates"> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> + </conditional> + <conditional name="reads_option"> + <param name="selector" value='paired'/> + <param name="forward_reads" value="input_F.fastqsanger"/> + <param name="reverse_reads" value="input_R.fastqsanger"/> + </conditional> + <section name="output_options"> + <param name="out_sr" value="tabular"/> + </section> + <conditional name="use_busco"> + <param name="selector" value="true"/> + <conditional name="lineage_conditional"> + <param name="selector" value="cached"/> + <param name="cached_db" value="busco-demo-db-20230328"/> + </conditional> + </conditional> + <output name="short_report_tabular"> + <assert_contents> + <has_text text="Transcripts" /> + <has_size value="140" delta="5"/> + </assert_contents> + </output> + + </test> + <!-- Test 07: Transcriptome reference, annotation, mapping and BUSCO, additional outputs--> + <test expect_num_outputs="4"> + <param name="transcripts" value="transcriptome01.fasta"/> + <conditional name="gene_coordinates"> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> + </conditional> + <param name="reference" value="reference.fasta"/> + <conditional name="reads_option"> + <param name="selector" value='paired'/> + <param name="forward_reads" value="input_F.fastqsanger"/> + <param name="reverse_reads" value="input_R.fastqsanger"/> + </conditional> + <conditional name="use_busco"> + <param name="selector" value="true"/> + <conditional name="lineage_conditional"> + <param name="selector" value="cached"/> + <param name="cached_db" value="busco-demo-db-20230328"/> + </conditional> + </conditional> + <section name="output_options"> + <param name="out_sr" value="pdf,tabular"/> + <param name="out_add" value="fasta,gz"/> + 
</section> + <output_collection name="fasta_files" type="list" count="7"> + <element name="transcriptome01.paralogs" file="test_07_paralogs.fasta" ftype="fasta"/> </output_collection> + <output name="short_report_pdf" file="test_07_short_report.pdf" ftype="pdf" compare="sim_size" delta="1000"/> + <output name="short_report_tabular" file="test_07_short_report.tab" ftype="tabular"/> + <output name="compressed_files" ftype="tgz"> + <assert_contents> + <has_size value="281260" delta="250"/> + </assert_contents> + </output> </test> + <!-- Test 08: Multiple inputs--> <test expect_num_outputs="6"> - <param name="transcripts" value="Trinity.fasta" ftype="fasta" /> + <param name="transcripts" value="transcriptome01.fasta,transcriptome02.fasta"/> + <param name="reference" value="reference.fasta"/> <conditional name="gene_coordinates"> - <param name="use_gtf" value="false" /> + <param name="selector" value="true"/> + <param name="gtf" value="reference.gtf"/> + </conditional> + <section name="output_options"> + <param name="out_sr" value="tabular,pdf"/> + </section> + <conditional name="use_busco"> + <param name="selector" value="true"/> + <conditional name="lineage_conditional"> + <param name="selector" value="cached"/> + <param name="cached_db" value="busco-demo-db-20230328"/> + </conditional> + </conditional> + <param name="out_add" value="complete,fasta,logs,gz"/> + <conditional name="reads_option"> + <param name="selector" value="single"/> + <param name="single_reads" value="single_end.fastq.gz"/> </conditional> - <param name="min_alignment" value="30" /> - <param name="lower_threshold" value="45" /> - <param name="upper_threshold" value="95" /> - <param name="out_sr" value="txt,tex,tsv,pdf" /> - <param name="out_add" value="logs,details_plots" /> - - <expand macro="pdf_output_test" /> - <expand macro="tex_output_test" /> - <expand macro="tsv_output_test" /> - <expand macro="txt_output_test" /> - <output_collection name="list_logs" type="list"> - <expand 
macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" /> - <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" /> + <output name="short_report_tabular" value="test_08_short_report.tab" ftype="tabular"/> + <output name="short_report_pdf" value="test_08_short_report.pdf" ftype="pdf"/> + <output name="stats" value="test_08_complete_report.tab" ftype="txt" lines_diff="6" /> + <output_collection name="fasta_files" type="list" count="14"> + <element name="transcriptome01.paralogs" file="test_08_paralogs.fasta" ftype="fasta"/> </output_collection> - <output_collection name="details_png" type="list:list" count="1"> - <element name="Trinity"> - <expand macro="element_has_text" name="Nx" text="PNG" /> - <expand macro="element_has_text" name="transcript_length" text="PNG" /> + <output_collection name="list_logs" type="list" count="14"> + <element name="STAR.out" ftype="txt"> + <assert_contents> + <has_text text="STAR --runThreadN"/> + <has_text text="finished successfully"/> + </assert_contents> + </element> + <element name="gmap_build.out" ftype="txt"> + <assert_contents> + <has_text text="No alternate scaffolds observed"/> + </assert_contents> + </element> + <element name="rnaQUAST" ftype="txt"> + <assert_contents> + <has_text text="THE QUALITY OF TRANSCRIPTOME ASSEMBLY DONE"/> + <has_text text="Thank you for using rnaQUAST!"/> + </assert_contents> </element> </output_collection> </test> - <test expect_num_outputs="6"> - <param name="transcripts" value="Trinity.fasta" ftype="fasta" /> - <conditional name="gene_coordinates"> - <param name="use_gtf" value="false" /> - </conditional> - <param name="min_alignment" value="30" /> - <param name="lower_threshold" value="45" /> - <param name="upper_threshold" value="95" /> - <param name="out_sr" value="txt,tex,tsv,pdf" /> - <param name="out_add" value="logs,details_plots" /> - <conditional name="busco_option"> - <param name="busco" value="true"/> - <param name="lineage" 
value="metazoa"/> - </conditional> - <expand macro="pdf_output_test" /> - <expand macro="tex_output_test" /> - <expand macro="tsv_output_test" /> - <expand macro="txt_output_test" /> - <output_collection name="list_logs" type="list"> - <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" /> - <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" /> - </output_collection> - <output_collection name="details_png" type="list:list" count="1"> - <element name="Trinity"> - <expand macro="element_has_text" name="Nx" text="PNG" /> - <expand macro="element_has_text" name="transcript_length" text="PNG" /> - </element> - </output_collection> - <assert_command> - <has_text text="--busco metazoa"/> - </assert_command> - </test> + </tests> <help><![CDATA[ -**What is rnaQUAST** -- a quality assessment tool for de novo transcriptome assemblies -- evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database -- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts + +.. class:: infomark + +**Purpose** -**Using rnaQuast without reference** you wont get: +rnaQUAST is a tool for evaluating RNA-Seq assemblies using reference genome and gene database. In addition, rnaQUAST is also capable +of estimating gene database coverage by raw reads and de novo quality assessment. -- x-assembled (Exons) -- Alignments per Isoform -- x-covered (Exons) -- x-matched (Blocks) -- gmap build logs +.. 
class:: infomark -**Using rnaQuast with reference** you will get: -- Reports -- Logs -- Alignement/Basic Metrics -- Misassemblies/ Specificity/ Sensitivity -- Alignment multiplicity -- Block/ Transcript Lentgh -- Blocks per alignment -- Mismatch rate -- x-aligned -- Nx -- Blocks per alignment -- gmap build logs +**rnaQUAST pipeline** + +To evaluate the quality of the assembled transcripts, rnaQUAST takes a reference genome in FASTA format and optionally its gene database in +GFF/GTF format. The user provides a FASTA file with transcripts, which will be aligned to the given reference genome using GMAP +or BLAT. The alignments are analyzed to calculate simple metrics and then are matched against the isoforms from the gene database in order +to obtain statistics that represent completeness and correctness levels of the assembly. In addition, rnaQUAST is capable of estimating +gene database coverage by raw reads using STAR or TopHat2. For de novo quality assessment when a reference genome and gene database are +unavailable, the transcripts are analyzed using BUSCO. + +.. class:: infomark -**Using rnaQuast without gene coordinates** you wont get: -- x-assembled (Exons) -- Alignments per Isoform -- x-covered (Exons) -- x-matched (Blocks) -- gmap build logs -- Database Metrics -- Alignment multiplicity -- Mismatch rate -- NAx -- x-aligned -**Using rnaQuast with gene coordinates** you will get: -- Reports -- Logs -- Alignement/Basic Metrics -- Misassemblies/Specificity/Sensitivity -- Alignment multiplicity -- Block/Transcript length -- Blocks per alignment -- Mismatch rate -- x-aligned -- Nx/NAx -- gmap build logs -- Database Metrics -- Alignment multiplicity -More informations, see citations. +**Metrics and alignment analysis** + +rnaQUAST calculates various metrics without using alignment information, e.g. length distribution and N50 of the assembled transcripts. 
+Additionally, rnaQUAST computes the following statistics for the gene database: the total number of genes and isoforms, isoform and exon +length distribution, average number of exons per gene, etc. + +To analyze transcripts' alignments, rnaQUAST first filters out short partial alignments (shorter than a user-defined threshold, default +value is 50 bp). Such short alignments are typically caused by genomic repeats and thus are ignored. Afterwards, rnaQUAST selects the +best-scored spliced alignment for each transcript. If a transcript has more than one alignment with the highest score, it is reported +as multiply aligned. Otherwise, it is considered to be uniquely aligned. If the best-scored alignment is discordant (e.g. the transcript +has partial alignments that are either mapped to different strands or to different chromosomes), the transcript is classified as misassembled. +Transcripts without misassemblies are analyzed to calculate such metrics as average transcript alignment fraction and mismatch rate. + +For simplicity of explanation, a transcript hereafter refers to a sequence generated by the assembler and an isoform denotes a sequence +from the gene database. rnaQUAST matches best-scored alignments of non-misassembled transcripts to the isoforms' coordinates and analyzes +them to estimate how well the isoforms are covered by the assembly. rnaQUAST computes such metrics as database coverage (the total number +of covered bases of all isoforms divided by the total length of all isoforms) and the number of 50%/95%-assembled isoforms. An isoform is +considered to be x%-assembled if at least x% of it is covered by a single transcript. Conversely, to evaluate how well the assembled +transcripts are covered by the isoforms, rnaQUAST estimates the number of unannotated transcripts (that align to the genome, but do not +match any isoform) and the number of 50%/95%-matched transcripts (that have the corresponding fraction mapped to an isoform). 
Note that the +thresholds described above (50% and 95%) can be adjusted by the user. + + ]]> </help> <citations> <citation type="doi">10.1093/bioinformatics/btw218 </citation>