view rna_quast.xml @ 4:f9f2ad782d8f draft

"planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit 790683fb2de6fd7e1275967b2aca93514cada7e9"
author iuc
date Thu, 20 Jan 2022 21:09:47 +0000
parents a9edbe21bf47
children f89e3c318453
line wrap: on
line source

<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>A quality assessment tool for De Novo transcriptome assemblies</description>
    <!-- Cross-reference to the tool's entry in the bio.tools registry. -->
    <xrefs>
        <xref type="bio.tools">rnaQUAST</xref>
    </xrefs>
    <macros>
        <!-- Version of the wrapped rnaquast package and the wrapper revision;
             both are combined into the tool version attribute. -->
        <token name="@TOOL_VERSION@">2.2.1</token>
        <token name="@VERSION_SUFFIX@">1</token>

        <!-- Test helper: the collection element given by the "name" token must
             contain a line matching the regular expression given by "expression". -->
        <xml name="element_matching_line" token_name="" token_expression="">
            <element name="@NAME@">
                <assert_contents>
                    <has_line_matching expression="@EXPRESSION@" />
                </assert_contents>
            </element>
        </xml>

        <!-- Test helper: the collection element given by the "name" token must
             contain the literal text given by "text". -->
        <xml name="element_has_text" token_name="" token_text="">
            <element name="@NAME@">
                <assert_contents>
                    <has_text text="@TEXT@" />
                </assert_contents>
            </element>
        </xml>

        <!-- Test helper: checks the per-assembly sub-list (named by the "assembler"
             token) of the "details" collection, one assertion per expected file.
             NOTE(review): the element names 5000%-assembled.list and
             9500%-assembled.list presumably derive from the lower/upper threshold
             parameters (defaults 50 and 95) - confirm against rnaQUAST's file naming. -->
        <xml name="details_output_test" token_assembler="">
            <element name="@ASSEMBLER@">
                <element name="5000%-assembled.list">
                    <assert_contents>
                        <has_n_lines n="0" />
                    </assert_contents>
                </element>
                <element name="9500%-assembled.list">
                    <assert_contents>
                        <has_n_lines n="0" />
                    </assert_contents>
                </element>
                <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" />
                <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" />
                <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" />
                <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" />
                <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" />
                <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" />
                <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" />
                <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" />
                <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" />
                <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" />
                <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" />
            </element>
        </xml>

        <!-- Test helpers for the four short report outputs. They take no tokens:
             each one asserts a fixed marker in the corresponding output. -->
        <xml name="txt_output_test">
            <output name="short_report_txt">
                <assert_contents>
                    <has_text text="SHORT SUMMARY REPORT" />
                </assert_contents>
            </output>
        </xml>
        <xml name="tex_output_test">
            <output name="short_report_tex">
                <assert_contents>
                    <has_text text="Short summary report" />
                    <has_text text="end{document}" />
                </assert_contents>
            </output>
        </xml>
        <xml name="tsv_output_test">
            <output name="short_report_tsv">
                <assert_contents>
                    <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" />
                </assert_contents>
            </output>
        </xml>
        <xml name="pdf_output_test">
            <output name="short_report_pdf">
                <assert_contents>
                    <has_text text="rnaQUAST short report" />
                </assert_contents>
            </output>
        </xml>
    </macros>
    <!-- Runtime dependency: the rnaquast package, pinned to the version given by the TOOL_VERSION token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
    </requirements>
    <!-- A Python traceback on stdout or stderr is treated as a fatal error. -->
    <stdio>
        <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
    ## Overview:
    ##   1. link all inputs into the working directory under sanitized names
    ##      (every character that is not a word character, "-" or "." becomes "_")
    ##   2. run rnaQUAST.py, writing everything to outputdir/
    ##   3. gather the per-assembly files in details/ for collection discovery
    #import re
    ## selected values of the multiple selects as plain lists; an empty
    ## selection stringifies to "None" and therefore matches no option
    #set short_reports = str($out_sr).split(',')
    #set extra_outputs = str($out_add).split(',')
    #for $i in $transcripts
        ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
    #end for
    #if $r
        #for $rf in $r
            ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
        #end for
    #end if
    #if $gene_coordinates.use_gtf == "true"
        #for $g in $gene_coordinates.gtf
            ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
        #end for
    #end if
    mkdir outputdir &&
    rnaQUAST.py
    --threads \${GALAXY_SLOTS:-1}
    --transcripts
    #for $i in $transcripts
         '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
    #end for
    $strand_specific
    #if $r
        -r
        #for $rf in $r
            '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
        #end for
    #end if
    #if $gene_coordinates.use_gtf == "true"
        --gtf
        #for $g in $gene_coordinates.gtf
            '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
        #end for
        $gene_coordinates.disable_infer_genes
        $gene_coordinates.disable_infer_transcripts
    #end if
    $prokaryote
    --min_alignment '$min_alignment'
    ## only skip plotting if no plot-bearing output was requested: the pdf report,
    ## the comparison plots ("plots") or the per-assembly plots ("details_plots")
    #if "pdf" not in $short_reports and "plots" not in $extra_outputs and "details_plots" not in $extra_outputs
        --no_plots
    #end if
    $blat
    #if $busco_option.busco == 'true'
        --busco $busco_option.lineage
    #end if
    ##$gene_mark
    $meta
    --lower_threshold $lower_threshold
    --upper_threshold $upper_threshold
    -o outputdir

    && mkdir details

    ## move the outputs that are generated per input (outputdir/ASSEMBLER_output)
    ## to a joint dir (details) to make them discoverable as
    ## details/ASSEMBLER_____FILE
    ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
    ## the ASSEMBLER label is taken from the template variable rather than parsed
    ## from the directory name, so assembly names containing "_" stay intact
    #for $i in $transcripts
        #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
        &&
        (for f in \$(find 'outputdir/${basename}_output' -type f);
        do
            mv "\$f" 'details/${basename}_____'"\$(basename "\$f" | sed 's/$basename\.//')";
        done)
    #end for

    ## rename .list files to .txt files to make them detectable (format detection by extension)
    ## the final `true` seems needed since otherwise the `;` at the end is swallowed
    && find details/ -name "*.list" -exec mv {} {}.txt \;
    && true
    ]]>    </command>
    <inputs>
        <!-- assemblies to assess; more than one enables the comparison outputs -->
        <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/>
        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"
            help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." />
        <!-- gene annotation; the inference switches are only offered together with a GTF/GFF file -->
        <conditional name="gene_coordinates">
            <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
                <option value="true" selected="true">Yes</option>
                <option value="false">No</option>
            </param>
            <when value="true">
                <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label="GTF file contains gene records?"
                    help="Use this option if your GTF file already contains gene records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/>
                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcript records?" help="Use this option if your GTF file already contains transcript records, otherwise gffutils will fix it."/>
            </when>
            <when value="false"/>
        </conditional>
        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?" help="Use this option if the genome is prokaryotic."/>
        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads." />
        <conditional name="busco_option">
            <param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows detecting core genes in the assembled transcripts">
                <option value="false">Disabled</option>
                <option value="true">Enabled</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
                    <option value="metazoa">Metazoa</option>
                    <option value="eukaryota">Eukaryota</option>
                    <option value="arthropoda">Arthropoda</option>
                    <option value="vertebrata">Vertebrata</option>
                    <option value="fungi">Fungi</option>
                    <option value="bacteria">Bacteria</option>
                </param>
            </when>
        </conditional>
        <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
        <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" />
        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
        <!-- output selection: each selected value enables the output whose filter tests for it -->
        <param name="out_sr" type="select" multiple="true" label="Short report formats">
            <option value="tsv" selected="true">tabular</option>
            <option value="txt">txt</option>
            <option value="tex">tex</option>
            <option value="pdf" selected="true">pdf</option>
        </param>
        <param name="out_add" type="select" multiple="true" label="Additional outputs">
            <option value="logs">Logs</option>
            <option value="plots" selected="true">Plots (only for n>1)</option>
            <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
            <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
            <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
        </param>
    </inputs>

    <outputs>
        <!-- Short reports: one dataset per format selected in "out_sr",
             picked up from fixed file names in outputdir/. -->
        <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
            <filter>"pdf" in out_sr</filter>
        </data>
        <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
            <filter>"txt" in out_sr</filter>
        </data>
        <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
            <filter>"tex" in out_sr</filter>
        </data>
        <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
            <filter>"tsv" in out_sr</filter>
        </data>
        <!-- All *.log files found in outputdir/logs/. -->
        <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs">
            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" />
            <filter>"logs" in out_add</filter>
        </collection>
        <!-- Note: the output filters of the next two outputs check whether there is
             more than one input for transcripts (for one input it is a single HDA,
             for more it is a list of HDAs). -->
        <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots">
            <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" />
            <filter> isinstance(transcripts, list) and "plots" in out_add</filter>
        </collection>
        <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison">
            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
            <filter> isinstance(transcripts, list) and "comparison" in out_add</filter>
        </collection>
        <!-- Per-assembly results: the command section moves them to details/ as
             ASSEMBLER_____FILE; the part before the five underscores becomes the
             outer list identifier, the part after it the inner one. -->
        <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false" />
            <filter>"details" in out_add</filter>
        </collection>
        <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false" />
            <filter>"details_plots" in out_add</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Two assemblies with reference genome and GTF annotation: checks the
             txt/tex/tsv short reports and the comparison, log and per-assembly
             detail collections. -->
        <test expect_num_outputs="7">
            <param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" />
            <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
            <conditional name="gene_coordinates">
                <param name="use_gtf" value="true" />
                <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
                <param name="disable_infer_genes" value="true" />
                <param name="disable_infer_transcripts" value="true" />
            </conditional>
            <param name="out_sr" value="txt,tex,tsv" />
            <param name="out_add" value="logs,comparison,plots,details" />
            <expand macro="txt_output_test" />
            <expand macro="tex_output_test" />
            <expand macro="tsv_output_test" />
            <output_collection name="comparison_png" type="list" count="15" />
            <output_collection name="comparison" type="list" count="19" />
            <output_collection name="list_logs" type="list" count="8" />
            <output_collection name="details" type="list:list" count="2">
                <expand macro="details_output_test" assembler="Trinity" />
                <expand macro="details_output_test" assembler="idba" />
            </output_collection>
        </test>
        <!-- Single assembly without reference genome and gene coordinates, with
             non-default min_alignment and lower_threshold: checks all four short
             reports, the logs and the per-assembly plots. -->
        <test expect_num_outputs="6">
            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
            <conditional name="gene_coordinates">
                <param name="use_gtf" value="false" />
            </conditional>
            <param name="min_alignment" value="30" />
            <param name="lower_threshold" value="45" />
            <param name="upper_threshold" value="95" />
            <param name="out_sr" value="txt,tex,tsv,pdf" />
            <param name="out_add" value="logs,details_plots" />

            <expand macro="pdf_output_test" />
            <expand macro="tex_output_test" />
            <expand macro="tsv_output_test" />
            <expand macro="txt_output_test" />
            <output_collection name="list_logs" type="list">
                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
            </output_collection>
            <output_collection name="details_png" type="list:list" count="1">
                <element name="Trinity">
                    <expand macro="element_has_text" name="Nx" text="PNG" />
                    <expand macro="element_has_text" name="transcript_length" text="PNG" />
                </element>
            </output_collection>
        </test>
        <!-- Same setup as the previous test with BUSCO enabled (metazoa lineage);
             additionally asserts that the lineage reaches the command line. -->
        <test expect_num_outputs="6">
            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
            <conditional name="gene_coordinates">
                <param name="use_gtf" value="false" />
            </conditional>
            <param name="min_alignment" value="30" />
            <param name="lower_threshold" value="45" />
            <param name="upper_threshold" value="95" />
            <param name="out_sr" value="txt,tex,tsv,pdf" />
            <param name="out_add" value="logs,details_plots" />
            <conditional name="busco_option">
                <param name="busco" value="true"/>
                <param name="lineage" value="metazoa"/>
            </conditional>
            <expand macro="pdf_output_test" />
            <expand macro="tex_output_test" />
            <expand macro="tsv_output_test" />
            <expand macro="txt_output_test" />
            <output_collection name="list_logs" type="list">
                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
            </output_collection>
            <output_collection name="details_png" type="list:list" count="1">
                <element name="Trinity">
                    <expand macro="element_has_text" name="Nx" text="PNG" />
                    <expand macro="element_has_text" name="transcript_length" text="PNG" />
                </element>
            </output_collection>
            <assert_command>
                <has_text text="--busco metazoa"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
**What is rnaQUAST**

- a quality assessment tool for de novo transcriptome assemblies
- evaluates RNA-Seq assembly quality and benchmarks transcriptome assemblers using a reference genome and gene database
- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts

**Using rnaQUAST without a reference** you won't get:

- x-assembled (Exons)
- Alignments per Isoform
- x-covered (Exons)
- x-matched (Blocks)
- gmap build logs

**Using rnaQUAST with a reference** you will get:

- Reports
- Logs
- Alignment/Basic Metrics
- Misassemblies/Specificity/Sensitivity
- Alignment multiplicity
- Block/Transcript Length
- Blocks per alignment
- Mismatch rate
- x-aligned
- Nx
- gmap build logs

**Using rnaQUAST without gene coordinates** you won't get:

- x-assembled (Exons)
- Alignments per Isoform
- x-covered (Exons)
- x-matched (Blocks)
- gmap build logs
- Database Metrics
- Alignment multiplicity
- Mismatch rate
- NAx
- x-aligned

**Using rnaQUAST with gene coordinates** you will get:

- Reports
- Logs
- Alignment/Basic Metrics
- Misassemblies/Specificity/Sensitivity
- Alignment multiplicity
- Block/Transcript length
- Blocks per alignment
- Mismatch rate
- x-aligned
- Nx/NAx
- gmap build logs
- Database Metrics

For more information, see the citations.
    ]]>    </help>
    <!-- Publication to cite for rnaQUAST; the DOI is given without surrounding whitespace. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btw218</citation>
    </citations>
</tool>