Mercurial > repos > nilesh > rseqc

<tool id="rseqc_read_hexamer" name="Hexamer frequency" version="@WRAPPER_VERSION@">
    <!-- Short summary of what the tool computes. -->
    <description>
        calculates hexamer (6mer) frequency for reads, genomes, and mRNA sequences
    </description>

    <!-- Imports rseqc_macros.xml; presumably this is where the macros expanded
         in this file are defined (TODO confirm against that file). -->
    <macros>
        <import>rseqc_macros.xml</import>
    </macros>

    <!-- Expands the "requirements" macro; its contents are defined outside this file. -->
    <expand macro="requirements" />

    <!-- Expands the "stdio" macro; its contents are defined outside this file. -->
    <expand macro="stdio" />

    <!-- Command used to report the version of the wrapped read_hexamer.py script. -->
    <version_command><![CDATA[read_hexamer.py --version]]></version_command>

    <command><![CDATA[
        ## Stage every input in the working directory under a sanitized, unique
        ## name. The names are collected in input_list and handed to -i below.
        #import re
        #set $input_list = []
        #for $i, $input in enumerate($inputs):

            ## Replace every character that is not a word character, hyphen or
            ## underscore with an underscore.
            #set $safename = re.sub('[^\w\-_]', '_', $input.element_identifier)
            ## Two inputs can sanitize to the same name; disambiguate the later
            ## one by appending its loop index.
            #if $safename in $input_list:
                #set $safename = str($safename) + "." + str($i)
            #end if
            ## Evaluate the append for its side effect only, so that its return
            ## value is never written into the generated command line.
            #silent $input_list.append($safename)

            ## Gzipped reads are decompressed into the staged name; everything
            ## else is symlinked.
            #if $input.is_of_type("fastq.gz", "fastqsanger.gz"):
                gunzip -c '${input}' > "${safename}" &&
            #else:
                ln -sf '${input}' "${safename}" &&
            #end if
        #end for
        read_hexamer.py -i '${ ','.join($input_list) }'
        #if $refgenome:
            -r '${refgenome}'
        #end if
        #if $refgene:
            -g '${refgene}'
        #end if
        > '${output}'
        ]]>
    </command>

    <inputs>
        <!-- One or more read files; the command stages them and passes their
             names to -i as a comma-separated list. -->
        <param name="inputs" type="data" multiple="true"
               format="fasta,fastq,fastqsanger,fastq.gz,fastqsanger.gz"
               label="Read sequences in fasta or fastq format"
               help="(--input)" />
        <!-- Optional; when set, the command passes it as -r. -->
        <param name="refgenome" type="data" format="fasta" optional="true"
               label="Reference genome sequence (fasta)"
               help="(--refgenome)" />
        <!-- Optional; when set, the command passes it as -g. -->
        <param name="refgene" type="data" format="fasta" optional="true"
               label="Reference mRNA sequence (fasta)"
               help="(--refgene)" />
    </inputs>

    <outputs>
        <!-- Tabular dataset that receives the standard output of
             read_hexamer.py (the command redirects stdout into it). -->
        <data name="output"
              format="tabular"
              label="${tool.name} on ${on_string}" />
    </outputs>

    <tests>
        <!-- Single uncompressed fastq input: the header column carries the
             sanitized file name (dots replaced by underscores). -->
        <test>
            <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq"/>
            <output name="output">
                <assert_contents>
                    <has_line line="Hexamer&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq" />
                    <has_line line="AAAAAA&#009;0.00217391304348" />
                </assert_contents>
            </output>
        </test>
        <!-- Single gzipped input (ftype fastqsanger.gz): exercises the gunzip
             branch of the command and expects the same AAAAAA frequency. -->
        <test>
            <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq.gz" ftype="fastqsanger.gz"/>
            <output name="output">
                <assert_contents>
                    <has_line line="Hexamer&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq_gz" />
                    <has_line line="AAAAAA&#009;0.00217391304348" />
                </assert_contents>
            </output>
        </test>
        <!-- Two different inputs: one output column per input. -->
        <test>
            <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq,pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq"/>
            <output name="output">
                <assert_contents>
                    <has_line line="Hexamer&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R2_fastq" />
                    <has_line line="AAAAAA&#009;0.00217391304348&#009;0.00534759358289" />
                </assert_contents>
            </output>
        </test>
        <!-- The same file supplied twice: the second column name gets the
             ".1" suffix that the command adds to resolve the name collision. -->
        <test>
            <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq,pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq"/>
            <output name="output">
                <assert_contents>
                    <has_line line="Hexamer&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq&#009;pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq.1" />
                    <has_line line="AAAAAA&#009;0.00217391304348&#009;0.00217391304348" />
                </assert_contents>
            </output>
        </test>
        <!-- Unable to test with collections at the moment (requires type="data_collection" on the input)
        <test>
            <param name="inputs">
                <collection type="list">
                    <element name="read_1" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq" />
                    <element name="read_2" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq" />
                </collection>
            </param>
            <output name="output" file="output.read_hexamer.2.txt" />
        </test>
        -->
    </tests>

    <help><![CDATA[
read_hexamer.py
+++++++++++++++++++++

Calculate hexamer (6mer) frequency. If ‘-r’ was specified, hexamer frequency
is also calculated for the reference genome. If ‘-g’ was provided, hexamer
frequency is also calculated for the mRNA sequences.

Inputs
++++++++++++++

Input reads file
    Read sequences in fasta or fastq format.

Reference Genome
    Reference genome sequence in fasta format.

Reference Gene
    Reference mRNA sequences in fasta format.


Outputs
++++++++++++++

Tabular file of hexamer frequencies for each input.

@ABOUT@

]]>
    </help>

    <!-- Expands the "citations" macro; its contents are defined outside this file. -->
    <expand macro="citations" />

</tool>
author	iuc
date	Thu, 28 Nov 2019 15:56:37 -0500
parents	09846d5169fa
children	dbedfc5f5a3c