view umi-tools_extract.xml @ 2:d1015c2516b7 draft

planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author iuc
date Wed, 10 Jan 2018 19:10:01 -0500
parents 79436b3019e9
children e73a22ff585c
line wrap: on
line source

<tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.0">
    <description>Extract UMI from fastq files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Builds the umi_tools extract command line.
        ##
        ## Gzipped inputs are symlinked to working-directory names ending in .gz, and
        ## their outputs are first written to .gz names and then moved onto the
        ## Galaxy dataset paths.
        ## NOTE(review): presumably umi_tools selects (de)compression from the file
        ## extension, which Galaxy dataset paths do not carry - confirm.
        ##
        ## Compression is tracked per file (gz1 for the single/first read, gz2 for the
        ## second read) so that a paired run mixing a gzipped and a plain mate stages
        ## and names each file according to its own datatype.
        #set $gz1 = False
        #set $gz2 = False
        #if $input_type.type == 'single':
            #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_single' input_single.gz &&
                #set $gz1 = True
            #end if
        #else
            #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_read1' input_read1.gz &&
                #set $gz1 = True
            #end if
            #if $input_type.input_read2.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_read2' input_read2.gz &&
                #set $gz2 = True
            #end if
        #end if
        umi_tools extract
            --bc-pattern='$bc_pattern'
            #if $input_type.type == 'single':
                #if $gz1:
                    --stdin=input_single.gz
                    --stdout out.gz
                #else
                    --stdin='$input_type.input_single'
                    --stdout '$out'
                #end if
            #else
                #if $gz1:
                    --stdin=input_read1.gz
                    --stdout out1.gz
                #else
                    --stdin='$input_type.input_read1'
                    --stdout '$out1'
                #end if
                #if $gz2:
                    --read2-in=input_read2.gz
                    --read2-out=out2.gz
                #else
                    --read2-in='$input_type.input_read2'
                    --read2-out='$out2'
                #end if
                #if $input_type.barcode.split == "1":
                    --split-barcode
                    --bc-pattern2='$input_type.barcode.bc_pattern2'
                #end if
            #end if
            ## The prime3 checkbox asks whether the barcode is at the 5' end, so the
            ## --3prime flag is passed only when the checkbox is NOT set.
            #if not $prime3:
                --3prime
            #end if
            #if $quality.quality_selector == 'true':
                --quality-filter-threshold '$quality.quality_filter_threshold'
                --quality-encoding '$quality.quality_encoding'
            #end if
            ## NOTE(review): the option spelling below is kept exactly as it was in
            ## this wrapper - confirm it matches the installed umi_tools version.
            #if $print_log == "1":
                --log='$out_log'
            #else
                --supress-stats
            #end if
        ## Move any gzipped results from their temporary .gz names to the datasets.
        #if $input_type.type == 'single':
            #if $gz1:
                && mv out.gz '$out'
            #end if
        #else
            #if $gz1:
                && mv out1.gz '$out1'
            #end if
            #if $gz2:
                && mv out2.gz '$out2'
            #end if
        #end if
    ]]></command>
    <inputs>
        <!-- Library layout: decides which read inputs are requested and which outputs are created. -->
        <conditional name="input_type">
            <param name="type" type="select" label="Library type">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
            </when>
            <when value="paired">
                <!-- Distinct labels so the two mates cannot be confused in the form. -->
                <param name="input_read1" type="data" format="fastq,fastq.gz" label="Read 1 in FASTQ format" />
                <param name="input_read2" type="data" format="fastq,fastq.gz" label="Read 2 in FASTQ format" />
                <!-- Optional second barcode pattern; only offered for paired-end data. -->
                <conditional name="barcode">
                    <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?">
                        <option value="0">Barcode on first read only</option>
                        <option value="1">Barcode on both reads</option>
                    </param>
                    <when value="0" />
                    <when value="1">
                        <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
                            help="Use this option to specify the format of the UMI/barcode for the second read pair if required." />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
            help="Use this option to specify the format of the UMI/barcode. Use Ns to represent the random positions and Xs to indicate the bc positions. Bases with Ns will be extracted and added to the read name. Remaining bases, marked with an X will be reattached to the read." />
        <!-- Checked means 5' end (no flag is passed); unchecked makes the command template add the 3prime flag. -->
        <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
            truevalue="1" falsevalue="0" checked="true"
            help="By default the barcode is assumed to be on the 5' end of the read. Disable this option to specify that it is on the 3' end instead." />
        <param name="print_log" argument="-L" type="boolean" label="Output log?"
            truevalue="1" falsevalue="0" checked="true"
            help="Choose if you want to generate a text file containing logging information." />
        <!-- Optional quality filter on the UMI bases. -->
        <conditional name="quality">
            <param name="quality_selector" type="select" label="Enable quality filter?">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false" />
            <when value="true">
                <param name="quality_filter_threshold" label="Phred score threshold"
                    type="integer" value="20" argument="--quality-filter-threshold"
                    help="Remove reads where any UMI base quality score falls below this threshold." />
                <param name="quality_encoding" argument="--quality-encoding" type="select" label="Quality score encoding"
                    help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
                    <option value="phred33">phred33 [33-77]</option>
                    <option value="phred64">phred64 [64-106]</option>
                    <option value="solexa">solexa [59-106]</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each read output inherits the datatype of its own input via format_source.
             Explicit labels keep the paired outputs and the log distinguishable in the history. -->
        <data name="out" format_source="input_single" label="${tool.name} on ${on_string}: Reads">
            <filter>input_type['type'] == "single"</filter>
        </data>
        <data name="out1" format_source="input_read1" label="${tool.name} on ${on_string}: Read 1">
            <filter>input_type['type'] == "paired"</filter>
        </data>
        <data name="out2" format_source="input_read2" label="${tool.name} on ${on_string}: Read 2">
            <filter>input_type['type'] == "paired"</filter>
        </data>
        <!-- The log dataset is only created when the "Output log?" option is enabled. -->
        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
            <filter>print_log == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end, uncompressed FASTQ with the quality filter enabled.
             prime3 is set to its false value, for which the command template adds the 3prime flag. -->
        <test>
            <conditional name="input_type">
                <param name="type" value="single" />
                <param name="input_single" value="t_R1.fastq" ftype="fastq" />
            </conditional>
            <param name="bc_pattern" value="XXXNNN" />
            <param name="prime3" value="0" />
            <conditional name="quality">
                <param name="quality_selector" value="true" />
                <param name="quality_filter_threshold" value="10" />
                <param name="quality_encoding" value="phred33" />
            </conditional>
            <output name="out" file="out_SE.fastq" />
            <output name="out_log" file="out_single.log" lines_diff="22" />
        </test>
        <!-- Paired-end, gzipped FASTQ with all other options left at their defaults.
             Outputs are decompressed before being compared with the expected files. -->
        <test>
            <conditional name="input_type">
                <param name="type" value="paired" />
                <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
                <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
            </conditional>
            <param name="bc_pattern" value="NNNXXX" />
            <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
            <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
            <output name="out_log" file="out_paired.log" lines_diff="16" />
        </test>
    </tests>
    <help><![CDATA[


UMI-tools extract.py - Extract UMI from fastq
=============================================

Purpose
-------

Extract UMI barcode from a read and add it to the read name, leaving
any sample barcode in place. Can deal with paired end reads and UMIs
split across the paired ends

Options
-------

--split-barcode
       By default the UMI is assumed to be on the first read. Use this
       option if the UMI is contained on both reads and specify the
       pattern of the barcode/UMI on the second read using the option
       ``--bc-pattern2``

--bc-pattern
       Use this option to specify the format of the UMI/barcode. Use Ns to
       represent the random positions and Xs to indicate the bc positions.
       Bases with Ns will be extracted and added to the read name. Remaining
       bases, marked with an X will be reattached to the read.

       E.g. if the pattern is NNXXNN, then the read::

           @HISEQ:87:00000000 read1
           AAGGTTGCTGATTGGATGGGCTAG
           +
           DA1AEBFGGCG01DFH00B1FF0B

       will become::

           @HISEQ:87:00000000_AATT read1
           GGGCTGATTGGATGGGCTAG
           +
           1AFGGCG01DFH00B1FF0B

--bc-pattern2
       Use this option to specify the format of the UMI/barcode for
       the second read pair if required. If --bc-pattern2 is not
       supplied, this defaults to the same pattern as --bc-pattern

--3prime
       By default the barcode is assumed to be on the 5' end of the read, but
       use this option to specify that it is on the 3' end instead

-L
       Specify a log file to retain logging information and final statistics

--split-barcode
       barcode is split across read pair

--quality-filter-threshold=QUALITY_FILTER_THRESHOLD
       Remove reads where any UMI base quality score falls
       below this threshold

--quality-encoding=QUALITY_ENCODING
       Quality score encoding. Choose from phred33 [33-77],
       phred64 [64-106] or solexa [59-106]

Usage:
------

For single ended reads:
        umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]

reads from stdin and outputs to stdout.

For paired end reads:
        umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]

reads end one from stdin and end two from FASTQIN and outputs end one to stdout
and end two to FASTQOUT.

    ]]></help>
    <expand macro="citations" />
</tool>