view fastq_paired_end_joiner.xml @ 5:822cc1e6274e draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_paired_end_joiner commit 28c441e8aa66a55d276b0f6325d34086eb715872
author devteam
date Sat, 07 Oct 2017 09:58:53 -0400
parents 080a058abf1e
children 09a2199cd356
line wrap: on
line source

<tool id="fastq_paired_end_joiner" name="FASTQ joiner" version="2.0.1.1">
    <description>on paired end reads</description>
    <!-- Package dependency of this tool: galaxy_sequence_utils, pinned to 1.1.2.
         NOTE(review): the gx-fastq-paired-end-joiner executable called by the
         command template is presumably shipped by this package; confirm
         against the package contents. -->
    <requirements>
        <requirement type="package" version="1.1.2">galaxy_sequence_utils</requirement>
    </requirements>
    <!-- Command template. Positional arguments, in order:
           1. left-hand reads file
           2. left-hand datatype extension with the leading "fastq" sliced off
              (for example "sanger" or "sanger.gz" for the accepted formats)
           3. right-hand reads file
           4. right-hand datatype extension, trimmed the same way
           5. output file
           6. header style chosen by the user ("old" or "new")
           7. bases to insert between the joined reads (empty by default)
         Every substituted value is single-quoted, so each one always reaches
         the program as exactly one argument in its expected position. -->
    <command><![CDATA[
gx-fastq-paired-end-joiner '$input1_file' '${input1_file.extension[len('fastq'):]}' '$input2_file' '${input2_file.extension[len('fastq'):]}' '$output_file' '$style' '${paste_sequence}'
    ]]></command>
    <inputs>
        <!-- Left-hand reads: fastqsanger or fastqcssanger, plain or in the .gz / .bz2 variants. -->
        <param name="input1_file"
               type="data"
               format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2"
               label="Left-hand Reads"/>
        <!-- Right-hand reads: accepts the same list of datatypes as the left-hand input. -->
        <param name="input2_file"
               type="data"
               format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2"
               label="Right-hand Reads"/>
        <!-- Header style passed to the joiner; "old" is preselected. -->
        <param name="style"
               type="select"
               label="FASTQ Header Style">
            <option value="old" selected="true">old</option>
            <option value="new">new</option>
        </param>
        <!-- Optional bases placed between the two joined reads; empty unless the user fills it in. -->
        <param name="paste_sequence"
               type="text"
               value=""
               label="Bases to insert between joined reads"
               help="Values are in Base-space and quality scores of maximal value will be used"/>
    </inputs>
    <outputs>
        <!-- Single output dataset; format_source ties its datatype to that of input1_file. -->
        <data name="output_file" format_source="input1_file" />
    </outputs>
    <!-- All three tests join the same read pair files and expect the same
         joined result (3.fastqsanger). None of them sets "style" or
         "paste_sequence", so the declared defaults ("old", empty string) are
         what gets exercised.
         NOTE(review): the "new" header style and a non-empty paste_sequence
         have no test coverage here. -->
    <tests>
        <!-- Uncompressed fastqsanger inputs. -->
        <test>
            <param name="input1_file" value="split_pair_reads_1.fastqsanger" ftype="fastqsanger" />
            <param name="input2_file" value="split_pair_reads_2.fastqsanger" ftype="fastqsanger" />
            <output name="output_file" file="3.fastqsanger" ftype="fastqsanger" />
        </test>
        <!-- fastqsanger.gz inputs; decompress="true" is set so the compressed
             output can be checked against the uncompressed expected file. -->
        <test>
            <param name="input1_file" value="split_pair_reads_1.fastqsanger.gz" ftype="fastqsanger.gz" />
            <param name="input2_file" value="split_pair_reads_2.fastqsanger.gz" ftype="fastqsanger.gz" />
            <output name="output_file" file="3.fastqsanger" ftype="fastqsanger.gz" decompress="true" />
        </test>
        <!-- fastqsanger.bz2 inputs; same comparison approach as the gz case. -->
        <test>
            <param name="input1_file" value="split_pair_reads_1.fastqsanger.bz2" ftype="fastqsanger.bz2" />
            <param name="input2_file" value="split_pair_reads_2.fastqsanger.bz2" ftype="fastqsanger.bz2" />
            <output name="output_file" file="3.fastqsanger" ftype="fastqsanger.bz2" decompress="true" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool joins paired-end FASTQ reads from two separate files into a
single file, concatenating each left-hand read with its right-hand
mate into one read.  Reads are matched by their sequence identifiers,
so the two files do not need to list the reads in the same order.  If
a sequence identifier does not appear in both files, that read is
excluded from the output.

-----

**Input formats**

Both old and new (from recent Illumina software) style FASTQ headers
are supported.  The following example uses the "old" style.

Left-hand Read::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh

Right-hand Read::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

-----

**Output**

A FASTQ file containing the joined reads, for example::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

------

**The "new" style**

Recent Illumina FASTQ headers are structured as follows::

  @COORDS FLAGS
  COORDS = INSTRUMENT:RUN_#:FLOWCELL_ID:LANE:TILE:X:Y
  FLAGS = READ:IS_FILTERED:CONTROL_NUMBER:INDEX_SEQUENCE

where the whitespace character between COORDS and FLAGS can be either
a space or a tab.

------

**Credits**

New-style header support was added by Simone Leo <simone.leo@crs4.it>.
    ]]></help>
    <!-- Publication to cite for this tool, identified by its DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btq281</citation>
    </citations>
</tool>