Mercurial > repos > lparsons > fastx_barcode_splitter_enhanced

<tool id="cshl_princeton_fastx_barcode_splitter" version="1.2" name="Barcode Splitter">
  <description></description>
  <!--
    Command template (Cheetah). Runs the wrapper script with the barcode file,
    the library to split, the "split/" output directory and the input's
    datatype extension, followed by the mismatch and partial-match limits.
    The barcode location is then appended: either the separate index read
    file, or the flag stored in the hidden EOL parameter of the selected
    refBarcodeLocation branch. Standard output is redirected to the summary
    dataset. Dataset paths are single-quoted so that paths containing spaces
    or shell metacharacters reach the script as single arguments.
  -->
  <command interpreter="bash" detect_errors="aggressive"><![CDATA[
fastx_barcode_splitter_galaxy_wrapper.sh '$BARCODE' '$input' "split/" $input.extension --mismatches $mismatches --partial $partial
#if $refBarcodeLocation.barcodeLocation == "idxfile":
    --idxfile '$refBarcodeLocation.idxfile'
#else:
    $refBarcodeLocation.EOL
#end if
> '$summary'
]]>
  </command>

  <inputs>
    <!-- Barcode definitions and the sequence library that will be split. -->
    <param format="txt" name="BARCODE" type="data" label="Barcodes to use" />
    <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" />

    <!--
      Where the barcode is read from. For the "bol" and "eol" branches the
      hidden EOL parameter carries the literal command-line flag that the
      command template inserts; the "idxfile" branch instead asks for a
      separate index read dataset.
    -->
    <conditional name="refBarcodeLocation">
      <param name="barcodeLocation" type="select" label="Barcodes found at">
        <option value="bol">Start of sequence (5' end)</option>
        <option value="eol">End of sequence (3' end)</option>
        <option value="idxfile">Separate index file</option>
      </param>
      <when value="bol">
        <param name="EOL" type="hidden" value="--bol" />
      </when>
      <when value="eol">
        <param name="EOL" type="hidden" value="--eol" />
      </when>
      <when value="idxfile">
        <param name="idxfile" type="data" format="fasta,fastq,fastqsanger" label="Select index read file" />
      </when>
    </conditional>

    <!-- Both limits are counts, so negative values are rejected by the form (min="0"). -->
    <param name="mismatches" type="integer" size="3" value="0" min="0" label="Number of allowed mismatches" />

    <param name="partial" type="integer" size="3" value="0" min="0" label="Number of allowed barcodes nucleotide deletions" />

  </inputs>

  <outputs>
    <!-- Tabular summary; the command template redirects standard output into this dataset. -->
    <data name="summary"
          format="tabular"
          label="${tool.name} on ${on_string}: Summary" />

    <!--
      List collection built from the files found in the "split" directory,
      which is the output directory handed to the wrapper script. Element
      datatypes follow the "input" dataset (format_source).
    -->
    <collection name="split_output"
                type="list"
                format_source="input"
                label="${tool.name} on ${on_string}">
      <discover_datasets directory="split"
                         pattern="__designation_and_ext__"
                         visible="false"
                         label="${designation}" />
    </collection>
  </outputs>

  <tests>
    <!--
      Both tests assert the "summary" dataset and the elements of the
      "split_output" list collection, using the output names declared in the
      outputs section of this tool.
    -->
    <test>
      <!-- Split a FASTQ file, barcodes at the start of each read -->
      <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
      <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
      <param name="barcodeLocation" value="bol" />
      <param name="mismatches" value="2" />
      <param name="partial" value="0" />
      <output name="summary" file="fastx_barcode_splitter1.out" />
      <output_collection name="split_output" type="list">
        <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
        <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
        <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
        <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
        <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
      </output_collection>
    </test>

    <test>
      <!-- Split a FASTQ file, using separate index read -->
      <param name="BARCODE" value="fastx_barcode_splitter1.txt" />
      <param name="input" value="fastx_barcode_splitter1.fastq" ftype="fastqsolexa" />
      <param name="barcodeLocation" value="idxfile" />
      <param name="idxfile" value="fastx_barcode_splitter_index.fastq" ftype="fastqsolexa" />
      <param name="mismatches" value="2" />
      <param name="partial" value="0" />
      <output name="summary" file="fastx_barcode_splitter1.out" />
      <output_collection name="split_output" type="list">
        <element name="BC1" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC1.out" />
        <element name="BC2" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC2.out" />
        <element name="BC3" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC3.out" />
        <element name="BC4" ftype="fastqsolexa" file="fastx_barcode_splitter1_BC4.out" />
        <element name="unmatched" ftype="fastqsolexa" file="fastx_barcode_splitter1_unmatched.out" />
      </output_collection>
    </test>
  </tests>

    <help><![CDATA[
**What it does**

This tool splits a FASTQ or FASTA file into several files, using barcodes as the split criteria.

--------

**Barcode file Format**

Barcode files are simple text files.
Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character.
Example::

    #This line is a comment (starts with a 'number' sign)
    BC1  GATCT
    BC2  ATCGT
    BC3  GTGAT
    BC4  TGTCT

For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name).
Sequences matching the barcode will be stored in the appropriate file.

One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored.

The output of this tool is a summary table displaying the split counts for each barcode identifier.
In addition, each FASTQ file produced will be loaded into the Galaxy history as part of a collection list.
]]>
  </help>

  <!-- Credit: FASTX-barcode-splitter belongs to the FASTX-toolkit by A. Gordon (gordon@cshl.edu). -->
  <citations>
    <citation type="bibtex">
      @misc{gordon_fastx-toolkit_????,
        title = {{FASTX}-{Toolkit}},
        url = {http://hannonlab.cshl.edu/fastx_toolkit/index.html},
        author = {Gordon, Assaf}
      }
    </citation>
  </citations>

</tool>
author	lparsons
date	Mon, 02 May 2016 17:04:32 -0400
parents	e7b7cdc1834d
children