view pyCRAC/pyBarcodeFilter.xml @ 1:7c9574213c0a draft default tip

Uploaded
author swebb
date Thu, 20 Jun 2013 12:13:43 -0400
parents 19b20927172d
children
line wrap: on
line source

 <tool id ="pyBarcodeFilter" name="pyBarcodeFilter" force_history_refresh="True">
        <!-- Declares the pyCRAC package as a dependency of this tool. -->
        <requirements>
            <requirement type="package">pyCRAC</requirement>
        </requirements>
	<!--
		Command template (Cheetah). Runs the pyBarcodeFilter.pl wrapper through perl and passes it
		the selected file type, the forward read file (-f), the barcode file (-b), the mismatch
		count (-m), the optional index flag, the primary output dataset with its id, and the
		new-file path. The reverse read file (-r) and the optional both-reads flag are appended
		only when the user chose to supply a reverse or paired file.
		NOTE(review): the script path is absolute; confirm it matches the deployment.
	-->
	<command interpreter="perl"><![CDATA[
		/usr/local/bin/pyBarcodeFilter.pl
		--file_type $ftype.type
		-f $ftype.f
		-b $barcode
		-m $mismatch
		$index
		--out $out
		--id $out.id
		--output_path $__new_file_path__
		#if $ftype.reverse.rev == "yes":
			-r=$ftype.reverse.r
			$ftype.reverse.both
		#end if#
	]]></command>
	<!-- Command used to report the version of the underlying tool. -->
	<version_command>pyBarcodeFilter.py --version</version_command>
	<inputs>
		<!--
			Input file selection. The chosen file type (fastq or fasta) decides which data
			format the forward file "f" and the optional reverse file "r" must have. Both
			branches expose the same nested "reverse" conditional, so the command template can
			read ftype.reverse.rev, ftype.reverse.r and ftype.reverse.both whichever file type
			is selected.
		-->
		<conditional name="ftype">
			<param name="type" type="select" label="File type">
				<option value="fastq" selected="true">FASTQ</option>
				<option value="fasta">FASTA</option>
			</param>
			<when value="fastq">
				<param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
				<conditional name="reverse">
					<param name="rev" type="select" label="Add a reverse or paired FastQ file">
						<option value="no" selected="true">NO</option>
						<option value="yes">YES</option>
					</param>
					<when value="yes">
						<!-- The reverse file is passed to the script with -r, so the label names that flag. -->
						<param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
						<!-- The selected value is inserted verbatim into the command line (empty when NO). -->
						<param name="both" type="select" label="Search for barcode in both reads">
							<option value="" selected="true">NO</option>
							<option value="--both">YES</option>
						</param>
					</when>
					<when value="no" />
				</conditional>
			</when>
			<when value="fasta">
				<param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
				<conditional name="reverse">
					<param name="rev" type="select" label="Add a reverse or paired FastA file">
						<option value="no" selected="true">NO</option>
						<option value="yes">YES</option>
					</param>
					<when value="yes">
						<!-- The reverse file is passed to the script with -r, so the label names that flag. -->
						<param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
						<!-- The selected value is inserted verbatim into the command line (empty when NO). -->
						<param name="both" type="select" label="Search for barcode in both reads">
							<option value="" selected="true">NO</option>
							<option value="--both">YES</option>
						</param>
					</when>
					<when value="no" />
				</conditional>
			</when>
		</conditional>
		<!-- Barcode table. It is passed to the script with -b, so the label names that flag. -->
		<param format="tabular" name="barcode" type="data" label="Barcode File -b" help="Tab delimited file with barcodes and barcode names" />
		<!--
			Number of barcode mismatches, passed to the script with -m. The tool help states that
			at most one mismatch is allowed, so the validator accepts only 0 or 1. The "format"
			attribute was dropped because it only applies to data parameters.
			NOTE(review): the 0-1 limit follows the help text; confirm against the script itself.
		-->
		<param name="mismatch" type="integer" label="Mismatches -m" value="0" size="3" help="Set the number of allowed mismatches in a barcode">
			<validator type="in_range" min="0" max="1" message="Please enter a value of 0 or 1"/>
		</param>
		<!-- Optional flag: the selected value (empty or "-i") is inserted verbatim into the command line. -->
		<param name="index" type="select" label="Split data using Illumina indexing barcode information -i">
			<option value="" selected="true">NO</option>
			<option value="-i">YES</option>
		</param>
	</inputs>
	<!--
		Primary output dataset, referenced as "out" by the command template. "txt" is the
		Galaxy extension for plain text; "text" is not a registered datatype.
	-->
	<outputs>
		<data format="txt" name="out" label="pyBarcodeFilter"/>
	</outputs>
	<help>

.. class:: infomark

**pyBarcodeFilter**

pyBarcodeFilter is part of the pyCRAC_ package. It filters sequence files by barcodes.

This tool requires FASTA or FASTQ input files containing the raw data and a text file containing barcode information.
To process paired end data, use the -f and -r flags to indicate the paths to the forward and reverse sequencing reactions, respectively.
The barcodes file should contain two columns separated by a tab (see the table below). The first column should contain the barcode nucleotide sequences.
The second column should contain an identifier, for example, the name of the barcode or the name of the experiment.
The 'N' in the barcode sequence indicates a random nucleotide. Make sure to create the file with a simple text editor such as TextEdit (Mac OS X) or gedit (Linux/Unix), or with a text editor in the terminal.
The program is case sensitive: all the nucleotide sequences should be upper case. 
You can freely combine different barcodes, but take care when mixing samples containing random nucleotide barcodes and normal barcodes.
**NOTE!** Make sure to place the regular barcode sequences below the sequences with random nucleotides, and make sure the shortest sequence is ALWAYS at the bottom of the column (see below).

Example of a barcode text file::

    NNNCGCTTAGC mutant2
    NNNGCGCAGC  mutant1
    NNNATTAG    control
    NNNTAAGC    myfavprotein
    AGC         oldcontrol
    AC          veryfirstbarcodedsample 
   
.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

Options::

  -f FILE, --input_file=FILE		
                            name of the FASTQ or FASTA input file
  -r FILE, --reverse_input_file=FILE	
                            name of the paired (or reverse) FASTQ or FASTA input file
  --file_type=FASTQ     		
                            type of file, uncompressed (fasta or fastq) or compressed (fasta.gz or fastq.gz, gzip/gunzip
                            compressed). Default is fastq
  -b FILE, --barcode_list=FILE		
                            name of tab-delimited file containing barcodes and barcode names
  -m 1, --mismatches=1  		
                            to set the number of allowed mismatches in a barcode. A maximum of one mismatch is allowed. Default = 0
  -i, --index           		
                            use this option if you want to split the data using the Illumina indexing barcode information

	</help>
</tool>