view pyCRAC/pyFastqDuplicateRemover.xml @ 1:7c9574213c0a draft default tip

Uploaded
author swebb
date Thu, 20 Jun 2013 12:13:43 -0400
parents 19b20927172d
children
line wrap: on
line source

 <tool id ="pyFastqDuplicateRemover" name="pyFastqDuplicateRemover">
    <!-- Declares the tool's single dependency: a "package"-type requirement named pyCRAC. -->
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
	<!--
	  Command template (Cheetah). Runs pyFastqDuplicateRemover.pl with the perl interpreter.
	    -f     forward input selected inside the "ftype" conditional
	    -r     reverse/paired input, added only when the nested "rev" selector is "yes"
	    out2   second output dataset, added together with -r
	    -o     primary output dataset
	    id     value of $out.id (presumably the Galaxy dataset id; confirm against the wrapper script)
	  NOTE(review): -r is written as "-r=value" while -f and -o use a space separator;
	  confirm that the wrapper's option parser accepts both forms.
	  NOTE(review): version_command calls the .py script directly whereas the command
	  runs the .pl wrapper; confirm that both are available at run time.
	-->
	<command interpreter="perl"> 
	pyFastqDuplicateRemover.pl
	-f $ftype.f
	#if $ftype.reverse.rev == "yes":
        -r=$ftype.reverse.r
		--out2 $out2
    #end if#
	-o $out
	--id $out.id
	</command>
	<version_command>pyFastqDuplicateRemover.py --version</version_command>
	<!--
	  Inputs: a file-type selector (FASTQ or FASTA), the forward file passed as -f,
	  an optional reverse/paired file passed as -r, and a free-text label that is
	  used to title the output datasets.
	-->
	<inputs>
		<conditional name="ftype">
			<param name="type" type="select" label="File type">
				<option value="fastq" selected="true">FASTQ</option>
				<option value="fasta">FASTA</option>
			</param>
			<when value="fastq">
				<param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
				<conditional name="reverse">
					<param name="rev" type="select" label="Add a reverse or paired FastQ file">
						<option value="no" selected="true">NO</option>
						<option value="yes">YES</option>
					</param>
					<when value="yes">
						<!-- The reverse file is passed on the command line as -r, so the label names -r. -->
						<param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
					</when>
					<when value="no">
					</when>
				</conditional>
			</when>
			<when value="fasta">
				<param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
				<conditional name="reverse">
					<param name="rev" type="select" label="Add a reverse or paired FastA file">
						<option value="no" selected="true">NO</option>
						<option value="yes">YES</option>
					</param>
					<when value="yes">
						<!-- The reverse file is passed on the command line as -r, so the label names -r. -->
						<param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
					</when>
					<when value="no">
					</when>
				</conditional>
			</when>
		</conditional>
		<!-- Plain text parameter; the "format" attribute only applies to data parameters and was dropped. -->
		<param name="label" type="text" size="30" value="pyFastqDuplicateRemover" label="Enter output file label -o" />
	</inputs>
	<!-- Both datasets are FASTA and take their titles from the "label" text input. -->
	<outputs>
		<!-- Primary result; always created. -->
		<data name="out" format="fasta" label="${label.value}.fasta" />
		<!-- Second result; kept only when the nested "rev" selector is "yes". -->
		<data name="out2" format="fasta" label="${label.value}_reverse.fasta">
			<filter>ftype['reverse']['rev'] == "yes"</filter>
		</data>
	</outputs>
	<help>

.. class:: infomark

**pyFastqDuplicateRemover**

pyFastqDuplicateRemover is part of the pyCRAC_ package. Removes identical sequences from fastq and fasta files and returns a fasta file with collapsed data.

Can also process paired-end data.

**Examples**

Unprocessed fastq data with six random nucleotides at the 5' end of the read::
    
    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1
    GCGCCTGCCAATTCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
    +
    bb_ceeeegggggiiiiiifghiihiihiiiiiiiiiifggfhiecccc
    
After pyBarcodeFilter::

    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1##GCGCCT
    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
    +
    giiiiiifghiihiihiiiiiiiiiifggfhiecccc
    
    This entry is printed to the NNNNNNGCCAAT barcode file.

After pyFastqDuplicateRemover::

    >1_GCGCCT_5/1
    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
    
    The '1' indicates that this is the first unique cDNA in the data
    GCGCCT is the random barcode sequence
    the '5' indicates that 5 reads were found with identical read and random barcode sequences
    the '/1' indicates that the sequence originates from the forward sequencing reaction
   
.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

Options::

  -f FILE, --input_file=FILE		
                                        name of the FASTQ or FASTA input file

  -r FILE, --reverse_input_file=FILE	
                                        name of the paired (or reverse) FASTQ or FASTA input file

  -o FILE, --output_file=FILE		
                                        Provide the path and name of the fastq or fasta output file. Default is standard output. 
					For paired-end data just provide a file name without file extension (!)
	</help>
</tool>