view pyCRAC/pyFastqDuplicateRemover.xml @ 0:19b20927172d draft

author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
line wrap: on
line source

 <tool id ="pyFastqDuplicateRemover" name="pyFastqDuplicateRemover">
        <requirement type="package">pyCRAC</requirement>
	<command interpreter="perl">
	-f $ftype.f
	#if $ftype.reverse.rev == "yes":
		--out2 $out2
    #end if#
	-o $out
	--id $
	<version_command> --version</version_command>
		<conditional name="ftype">
		<param name="type" type="select"  label="File type">
			<option value="fastq" selected="true">FASTQ</option>
			<option value="fasta">FASTA</option>
		<when value="fastq">
			<param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
			<conditional name="reverse">
                <param name="rev" type="select"  label="Add a reverse or paired FastQ file">
                    <option value="no" selected="true">NO</option>
                    <option value="yes">YES</option>
                <when value="yes">
				    <param format="fastq" name="r" type="data" label="Reverse FastQ File -f" help="FastQ format" />
				<when value="no">
		<when value="fasta">
			<param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
			<conditional name="reverse">
                <param name="rev" type="select"  label="Add a reverse or paired FastA file">
                    <option value="no" selected="true">NO</option>
                    <option value="yes">YES</option>
                <when value="yes">
				    <param format="fasta" name="r" type="data" label="Reverse FastA File -f" help="FastA format" />
				<when value="no">
		<param name="label" type="text" format="txt" size="30" value="pyFastqDuplicateRemover" label="Enter output file label -o" />
		<data format="fasta" name="out" label="${label.value}.fasta"/>
		<data format="fasta" name="out2" label="${label.value}_reverse.fasta">
			<filter>ftype['reverse']['rev'] == "yes"</filter>

.. class:: infomark


pyFastqDuplicateRemover is part of the pyCRAC_ package. Removes identical sequences from fastq and fasta files and returns a fasta file with collapsed data.

Can also process paired-end data.


Unprocessed fastq data with six random nucleotides at the 5' end of the read::
After pyBarcodeFilter::

    This entry is printed to the NNNNNNGCCAAT barcode file.

After pyFastqDuplicateRemover::

    The '1' indicates that this is the first unique cDNA in the data
    GCGCCT is the random barcode sequence
    the '5' indicates that 5 reads were found with identical read and random barcode sequences
    the '/1' indicates that the sequence originates from the forward sequencing reaction

.. _pyCRAC:

**Parameter list**


  -f FILE, --input_file=FILE		
                                        name of the FASTQ or FASTA input file

  -r FILE, --reverse_input_file=FILE	
                                        name of the paired (or reverse) FASTQ or FASTA input file

  -o FILE, --output_file=FILE		
                                        Provide the path and name of the fastq or fasta output file. Default is standard output. 
					For paired-end data just provide a file name without file extension (!)