0
|
1 <tool id ="pyFastqDuplicateRemover" name="pyFastqDuplicateRemover">
|
|
2 <requirements>
|
|
3 <requirement type="package">pyCRAC</requirement>
|
|
4 </requirements>
|
|
5 <!-- Command template: invokes pyFastqDuplicateRemover.pl through the perl interpreter with the forward input (-f), the main output (-o) and that output's id. The reverse input (-r) and the second output (out2) are appended only when ftype.reverse.rev equals "yes". --> <command interpreter="perl">
|
|
6 pyFastqDuplicateRemover.pl
|
|
7 -f $ftype.f
|
|
8 #if $ftype.reverse.rev == "yes":
|
|
9 -r=$ftype.reverse.r
|
|
10 --out2 $out2
|
|
11 #end if#
|
|
12 -o $out
|
|
13 --id $out.id
|
|
14 </command>
|
|
15 <version_command>pyFastqDuplicateRemover.py --version</version_command>
|
|
16 <inputs>
|
|
17 <conditional name="ftype">
|
|
18 <param name="type" type="select" label="File type">
|
|
19 <option value="fastq" selected="true">FASTQ</option>
|
|
20 <option value="fasta">FASTA</option>
|
|
21 </param>
|
|
22 <when value="fastq">
|
|
23 <param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
|
|
24 <conditional name="reverse">
|
|
25 <param name="rev" type="select" label="Add a reverse or paired FastQ file">
|
|
26 <option value="no" selected="true">NO</option>
|
|
27 <option value="yes">YES</option>
|
|
28 </param>
|
|
29 <when value="yes">
|
|
30 <!-- Reverse/paired FASTQ input; the command template hands this file to the script as -r --> <param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
|
|
31 </when>
|
|
32 <when value="no">
|
|
33 </when>
|
|
34 </conditional>
|
|
35 </when>
|
|
36 <when value="fasta">
|
|
37 <param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
|
|
38 <conditional name="reverse">
|
|
39 <param name="rev" type="select" label="Add a reverse or paired FastA file">
|
|
40 <option value="no" selected="true">NO</option>
|
|
41 <option value="yes">YES</option>
|
|
42 </param>
|
|
43 <when value="yes">
|
|
44 <!-- Reverse/paired FASTA input; the command template hands this file to the script as -r --> <param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
|
|
45 </when>
|
|
46 <when value="no">
|
|
47 </when>
|
|
48 </conditional>
|
|
49 </when>
|
|
50 </conditional>
|
|
51 <param name="label" type="text" format="txt" size="30" value="pyFastqDuplicateRemover" label="Enter output file label -o" />
|
|
52 </inputs>
|
|
53 <!-- Outputs: "out" is declared unconditionally; "out2" is kept only when the nested reverse selector (ftype.reverse.rev) is "yes". Both are declared as fasta and take their display label from the "label" text parameter. --> <outputs>
|
|
54 <data format="fasta" name="out" label="${label.value}.fasta"/>
|
|
55 <data format="fasta" name="out2" label="${label.value}_reverse.fasta">
|
|
56 <filter>ftype['reverse']['rev'] == "yes"</filter>
|
|
57 </data>
|
|
58 </outputs>
|
|
59 <help>
|
|
60
|
|
61 .. class:: infomark
|
|
62
|
|
63 **pyFastqDuplicateRemover**
|
|
64
|
|
65 pyFastqDuplicateRemover is part of the pyCRAC_ package. Removes identical sequences from fastq and fasta files and returns a fasta file with collapsed data.
|
|
66
|
|
67 Can also process paired-end data.
|
|
68
|
|
69 **Examples**
|
|
70
|
|
71 Unprocessed fastq data with six random nucleotides at 5' end of the read::
|
|
72
|
|
73 @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1
|
|
74 GCGCCTGCCAATTCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
|
|
75 +
|
|
76 bb_ceeeegggggiiiiiifghiihiihiiiiiiiiiifggfhiecccc
|
|
77
|
|
78 After pyBarcodeFilter::
|
|
79
|
|
80 @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1##GCGCCT
|
|
81 TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
|
|
82 +
|
|
83 giiiiiifghiihiihiiiiiiiiiifggfhiecccc
|
|
84
|
|
85 This entry is printed to the NNNNNNGCCAAT barcode file.
|
|
86
|
|
87 After pyFastqDuplicateRemover::
|
|
88
|
|
89 >1_GCGCCT_5/1
|
|
90 TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
|
|
91
|
|
92 The '1' indicates that this is the first unique cDNA in the data
|
|
93 GCGCCT is the random barcode sequence
|
|
94 the '5' indicates that 5 reads were found with identical read and random barcode sequences
|
|
95 the '/1' indicates that the sequence originates from the forward sequencing reaction
|
|
96
|
|
97 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
|
|
98
|
|
99 ------
|
|
100
|
|
101 **Parameter list**
|
|
102
|
|
103 Options::
|
|
104
|
|
105 -f FILE, --input_file=FILE
|
|
106 name of the FASTQ or FASTA input file
|
|
107
|
|
108 -r FILE, --reverse_input_file=FILE
|
|
109 name of the paired (or reverse) FASTQ or FASTA input file
|
|
110
|
|
111 -o FILE, --output_file=FILE
|
|
112 Provide the path and name of the fastq or fasta output file. Default is standard output.
|
|
113 For paired-end data just provide a file name without file extension (!)
|
|
114 </help>
|
|
115 </tool>
|
|
116
|
|
117
|