Mercurial > repos > mvdbeek > dedup_hash
annotate dedup_hash.xml @ 0:f33e9e6a6c88 draft default tip
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author | mvdbeek |
---|---|
date | Wed, 23 Nov 2016 07:49:05 -0500 |
parents | |
children |
rev | line source |
---|---|
0
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
1 <tool id="dedup_hash" name="Deduplicate FASTQ files" version="0.1.1"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
2 <description>with fast and memory-efficient sequence hashes</description> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
3 <requirements> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
4 <requirement type="package" version="0.150.1">smhasher</requirement> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
5 </requirements> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
6 <command><![CDATA[ |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
7 python '$__tool_directory__/dedup_hash/dedup_hash.py' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
8 #if str($readtype.single_or_paired) == "se": |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
9 --r1_in '${readtype.input_single}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
10 --r1_out '$output_single' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
11 #elif str($readtype.single_or_paired) == "pe_sep": |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
12 --r1_in '${readtype.input_paired1}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
13 --r2_in '${readtype.input_paired2}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
14 --r1_out '$output_paired1' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
15 --r2_out '$output_paired2' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
16 #else |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
17 --r1_in '${readtype.input_paired.forward}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
18 --r2_in '${readtype.input_paired.reverse}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
19 --r1_out '${output_paired_coll.forward}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
20 --r2_out '${output_paired_coll.reverse}' |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
21 #end if |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
22 $compress_fastq |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
23 ]]></command> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
24 <inputs> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
25 <conditional name="readtype"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
26 <param name="single_or_paired" type="select" label="Single-end or paired-end reads?"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
27 <option value="se" selected="true">Single-end</option> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
28 <option value="pe_sep">Paired-end (two separate input files)</option> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
29 <option value="pe_collection">Paired-end (as collection)</option> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
30 </param> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
31 <when value="se"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
32 <param format="fastq,fastq.gz" name="input_single" type="data" label="Single-end FASTQ reads" help="(-f)" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
33 </when> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
34 <when value="pe_sep"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
35 <param format="fastq,fastq.gz" name="input_paired1" type="data" label="Paired-end forward strand FASTQ reads" help="(-f)" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
36 <param format="fastq,fastq.gz" name="input_paired2" type="data" label="Paired-end reverse strand FASTQ reads" help="(-r)" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
37 </when> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
38 <when value="pe_collection"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
39 <param name="input_paired" format="fastq,fastq.gz" type="data_collection" collection_type="paired" label="Paired-end FASTQ reads as paired collection" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
40 </when> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
41 </conditional> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
42 <param name="compress_fastq" type="boolean" checked="true" truevalue="--write_gzip" falsevalue="" label="Produce compressed fastq?"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
43 </inputs> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
44 <outputs> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
45 <data name="output_single" format="fastq" label="Single-end output of ${tool.name} on ${on_string}"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
46 <filter>readtype['single_or_paired'] == 'se'</filter> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
47 <change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
48 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
49 </change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
50 </data> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
51 <data name="output_paired1" format="fastq" label="Paired-end forward strand output of ${tool.name} on ${on_string}"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
52 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
53 <change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
54 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
55 </change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
56 </data> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
57 <data name="output_paired2" format="fastq" label="Paired-end reverse strand output of ${tool.name} on ${on_string}"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
58 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
59 <change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
60 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
61 </change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
62 </data> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
63 <collection name="output_paired_coll" type="paired" structured_like="readtype.pe_collection" label="Paired-end output of ${tool.name} on ${on_string}"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
64 <filter>readtype['single_or_paired'] == 'pe_collection'</filter> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
65 <data name="forward" format="fastq"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
66 <change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
67 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
68 </change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
69 </data> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
70 <data name="reverse" format="fastq"> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
71 <change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
72 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" /> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
73 </change_format> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
74 </data> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
75 </collection> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
76 </outputs> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
77 <tests> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
78 <test> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
79 <param name="single_or_paired" value="pe_sep"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
80 <param name="input_paired1" value="r1.fastq.gz" ftype="fastq.gz"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
81 <param name="input_paired2" value="r2.fastq.gz" ftype="fastq.gz"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
82 <param name="compress_fastq" value="--write_gzip"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
83 <output name="output_paired1" file="r1_dedup.fastq.gz" ftype="fastq.gz" compare="sim_size"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
84 <output name="output_paired2" file="r2_dedup.fastq.gz" ftype="fastq.gz" compare="sim_size"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
85 </test> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
86 <test> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
87 <param name="single_or_paired" value="pe_sep"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
88 <param name="input_paired1" value="r1.fastq" ftype="fastq"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
89 <param name="input_paired2" value="r2.fastq" ftype="fastq"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
90 <param name="compress_fastq" value=""/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
91 <output name="output_paired1" file="r1_dedup.fastq" ftype="fastq"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
92 <output name="output_paired2" file="r2_dedup.fastq" ftype="fastq"/> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
93 </test> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
94 </tests> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
95 <help> <![CDATA[ |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
96 **Deduplicate paired fastq** is a fast and memory-efficient tool for removing duplicates from paired short DNA sequence reads in FASTQ format. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
97 It identifies duplicates by concatenating the sequences of a read pair and calculating a short hash that uniquely identifies the concatenated sequence. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
98 Sequences that are not unique (i.e. a hash of the concatenated sequence has been seen previously) are discarded. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
99 |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
100 Compared to FastUniq, this tool requires only a fraction of the memory, but it does not identify pairs that are identical, |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
101 except for a switch of R1 and R2. Such reads may nevertheless align to different places based on the seed-searching of the aligner, |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
102 so this may or may not be a problem for your application. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
103 |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
104 FastUniq consumed 76 GB of memory and took 4:01.52 on a typical dataset of 25 x 10^6 paired-end reads of 100 nt, |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
105 while this tool took 4.7 GB of memory and 3:23.27 for the same dataset. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
106 |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
107 Both tools produced the exact same result, suggesting that, at least before quality and/or adapter trimming, |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
108 the previously mentioned limitations are of theoretical concern. |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
109 |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
110 ]]> </help> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
111 <citations> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
112 <!-- DOI citations take the bare DOI; no "doi:" scheme prefix --> <citation type="doi">10.1371/journal.pone.0052249</citation> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
113 </citations> |
f33e9e6a6c88
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff
changeset
|
114 </tool> |