annotate umi-tools_extract.xml @ 2:d1015c2516b7 draft

planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author iuc
date Wed, 10 Jan 2018 19:10:01 -0500
parents 79436b3019e9
children e73a22ff585c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
1 <tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.0">
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
2 <description>Extract UMI from fastq files</description>
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
3 <macros>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
4 <import>macros.xml</import>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
5 </macros>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
6 <expand macro="requirements" />
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
7 <command detect_errors="exit_code"><![CDATA[
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
8 #set $gz = False
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
9 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
10 #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
11 ln -s '$input_type.input_single' input_single.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
12 #set $gz = True
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
13 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
14 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
15 #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
16 ln -s '$input_type.input_read1' input_read1.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
17 ln -s '$input_type.input_read2' input_read2.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
18 #set $gz = True
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
19 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
20 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
21 umi_tools extract
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
22 --bc-pattern='$bc_pattern'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
23 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
24 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
25 --stdin=input_single.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
26 --stdout out.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
27 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
28 --stdin='$input_type.input_single'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
29 --stdout '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
30 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
31 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
32 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
33 --stdin=input_read1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
34 --read2-in=input_read2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
35 --stdout out1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
36 --read2-out=out2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
37 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
38 --stdin='$input_type.input_read1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
39 --read2-in='$input_type.input_read2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
40 --stdout '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
41 --read2-out='$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
42 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
43 #if $input_type.barcode.split == "1":
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
44 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
45 --bc-pattern2='$input_type.barcode.bc_pattern2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
46 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
47 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
48 #if not $prime3:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
49 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
50 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
51 #if $quality.quality_selector =='true':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
52 --quality-filter-threshold '$quality.quality_filter_threshold'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
53 --quality-encoding '$quality.quality_encoding'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
54 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
55 #if $print_log == "1":
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
56 --log='$out_log'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
57 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
58 --supress-stats
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
59 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
60 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
61 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
62 && mv out.gz '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
63 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
64 && mv out1.gz '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
65 && mv out2.gz '$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
66 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
67 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
68 ]]></command>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
69 <inputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
70 <conditional name="input_type">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
71 <param name="type" type="select" label="Library type">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
72 <option value="single">Single-end</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
73 <option value="paired">Paired-end</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
74 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
75 <when value="single">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
76 <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
77 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
78 <when value="paired">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
79 <param name="input_read1" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
80 <param name="input_read2" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
81 <conditional name="barcode">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
82 <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
83 <option value="0">Barcode on first read only</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
84 <option value="1">Barcode on both reads</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
85 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
86 <when value="0">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
87 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
88 <when value="1">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
89 <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
90 help="Use this option to specify the format of the UMI/barcode for
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
91 the second read pair if required.">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
92 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
93 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
94 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
95 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
96 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
97 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
98 help="Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
99 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
100 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
101 bases, marked with an X will be reattached to the read.">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
102 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
103 <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
104 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
105 help="By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
106 use this option to specify that it is on the 3' end instead." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
107 <param name="print_log" argument="-L" type="boolean" label="Output log?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
108 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
109 help="Choose if you want to generate a text file containing logging information." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
110 <conditional name="quality">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
111 <param name="quality_selector" type="select" label="Enable quality filter?" >
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
112 <option value="false">No</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
113 <option value="true">Yes</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
114 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
115 <when value="false">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
116 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
117 <when value="true">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
118 <param name="quality_filter_threshold" label="Phred score threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
119 type="integer" value="20" argument="--quality-filter-threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
120 help="Remove reads where any UMI base quality score falls below this threshold." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
121 <param name="quality_encoding" argument="--quality-encoding" type="select" label="Quality encoding"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
122 help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
123 <option value="phred33">phred33 [33-77]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
124 <option value="phred64">phred64 [64-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
125 <option value="solexa">solexa [59-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
126 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
127 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
128 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
129 </inputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
130 <outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
131 <data name="out" format_source="input_single">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
132 <filter>input_type['type'] == "single"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
133 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
134 <data name="out1" format_source="input_read1">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
135 <filter>input_type['type'] == "paired"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
136 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
137 <data name="out2" format_source="input_read2">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
138 <filter>input_type['type'] == "paired"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
139 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
140 <data name="out_log" format="txt">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
141 <filter>print_log == True</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
142 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
143 </outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
144 <tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
145 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
146 <param name="type" value="single" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
147 <param name="input_single" value="t_R1.fastq" ftype="fastq" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
148 <param name="bc_pattern" value="XXXNNN" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
149 <param name="prime3" value="0" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
150 <param name="quality_selector" value="true" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
151 <param name="quality_filter_threshold" value="10" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
152 <param name="quality_encoding" value="phred33" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
153 <output name="out" file="out_SE.fastq" />
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
154 <output name="out_log" file="out_single.log" lines_diff="22"/>
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
155 </test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
156 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
157 <param name="type" value="paired" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
158 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
159 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
160 <param name="bc_pattern" value="NNNXXX" />
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
161 <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
162 <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
163 <output name="out_log" file="out_paired.log" lines_diff="16"/>
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
164 </test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
165 </tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
166 <help><![CDATA[
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
167
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
168
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
169 UMI-tools extract.py - Extract UMI from fastq
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
170 =============================================
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
171
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
172 Purpose
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
173 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
174
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
175 Extract UMI barcode from a read and add it to the read name, leaving
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
176 any sample barcode in place. Can deal with paired end reads and UMIs
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
177 split across the paired ends
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
178
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
179 Options
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
180 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
181
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
182 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
183 By default the UMI is assumed to be on the first read. Use this
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
184 option if the UMI is contained on both reads and specify the
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
185 pattern of the barcode/UMI on the second read using the option
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
186 ``--bc-pattern2``
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
187
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
188 --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
189 Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
190 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
191 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
192 bases, marked with an X will be reattached to the read.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
193
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
194 E.g. If the pattern is NNXXNN,
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
195 Then the read:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
196
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
197 @HISEQ:87:00000000 read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
198 AAGGTTGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
199 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
200 DA1AEBFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
201
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
202 will become:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
203 @HISEQ:87:00000000_AATT read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
204 GGGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
205 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
206 1AFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
207
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
208 --bc-pattern2
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
209 Use this option to specify the format of the UMI/barcode for
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
210 the second read pair if required. If --bc-pattern2 is not
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
211 supplied, this defaults to the same pattern as --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
212
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
213 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
214 By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
215 use this option to specify that it is on the 3' end instead
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
216
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
217 -L
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
218 Specify a log file to retain logging information and final statistics
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
219
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
220 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
221 barcode is split across read pair
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
222
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
223 --quality-filter-threshold=QUALITY_FILTER_THRESHOLD
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
224 Remove reads where any UMI base quality score falls
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
225 below this threshold
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
226 --quality-encoding=QUALITY_ENCODING
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
227 Quality score encoding. Choose from phred33 [33-77],
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
228 phred64 [64-106] or solexa [59-106]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
229
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
230 Usage:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
231 ------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
232
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
233 For single ended reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
234 umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
235
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
236 reads from stdin and outputs to stdout.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
237
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
238 For paired end reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
239 umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
240
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
241 reads end one from stdin and end two from FASTQIN and outputs end one to stdout
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
242 and end two to FASTQOUT.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
243
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
244 ]]></help>
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
245 <expand macro="citations" />
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
246 </tool>