comparison crosscontamination_barcode_filter.xml @ 0:582b7bd4ae4c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 6f73edc667e61fabdab8b24a7ff40942588fee5b
author iuc
date Thu, 24 Jan 2019 09:52:58 -0500
parents
children 253c9448f524
comparison
equal deleted inserted replaced
-1:000000000000 0:582b7bd4ae4c
1 <tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">
2 <description>for use in plate-based barcoded analyses</description>
3 <macros> <!-- Shared token and macros that are expanded elsewhere in this tool definition. -->
4 <token name="@VERSION@">0.1</token> <!-- Referenced by the version attribute of the tool element. -->
5 <macro name="assert_conts" > <!-- Assertions applied to the out_plots (pdf) output in both tests. -->
6 <assert_contents>
7 <has_text text="/CreationDate" />
8 <has_text text="/Producer" />
9 <has_line line="startxref" />
10 <has_line line="%%EOF" />
11 </assert_contents>
12 </macro>
13 <macro name="sanitize_batch"> <!-- Used by the two "batches" text params: only digits and commas are valid. -->
14 <sanitizer invalid_char=""> <!-- Any character outside the valid set is replaced by the empty invalid_char. -->
15 <valid initial="string.digits">
16 <add value=","/>
17 </valid>
18 </sanitizer>
19 </macro>
20 <macro name="sanitize_regex"> <!-- Used by regex_extract and regex_display: letters, digits and the characters added below are valid. -->
21 <sanitizer invalid_char=""> <!-- Quote characters are not in the valid set; the values are later written inside single-quoted strings in the config file. -->
22 <valid initial="string.letters,string.digits">
23 <add value="!"/>
24 <add value="="/>
25 <add value="-"/>
26 <add value="."/>
27 <add value="*"/>
28 <add value="?"/>
29 <add value="+"/>
30 <add value="\\"/>
31 <add value="_"/>
32 <add value="&#91;"/> <!-- left square bracket, e.g. as in the character class [ACTG] of the default pattern -->
33 <add value="&#93;"/> <!-- right square bracket -->
34 <add value="&#40;"/> <!-- left parenthesis, e.g. to open a capture group -->
35 <add value="&#41;"/> <!-- right parenthesis -->
36 </valid>
37 </sanitizer>
38 </macro>
39 </macros>
40 <requirements>
41 <requirement type="package" version="2.2.1" >r-ggplot2</requirement>
42 </requirements>
43 <version_command><![CDATA[
44 Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' | head -1 | cut -d' ' -f 2
45 ]]></version_command>
46 <command detect_errors="exit_code"><![CDATA[
47 Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' '$crossconf'
48 ]]></command>
49 <configfiles>
50 <configfile name="crossconf"><![CDATA[
51 script.dir = '$__tool_directory__/scripts'
52 input_matrix <- read.table(
53 '$input_table',
54 stringsAsFactors = F,
55 na.strings=c("NA", "-", "?", "."),
56 header=TRUE,
57 row.names=1
58 )
59 input_matrix[is.na(input_matrix)] <- 0
60 #if str($inbuilt_spec.select_use) == "mpi_sagar":
61 spec = list(
62 barcodes = '$input_barcodes',
63 format = list(
64 "1-96" = c(1,3,5,7),
65 "97-192" = c(2,4,6,8)
66 ),
67 plates = list(
68 "1" = c(1,2,3,4),
69 "2" = c(5,6,7,8)
70 )
71 )
72 #elif str($inbuilt_spec.select_use) == "custom":
73 spec = list(
74 barcodes = '$input_barcodes',
75 format = list(
76 #for $i, $s in enumerate($inbuilt_spec.barcode_format)
77 "${s.range_start}-${s.range_end}" = c( ${s.batches} )
78 #if $i < len(list($inbuilt_spec.barcode_format)) - 1
79 ,
80 #end if
81 #end for
82 ),
83 plates = list(
84 #for $i, $s in enumerate($inbuilt_spec.plate_format)
85 "${s.plate}" = c( ${s.batches} )
86 #if $i < len(list($inbuilt_spec.plate_format)) - 1
87 ,
88 #end if
89 #end for
90 )
91 )
92 #end if
93 regex.extract = '$advanced.regex_extract'
94 regex.display = '$advanced.regex_display'
95 out.pdf = '$out_plots'
96 out.table = '$out_table'
97 ]]>
98 </configfile>
99 </configfiles>
100 <inputs> <!-- User-facing parameters; their values are written into the "crossconf" config file passed to the R script. -->
101 <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix" /> <!-- Loaded with read.table using a header row and the first column as row names. -->
102 <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes" /> <!-- Its path becomes the "barcodes" entry of the spec list. -->
103 <conditional name="inbuilt_spec" > <!-- Chooses between the built-in plate layout and a user-defined one. -->
104 <param name="select_use" type="select" label="Plate Protocol" >
105 <option value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>
106 <option value="custom">Custom</option>
107 </param>
108 <when value="mpi_sagar" /> <!-- No extra inputs: the barcode ranges and plate layout for this option are fixed in the config file. -->
109 <when value="custom">
110 <repeat name="barcode_format" title="Barcode Format" help="e.g. Batches 1 and 4 use barcodes 1-100 in the Barcodes file, and Batches 2 and 3 use barcodes 101-200 in the Barcodes file; specify '1' and '100' as Range values, and '1,4' as Batch values, and in the next format specify '101' and '200' as Range values and '2,3' as Batch values" > <!-- Each repeat becomes one "start-end" = c( batches ) entry of the format list. -->
111 <param name="range_start" type="integer" min="1" value="1" label="Barcode Range: Start" />
112 <param name="range_end" type="integer" min="2" value="100" label="Barcode Range: End" />
113 <param name="batches" type="text" value="1,4" label="Batches utilizing this Range" > <!-- Inserted verbatim inside c( ... ) in the config file, hence restricted to digits and commas. -->
114 <expand macro="sanitize_batch" />
115 </param>
116 </repeat>
117 <repeat name="plate_format" title="Plate Format" help="e.g. Plate 1 encompasses Batches 1-4, and Plate 2 encompasses Batches 5-8; specify '1' as a Plate value, and '1,2,3,4' as Batch values, and in the next format specify '2' as a Plate value and '5,6,7,8' as Batch values"> <!-- Each repeat becomes one "plate" = c( batches ) entry of the plates list. -->
118 <param name="plate" type="integer" min="1" value="1" label="Plate Number" />
119 <param name="batches" type="text" value="1,2,3,4" label="Batches within this Plate Number" > <!-- Inserted verbatim inside c( ... ) in the config file, hence restricted to digits and commas. -->
120 <expand macro="sanitize_batch" />
121 </param>
122 </repeat>
123 </when>
124 </conditional>
125 <section name="advanced" expanded="false" title="RegEx Parameters" > <!-- Both values are written into single-quoted strings (regex.extract, regex.display) in the config file. -->
126 <param name="regex_extract" type="text" value=".*P(\\d)_(\\d)_([ACTG]+)" label="RegEx to extract Plate, Batch, and Barcodes from headers" > <!-- The default pattern has three capture groups, matching the label's plate, batch and barcode. -->
127 <expand macro="sanitize_regex" />
128 </param>
129 <param name="regex_display" type="text" value="P\\1_B\\2_\\3" label="RegEx to replace Plate, Batch, and Barcodes from headers" > <!-- The default replacement refers to capture groups 1, 2 and 3. -->
130 <expand macro="sanitize_regex" />
131 </param>
132 </section>
133 </inputs>
134 <outputs>
135 <data name="out_plots" format="pdf" label="${tool.name} on ${on_string}: Plots" />
136 <data name="out_table" format="tabular" label="${tool.name} on ${on_string}: Filtered Table" />
137 </outputs>
138 <tests>
139 <test><!-- Inbuilt MPI -->
140 <param name="input_table" value="out3.subtable" />
141 <param name="input_barcodes" value="celseq_barcodes.192.raw" />
142 <conditional name="inbuilt_spec" >
143 <param name="select_use" value="mpi_sagar" />
144 </conditional>
145 <output name="out_plots" >
146 <expand macro="assert_conts" />
147 </output>
148 <output name="out_table" value="test.table" />
149 </test>
150 <test><!-- Plate and Lane test -->
151 <param name="input_table" value="out3.subtable" />
152 <param name="input_barcodes" value="celseq_barcodes.192.raw" />
153 <conditional name="inbuilt_spec" >
154 <param name="select_use" value="custom" />
155 <repeat name="barcode_format" >
156 <param name="range_start" value="1"/>
157 <param name="range_end" value="96" />
158 <param name="batches" value="1,3,5,7" />
159 </repeat>
160 <repeat name="barcode_format" >
161 <param name="range_start" value="97"/>
162 <param name="range_end" value="192" />
163 <param name="batches" value="2,4,6,8" />
164 </repeat>
165 <repeat name="plate_format" >
166 <param name="plate" value="1" />
167 <param name="batches" value="1,2,3,4" />
168 </repeat>
169 <repeat name="plate_format" >
170 <param name="plate" value="2" />
171 <param name="batches" value="5,6,7,8" />
172 </repeat>
173 </conditional>
174 <output name="out_plots" >
175 <expand macro="assert_conts" />
176 </output>
177 <output name="out_table" value="test.table" />
178 </test>
179 </tests>
180 <help><![CDATA[
181 Cross-contamination Filter Plot
182 ###################################
183
184 For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occurred.
185
186 If a significant number of transcripts are shown in a batch for cell barcodes that were not designed for that batch, then this tool will show that. In the below plot, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).
187
188 .. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png
189 :scale: 50 %
190
191
192 Example
193 ~~~~~~~~
194
195 Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.
196
197 ::
198
199 Barcodes
200
201 1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT
202 11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT
203 .
204 .
205 91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA
206
207
208
209 Plate 1 +-------+-------+-------+-------+
210 | B1 | B2 | B3 | B4 |
211 +-------+-------+-------+-------+
212 1-50 51-100 51-100 1-50
213
214 Plate 2 +-------+-------+-------+-------+
215 | B5 | B6 | B7 | B8 |
216 +-------+-------+-------+-------+
217 1-40 41-80 1-40 41-80
218
219 Plate 3 +-------+-------+-------+-------+
220 | B9 | B10 | B11 | B12 |
221 +-------+-------+-------+-------+
222 1-40 41-80 1-40 41-80
223
224
225 ****
226
227 The above plate and barcoding setup can be more textually represented by specifying barcode ranges and plate numbers, with each denoting which batch numbers they describe as outlined below:
228
229 ::
230
231 *Barcodes → Batches*
232 1- 50: B1, B4
233 51-100: B2, B3
234 1- 40: B5, B7, B9 , B11
235 41- 80: B6, B8, B10, B12
236
237 *Plates → Batches*
238 1: B1, B2 , B3 , B4
239 2: B5, B6 , B7 , B8
240 3: B9, B10, B11, B12
241
242 ]]></help>
243 <citations>
244 <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>
245 </citations>
246 </tool>
247