Mercurial > repos > iuc > crosscontamination_barcode_filter
diff crosscontamination_barcode_filter.xml @ 0:582b7bd4ae4c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 6f73edc667e61fabdab8b24a7ff40942588fee5b
author | iuc |
---|---|
date | Thu, 24 Jan 2019 09:52:58 -0500 |
parents | |
children | 253c9448f524 |
line wrap: on
line diff
<tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">
    <description>for use in plate-based barcoded analyses</description>
    <macros>
        <token name="@VERSION@">0.1</token>
        <!-- Assertions shared by both tests: the plot output must contain
             these PDF metadata and trailer markers. -->
        <macro name="assert_conts">
            <assert_contents>
                <has_text text="/CreationDate" />
                <has_text text="/Producer" />
                <has_line line="startxref" />
                <has_line line="%%EOF" />
            </assert_contents>
        </macro>
        <!-- Batch lists are comma-separated integers: only digits and commas
             are kept, every other character is removed. -->
        <macro name="sanitize_batch">
            <sanitizer invalid_char="">
                <valid initial="string.digits">
                    <add value="," />
                </valid>
            </sanitizer>
        </macro>
        <!-- Regular expressions: letters, digits and the metacharacters listed
             below are kept, every other character is removed. Quote characters
             are deliberately not allowed because the value is written into a
             single-quoted R string in the configfile. -->
        <macro name="sanitize_regex">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="!" />
                    <add value="=" />
                    <add value="-" />
                    <add value="." />
                    <add value="*" />
                    <add value="?" />
                    <add value="+" />
                    <add value="\\" />
                    <add value="_" />
                    <add value="[" /> <!-- left square bracket, e.g. character classes such as [ACTG] -->
                    <add value="]" /> <!-- right square bracket -->
                    <add value="(" /> <!-- left parenthesis, e.g. capture groups -->
                    <add value=")" /> <!-- right parenthesis -->
                </valid>
            </sanitizer>
        </macro>
    </macros>
    <requirements>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
    </requirements>
    <!-- The version is taken from the second space-separated field of the
         first line the script prints when run without a config file. -->
    <version_command><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' | head -1 | cut -d' ' -f 2
    ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' '$crossconf'
    ]]></command>
    <configfiles>
        <!-- R configuration passed to the script as its only argument. It reads
             the input matrix (NA, "-", "?" and "." cells are read as missing
             and then set to 0), defines the barcode/plate spec either from the
             inbuilt layout or from the user's repeat blocks, and sets the
             header regexes and output paths. -->
        <configfile name="crossconf"><![CDATA[
script.dir = '$__tool_directory__/scripts'
input_matrix <- read.table(
    '$input_table',
    stringsAsFactors = FALSE,
    na.strings=c("NA", "-", "?", "."),
    header=TRUE,
    row.names=1
)
input_matrix[is.na(input_matrix)] <- 0
#if str($inbuilt_spec.select_use) == "mpi_sagar":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
        "1-96" = c(1,3,5,7),
        "97-192" = c(2,4,6,8)
    ),
    plates = list(
        "1" = c(1,2,3,4),
        "2" = c(5,6,7,8)
    )
)
#elif str($inbuilt_spec.select_use) == "custom":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
        #for $i, $s in enumerate($inbuilt_spec.barcode_format)
        "${s.range_start}-${s.range_end}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.barcode_format)) - 1
        ,
        #end if
        #end for
    ),
    plates = list(
        #for $i, $s in enumerate($inbuilt_spec.plate_format)
        "${s.plate}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.plate_format)) - 1
        ,
        #end if
        #end for
    )
)
#end if
regex.extract = '$advanced.regex_extract'
regex.display = '$advanced.regex_display'
out.pdf = '$out_plots'
out.table = '$out_table'
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix" />
        <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes" />
        <conditional name="inbuilt_spec">
            <param name="select_use" type="select" label="Plate Protocol">
                <option value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>
                <option value="custom">Custom</option>
            </param>
            <when value="mpi_sagar" />
            <when value="custom">
                <!-- Each entry maps one barcode range onto the batches that use it. -->
                <repeat name="barcode_format" title="Barcode Format" help="e.g. Batches 1 and 4 use barcodes 1-100 in the Barcodes file, and Batches 2 and 3 use barcodes 101-200 in the Barcodes file; specify '1' and '100' as Range values, and '1,4' as Batch values, and in the next format specify '101' and '200' as Range values and '2,3' as Batch values">
                    <param name="range_start" type="integer" min="1" value="1" label="Barcode Range: Start" />
                    <param name="range_end" type="integer" min="2" value="100" label="Barcode Range: End" />
                    <param name="batches" type="text" value="1,4" label="Batches utilizing this Range">
                        <expand macro="sanitize_batch" />
                    </param>
                </repeat>
                <!-- Each entry maps one plate number onto the batches it contains. -->
                <repeat name="plate_format" title="Plate Format" help="e.g. Plate 1 encompasses Batches 1-4, and Plate 2 encompasses Batches 5-8; specify '1' as a Plate value, and '1,2,3,4' as Batch values, and in the next format specify '2' as a Plate value and '5,6,7,8' as Batch values">
                    <param name="plate" type="integer" min="1" value="1" label="Plate Number" />
                    <param name="batches" type="text" value="1,2,3,4" label="Batches within this Plate Number">
                        <expand macro="sanitize_batch" />
                    </param>
                </repeat>
            </when>
        </conditional>
        <section name="advanced" expanded="false" title="RegEx Parameters">
            <param name="regex_extract" type="text" value=".*P(\\d)_(\\d)_([ACTG]+)" label="RegEx to extract Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex" />
            </param>
            <param name="regex_display" type="text" value="P\\1_B\\2_\\3" label="RegEx to replace Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex" />
            </param>
        </section>
    </inputs>
    <outputs>
        <data name="out_plots" format="pdf" label="${tool.name} on ${on_string}: Plots" />
        <data name="out_table" format="tabular" label="${tool.name} on ${on_string}: Filtered Table" />
    </outputs>
    <tests>
        <test><!-- Inbuilt MPI -->
            <param name="input_table" value="out3.subtable" />
            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
            <conditional name="inbuilt_spec">
                <param name="select_use" value="mpi_sagar" />
            </conditional>
            <output name="out_plots">
                <expand macro="assert_conts" />
            </output>
            <output name="out_table" value="test.table" />
        </test>
        <test><!-- Custom spec using the same barcode and plate layout as the inbuilt protocol; expects the same table -->
            <param name="input_table" value="out3.subtable" />
            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
            <conditional name="inbuilt_spec">
                <param name="select_use" value="custom" />
                <repeat name="barcode_format">
                    <param name="range_start" value="1" />
                    <param name="range_end" value="96" />
                    <param name="batches" value="1,3,5,7" />
                </repeat>
                <repeat name="barcode_format">
                    <param name="range_start" value="97" />
                    <param name="range_end" value="192" />
                    <param name="batches" value="2,4,6,8" />
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="1" />
                    <param name="batches" value="1,2,3,4" />
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="2" />
                    <param name="batches" value="5,6,7,8" />
                </repeat>
            </conditional>
            <output name="out_plots">
                <expand macro="assert_conts" />
            </output>
            <output name="out_table" value="test.table" />
        </test>
    </tests>
    <help><![CDATA[
Cross-contamination Filter Plot
###################################

For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occurred.

If a significant number of transcripts are shown in a batch for cell barcodes that were not designed for that batch, then this tool will show that. In the plot below, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).

.. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png
   :scale: 50 %


Example
~~~~~~~~

Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.

::

    Barcodes

      1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT
     11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT
           .
           .
     91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA



    Plate 1  +-------+-------+-------+-------+
             |  B1   |  B2   |  B3   |  B4   |
             +-------+-------+-------+-------+
               1-50   51-100  51-100   1-50

    Plate 2  +-------+-------+-------+-------+
             |  B5   |  B6   |  B7   |  B8   |
             +-------+-------+-------+-------+
               1-40    41-80   1-40    41-80

    Plate 3  +-------+-------+-------+-------+
             |  B9   |  B10  |  B11  |  B12  |
             +-------+-------+-------+-------+
               1-40    41-80   1-40    41-80


****

The above plate and barcoding setup can be more textually represented by specifying barcode ranges and plate numbers, with each denoting which batch numbers they describe as outlined below:

::

    *Barcodes → Batches*
      1- 50: B1, B4
     51-100: B2, B3
      1- 40: B5, B7, B9 , B11
     41- 80: B6, B8, B10, B12

    *Plates → Batches*
     1: B1, B2 , B3 , B4
     2: B5, B6 , B7 , B8
     3: B9, B10, B11, B12

]]></help>
    <citations>
        <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>
    </citations>
</tool>