Mercurial > repos > iuc > crosscontamination_barcode_filter
view crosscontamination_barcode_filter.xml @ 1:253c9448f524 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit f967afe562781e5c8ed4e24e9d1e0bc3ebb29401
author | iuc |
---|---|
date | Mon, 03 Jun 2019 14:55:24 -0400 |
parents | 582b7bd4ae4c |
children | 5ade5cf200da |
line wrap: on
line source
<tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">
    <description>for use in plate-based barcoded analyses</description>
    <!--
        Galaxy wrapper around scripts/crosscontamination_filter.R.
        The user's choices are rendered into an R configuration file (see the
        "crossconf" configfile) whose path is the only argument given to the script.
    -->
    <macros>
        <token name="@VERSION@">0.2</token>
        <!-- Batch lists: only digits and commas are kept; every other character is replaced by the empty string. -->
        <macro name="sanitize_batch">
            <sanitizer invalid_char="">
                <valid initial="string.digits">
                    <add value=","/>
                </valid>
            </sanitizer>
        </macro>
        <!--
            Regular expressions: letters, digits and the metacharacters listed below are kept.
            Quote characters are not in the list, so a value cannot terminate the
            single-quoted R string it is substituted into in the configfile.
        -->
        <macro name="sanitize_regex">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="!"/>
                    <add value="="/>
                    <add value="-"/>
                    <add value="."/>
                    <add value="*"/>
                    <add value="?"/>
                    <add value="+"/>
                    <add value="\\"/>
                    <add value="_"/>
                    <add value="["/> <!-- left square bracket, e.g subselecting from vec[1] -->
                    <add value="]"/> <!-- right square bracket -->
                    <add value="("/> <!-- left parenthesis -->
                    <add value=")"/> <!-- right parenthesis -->
                </valid>
            </sanitizer>
        </macro>
    </macros>
    <requirements>
        <requirement type="package" version="3.1.1">r-ggplot2</requirement>
        <requirement type="package" version="1.12.2">r-data.table</requirement>
    </requirements>
    <!-- The version is the second space-delimited field of the first line the script prints when run without arguments. -->
    <version_command><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' | head -1 | cut -d' ' -f 2
    ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' '$crossconf'
    ]]></command>
    <configfiles>
        <!--
            R configuration rendered by Cheetah. It defines script.dir, input_matrix
            (NA cells replaced by 0), spec (barcode file, barcode-range to batch mapping,
            plate to batch mapping), the two regexes and the two output paths.
            Cheetah directives must start their own line, and no R comments are placed
            in the body because Cheetah would interpret a leading hash itself.
            stringsAsFactors uses the reserved constant FALSE rather than the reassignable
            variable F.
        -->
        <configfile name="crossconf"><![CDATA[
script.dir = '$__tool_directory__/scripts'

input_matrix <- read.table(
    '$input_table',
    stringsAsFactors = FALSE,
    na.strings=c("NA", "-", "?", "."),
    header=TRUE,
    row.names=1
)
input_matrix[is.na(input_matrix)] <- 0

#if str($inbuilt_spec.select_use) == "mpi_sagar":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
        "1-96" = c(1,3),
        "97-192" = c(2,4)
    ),
    plates = list(
        "1" = c(1,2),
        "2" = c(3,4)
    )
)
#elif str($inbuilt_spec.select_use) == "custom":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
        #for $i, $s in enumerate($inbuilt_spec.barcode_format)
        "${s.range_start}-${s.range_end}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.barcode_format)) - 1
        ,
        #end if
        #end for
    ),
    plates = list(
        #for $i, $s in enumerate($inbuilt_spec.plate_format)
        "${s.plate}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.plate_format)) - 1
        ,
        #end if
        #end for
    )
)
#end if

regex.extract = '$advanced.regex_extract'
regex.display = '$advanced.regex_display'

out.pdf = '$out_plots'
out.table = '$out_table'
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix" />
        <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes" />
        <!-- Either the built-in CelSeq2 layout (hard-coded in the configfile) or a user-described layout. -->
        <conditional name="inbuilt_spec">
            <param name="select_use" type="select" label="Plate Protocol">
                <option value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>
                <option value="custom">Custom</option>
            </param>
            <when value="mpi_sagar" />
            <when value="custom">
                <repeat name="barcode_format"
                        title="Barcode Format"
                        help="e.g. Batches 1 and 4 use barcodes 1-100 in the Barcodes file, and Batches 2 and 3 use barcodes 101-200 in the Barcodes file; specify '1' and '100' as Range values, and '1,4' as Batch values, and in the next format specify '101' and '200' as Range values and '2,3' as Batch values">
                    <param name="range_start" type="integer" min="1" value="1" label="Barcode Range: Start" />
                    <param name="range_end" type="integer" min="2" value="100" label="Barcode Range: End" />
                    <param name="batches" type="text" value="1,4" label="Batches utilizing this Range">
                        <expand macro="sanitize_batch" />
                    </param>
                </repeat>
                <repeat name="plate_format"
                        title="Plate Format"
                        help="e.g. Plate 1 encompasses Batches 1-4, and Plate 2 encompasses Batches 5-8; specify '1' as a Plate value, and '1,2,3,4' as Batch values, and in the next format specify '2' as a Plate value and '5,6,7,8' as Batch values">
                    <param name="plate" type="integer" min="1" value="1" label="Plate Number" />
                    <param name="batches" type="text" value="1,2,3,4" label="Batches within this Plate Number">
                        <expand macro="sanitize_batch" />
                    </param>
                </repeat>
            </when>
        </conditional>
        <!-- Backslashes are doubled because the values are substituted into R string literals. -->
        <section name="advanced" expanded="false" title="RegEx Parameters">
            <param name="regex_extract" type="text" value=".*P(\\d)_B(\\d)_([ACTG]+)" label="RegEx to extract Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex" />
            </param>
            <param name="regex_display" type="text" value="P\\1_B\\2_\\3" label="RegEx to replace Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex" />
            </param>
        </section>
    </inputs>
    <outputs>
        <data name="out_plots" format="pdf" label="${tool.name} on ${on_string}: Plots" />
        <data name="out_table" format="tabular" label="${tool.name} on ${on_string}: Filtered Table" />
    </outputs>
    <tests>
        <test><!-- Inbuilt MPI -->
            <param name="input_table" value="test.matrix" />
            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
            <conditional name="inbuilt_spec">
                <param name="select_use" value="mpi_sagar" />
            </conditional>
            <output name="out_plots" value="out.pdf" compare="sim_size" />
            <output name="out_table" value="out.table" />
        </test>
        <test><!-- Plate and Lane test -->
            <param name="input_table" value="test.matrix" />
            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
            <conditional name="inbuilt_spec">
                <param name="select_use" value="custom" />
                <repeat name="barcode_format">
                    <param name="range_start" value="1"/>
                    <param name="range_end" value="96" />
                    <param name="batches" value="1,3" />
                </repeat>
                <repeat name="barcode_format">
                    <param name="range_start" value="97"/>
                    <param name="range_end" value="192" />
                    <param name="batches" value="2,4" />
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="1" />
                    <param name="batches" value="1,2" />
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="2" />
                    <param name="batches" value="3,4" />
                </repeat>
            </conditional>
            <output name="out_plots" value="out.pdf" compare="sim_size" />
            <output name="out_table" value="out.table" />
        </test>
    </tests>
    <!-- The help body is reStructuredText: blank lines and the indentation of literal blocks are significant. -->
    <help><![CDATA[
Cross-contamination Filter Plot
###################################

For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occurred.

If a significant number of transcripts are shown in a batch for cell barcodes that were not designed for that batch, then this tool will show that. In the below plot, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).

.. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png
    :scale: 50 %

Example
~~~~~~~~

Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.

::

    Barcodes
      1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT
     11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT
             .
             .
     91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA


    Plate 1
    +-------+-------+-------+-------+
    |  B1   |  B2   |  B3   |  B4   |
    +-------+-------+-------+-------+
      1-50   51-100  51-100   1-50

    Plate 2
    +-------+-------+-------+-------+
    |  B5   |  B6   |  B7   |  B8   |
    +-------+-------+-------+-------+
      1-40    41-80   1-40    41-80

    Plate 3
    +-------+-------+-------+-------+
    |  B9   |  B10  |  B11  |  B12  |
    +-------+-------+-------+-------+
      1-40    41-80   1-40    41-80

****

The above plate and barcoding setup can be more textually represented by specifying barcode ranges and plate numbers, with each denoting which batch numbers they describe as outlined below:

::

    *Barcodes → Batches*
     1- 50: B1, B4
    51-100: B2, B3
     1- 40: B5, B7, B9 , B11
    41- 80: B6, B8, B10, B12

    *Plates → Batches*
    1: B1, B2 , B3 , B4
    2: B5, B6 , B7 , B8
    3: B9, B10, B11, B12

    ]]></help>
    <citations>
        <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>
    </citations>
</tool>