changeset 1:9764802ffae8 draft

Uploaded
author gbcs-embl-heidelberg
date Wed, 03 Sep 2014 04:11:49 -0400
parents 687ced68db46
children 1b79b43626ef
files jemultiplexer.xml
diffstat 1 files changed, 436 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jemultiplexer.xml	Wed Sep 03 04:11:49 2014 -0400
@@ -0,0 +1,436 @@
+<tool id="jedebarcoding" name="Jemultiplexer">
+	<description>Demultiplexes multiplexed data</description>
+	<requirements> <!-- NOTE(review): the version below is written as a range (">=1.6"); confirm the dependency resolver in use accepts a range rather than an exact version string -->
+		<requirement type="package" version=">=1.6">java</requirement>
+	</requirements>
+	<command interpreter="python">
+	jemultiplexer.py
+	$MpxData1 
+	$output1 
+	$output1.id 
+	$bsinputtype.barcodes 
+	"$bsinputtype.barcode_list"
+	$__new_file_path__
+	$MpxData1.ext
+	$bcodelen
+	$qualityFormat
+	$maxMismatches
+	$minBaseQuality
+	$minMismatchingDelta
+	$clipBarcodeCon.xTrimLen
+	$zTrimLen
+	$clipBarcodeCon.clipBarcode
+	$addBarcodeToHeader
+	$gzipOutput
+	$barcodeDiagFile
+	$rChar
+	#if $singlePaired.sPaired  == "paired":
+	  $singlePaired.barcodeReadPosCon.barcodeReadPos
+	  #if $singlePaired.barcodeReadPosCon.barcodeReadPos == "BOTH":
+	    $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching
+	  	#if $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching == "BOTH":
+	  	  $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.redundantBarcode
+	  	  $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.strict
+	  	  $singlePaired.MpxData2
+	  	#else:
+	  	  $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcode
+	  	  $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.strict
+	  	  $singlePaired.MpxData2
+	  	#end if
+	  #else:
+	  	$singlePaired.barcodeReadPosCon.barcodeForSampleMatching
+		$singlePaired.barcodeReadPosCon.redundantBarcode
+		$singlePaired.barcodeReadPosCon.strict
+		$singlePaired.MpxData2
+	  #end if
+	#else:
+		$singlePaired.barcodeReadPos
+		$singlePaired.barcodeForSampleMatching
+		$singlePaired.redundantBarcode
+		$singlePaired.strict
+		$singlePaired.MpxData2
+	#end if
+	</command> <!-- Arguments are positional: 19 fixed values (MpxData1 through rChar), then barcodeReadPos, barcodeForSampleMatching, redundantBarcode, strict and MpxData2, each read from whichever singlePaired branch is active. -->
+	<inputs>		
+		<param type="data" format="gz,fastq" name="MpxData1" label="Compressed (or not) FASTQ file" />
+		<conditional name="singlePaired"> <!-- every branch below defines MpxData2, barcodeReadPos, barcodeForSampleMatching, redundantBarcode and strict at the nesting depth the command template reads them from -->
+			<param name="sPaired" type="select" label="Is this library mate-paired?">
+				<option value="single">Single-end</option>
+				<option value="paired">Paired-end</option>
+			</param>
+			<when value="single"> <!-- no second file: hidden placeholder values fill the five trailing command arguments -->
+				<param name="MpxData2" type="hidden" value="single" />
+				<param name="barcodeReadPos" type="hidden" value="none" />
+				<param name="redundantBarcode" type="hidden" value="none" />
+				<param name="barcodeForSampleMatching" type="hidden" value="none" />
+				<param name="strict" type="hidden" value="none" />
+			</when>
+			<when value="paired">
+				<param name="MpxData2" type="data" format="gz,fastq" label="Compressed (or not) FASTQ file" />
+				<conditional name="barcodeReadPosCon">
+					<param name="barcodeReadPos" type="select" label="Barcode read position (BPOS)" help="where are the barcodes">
+						<option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
+						<option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
+						<option value="BOTH">BOTH (beginning of both reads)</option>
+					</param>
+					<when value="READ_1"> <!-- barcode on one read only: the remaining options are fixed hidden values (same for READ_2) -->
+						<param name="redundantBarcode" type="hidden" value="true" />
+						<param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
+						<param name="strict" type="hidden" value="false" />
+					</when>
+					<when value="READ_2">
+						<param name="redundantBarcode" type="hidden" value="true" />
+						<param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
+						<param name="strict" type="hidden" value="false" />
+					</when>
+					<when value="BOTH"> <!-- barcodes on both reads: the user also chooses which one resolves the sample -->
+						<conditional name="barcodeForSampleMatchingCon">
+							<param name="barcodeForSampleMatching" type="select" label="Barcode for sample matching (BM)" help="which barcode should be used for sample look up (BM option)." >
+								<option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
+								<option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
+								<option value="BOTH">BOTH (beginning of both reads)</option>
+							</param>
+							<when value="READ_1">
+								<param name="redundantBarcode" type="hidden" value="true" />
+								<param name="strict" type="hidden" value="false" />
+							</when>
+							<when value="READ_2">
+								<param name="redundantBarcode" type="hidden" value="true" />
+								<param name="strict" type="hidden" value="false" />
+							</when>
+							<when value="BOTH"> <!-- both barcodes used for matching: BRED, and STRICT when BRED is true, become user-selectable -->
+								<conditional name="redundantBarcodeCon">
+									<param name="redundantBarcode" type="select" label="Redundant barcodes (BRED)" help="are the barcode REDUNDANT i.e. do they both resolve to the same sample (BRED option).">
+										<option value="true">True</option>
+										<option value="false" selected="true">False</option>
+									</param>
+									<when value="true">
+										<param name="strict" type="select" label="Strict (S)" help="tells whether both barcodes should resolve to the same sample." >
+											<option value="true" selected="true">True</option>
+											<option value="false">False</option>
+										</param>
+									</when>
+									<when value="false">
+										<param name="strict" type="hidden" value="false" />
+									</when>
+								</conditional>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+			</when>	    
+		</conditional>
+	  
+		<conditional name="bsinputtype">		<!-- barcodes come either from a history dataset or from pasted text; the unused alternative is passed to the command as a hidden "none" -->
+			<param name="bsinputtype_selector" type="select" label="Barcode set input type" help="You can either submit a barcode file or paste the list in a text field. Note: one sample per line.">
+				<option value="bs_file" selected="true">Use a .bs tab-delimited file in the history</option>
+				<option value="bs_textfield">Paste the barcodes list in a text field within the form</option>
+			</param>
+			<when value="bs_file">
+				<param name="barcodes" type="data" format="bs" label="Barcode Set" />
+				<param name="barcode_list" type="hidden" value="none" />
+			</when>
+			<when value="bs_textfield">
+				<param name="barcode_list" type="text" area="True" size="10x30" label="Barcode Set (&#60;span style=&#34;color:brown;&#34;&#62;one sample per line: &#38;#60&#59;sample_name&#38;#62&#59;&#38;#60&#59;tab or space&#38;#62&#59;&#38;#60&#59;barcode&#38;#62&#59;&#60;/span&#62;)"/>
+				<param name="barcodes" type="hidden" value="none" />
+			</when>		
+		</conditional>	
+	  
+		<param name="bcodelen" type="text" value="6" label="Barcode Length (LEN)" /> 
+		<!-- The numeric options in this group are declared as text params; the help section documents an X:Z form (one value per read) for LEN, MM, MMD, Q, XT and ZT. -->
+		<param name="qualityFormat" type="select" label="Fastq Quality Format (V)" help="if you need other quality coding format, contact the galaxy administrators.">
+			<option value="Standard" selected="true">Fastq - Illumina Casava V1.8 with Sanger coding quality (phred scaling + 33)</option>
+			<option value="Illumina">Fastq - Illumina v1.3 or above coding of quality (phred scaling + 64)</option>
+			<option value="Solexa">Fastq - Solexa-style quality (solexa scaling + 66)</option>
+		</param>
+		<param name="maxMismatches" type="text" value="1" label="Maximum Mismatches (MM)" help="maximum mismatches for a barcode to be considered a match." /> 	  
+		<param name="minBaseQuality" type="text" value="10" label="Minimum base quality (Q)" help="any barcode bases falling below this quality will be considered a mismatch even if the bases match." />
+		<param name="minMismatchingDelta" type="text" value="1" label="Minimum mismatch difference (MMD)" help="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match." /> 
+		<param name="zTrimLen" type="text" value="0" label="Extra number of bases to be trimmed from the barcode end (ZT)" />
+		<conditional name="clipBarcodeCon"> <!-- XT is only offered when the barcode is clipped; otherwise a hidden value of 1 is still passed to the command -->
+			<param name="clipBarcode" type="select" label="Remove barcode sequence from read (C)" >
+				<option value="true" selected="true">True</option>
+				<option value="false">False</option>
+			</param>
+			<when value="true">
+				<param name="xTrimLen" type="text" value="1" label="Extra number of base to be trimmed right after the barcode (XT)" />
+			</when>
+			<when value="false">
+				<param name="xTrimLen" type="hidden" value="1" />
+			</when>
+		</conditional>
+		<param name="addBarcodeToHeader" type="select" label="Add matched barcode at the end of the read header (ADD)" >
+			<option value="true" selected="true">True</option>
+			<option value="false">False</option>
+		</param>
+		<param name="gzipOutput" type="select" label="Compress output (GZ)" >
+			<option value="true" selected="true">True</option>
+			<option value="false">False</option>
+		</param>
+		<param name="barcodeDiagFile" type="select" label="Output barcode match reporting file (DIAG)" > <!-- default is False, now marked explicitly like the sibling selects -->
+			<option value="false" selected="true">False</option>
+			<option value="true">True</option>
+		</param>
+		<param name="rChar" type="select" label="Replace white space in the read name/header with specified symbols (RCHAR)" > <!-- option values 1 to 4 are codes handed to the command; the labels show the corresponding character (the first label is a single space) -->
+			<option value="1" selected="true"> </option>
+			<option value="2">:</option>
+			<option value="3">_</option>
+			<option value="4">-</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data format="html" name="output1" label="Demultiplexing stats on ${on_string}"/> <!-- the only declared output: an HTML report -->
+	    <!-- Kept as an inactive example of reformatting (change_format makes the output format follow the input extension);
+	      <data format="fastqsanger" name="output1" metadata_source="MpxData1" label="Demultiplexing stats on ${on_string}">
+		<change_format>
+		  <when input="MpxData1.ext" value="fastqillumina" format="fastqillumina" />
+		</change_format>  
+		</data> -->
+	</outputs>
+	
+	<tests>
+		<test> <!-- paired-end run with barcodes on both reads (BPOS=BOTH, BM=BOTH, BRED=false), compared against result.html -->
+			<param name="MpxData1" value="C1WLBACXX_lane7_1_sequence.txt" />
+			<param name="MpxData2" value="C1WLBACXX_lane7_2_sequence.txt" />
+			<param name="sPaired" value="paired" />
+			<param name="bsinputtype_selector" value="bs_file" />
+			<param name="barcodes" value="correct_barcodes_PE_both-ends_with_fnames.txt" />
+			<param name="barcodeReadPos" value="BOTH" />
+			<param name="barcodeForSampleMatching" value="BOTH" />
+			<param name="redundantBarcode" value="false" />
+			<param name="bcodelen" value="6" />
+			<param name="qualityFormat" value="Standard" />
+			<param name="maxMismatches" value="3" />
+			<param name="minBaseQuality" value="20" />
+			<param name="minMismatchingDelta" value="2" />
+			<param name="zTrimLen" value="0" />
+			<param name="clipBarcode" value="true" />
+			<param name="xTrimLen" value="1" />
+			<param name="addBarcodeToHeader" value="true" />
+			<param name="gzipOutput" value="true" />
+			<param name="barcodeDiagFile" value="true" />
+			<param name="rChar" value="1" />
+			<output name="output1" file="result.html" ftype="html"/>
+		</test>
+	</tests>
+	
+	<help>
+
+**What it does**
+
+Jemultiplexer : A fastq files demultiplexer with many neat options. Input files are fastq files, and can be in gzip compressed format (end in .gz).
+
+Author: Charles Girardot  (charles.girardot@embl.de).
+
+Version: 1.0.3
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+You will want to read the `documentation`__.
+
+ .. __: http://gbcs.embl.de/tikiwiki/JemultiplexerDocHome
+
+------
+
+**Jemultiplexer parameter list**
+
+This is an exhaustive list of Jemultiplexer options::
+
+  FASTQ_FILE1=File
+  F1=File                     Input fastq file (optionally gzipped) for single end data, or first read in paired end data. 
+                              Required. 
+
+  FASTQ_FILE2=File
+  F2=File                     Input fastq file (optionally gzipped) for the second read of paired end data. 
+                              Default value: null. 
+
+  BARCODE_FILE=File
+  BF=File                     Barcode file describing sequence list and sample names. Tab-delimited file with 2 
+                              columns, with the sample in col1 and the corresponding barcode in col2.
+                              Note, make sure one sample per line.
+                              If multiple barcode map to the same sample, barcodes can be combined using the OR operator '|'.
+                              e.g. a file listing ATAT and GAGG for sample1 and CCAA and TGTG for sample2 can be written like
+                              		sample1	ATAT|GAGG
+                              		sample2	CCAA|TGTG
+                              Finally, for the special situation of paired-end data in which barcodes differ at both 
+                              ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1 
+                              and read_2 can be distinguished using a ':' separator i.e. 
+                              		sample1	ATAT:GAGG
+                              		sample2	CCAA:TGTG
+                              Here understand that sample 1 is encoded with ATAT barcode at read_1 AND GAGG barcode at 
+                              read_2. Note that you can still combine barcodes using | e.g. 
+                              		sample1	ATAT|GAGG:CCAA|TGTG
+                              would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1 
+                              AND CCAA OR TGTG at read_2.
+                              Required. 
+
+  BARCODE_READ_POS=BarcodePosition
+  BPOS=BarcodePosition        For paired-end data, where to expect the barcode(s) : READ_1 (beginning of read from 
+                              FASTQ_FILE_1), READ_2 (beginning of read from FASTQ_FILE_2), BOTH (beginning of both 
+                              reads). Automatically set to READ_1 in single end mode.
+                              Default value: BOTH. This option can be set to 'null' to clear the default value. 
+                              Possible values: {READ_1, READ_2, BOTH} 
+
+  REDUNDANT_BARCODES=Boolean
+  BRED=Boolean                For paired-end data and when BARCODE_READ_POS == BOTH, this option indicates if both 
+                              read's barcodes encode redundant information, which is the usual situation 
+                              (REDUNDANT_BARCODES=true) i.e. barcodes are supposed to be the same at both ends or to 
+                              resolve to the same sample (when a pool of barcodes has been used for each sample).
+                              When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode different 
+                              information. For example, only one of the barcodes encodes the sample the read belongs to 
+                              while the second barcode might be a random barcode to tell apart PCR artefacts from real 
+                              duplicates. Another example is when both barcodes should be used  in a combined fashion 
+                              to resolve the sample. In the first example, you should use BPOS=BOTH BRED=false 
+                              BM=READ_1 while in the second example, you should have BPOS=BOTH BRED=false BM=BOTH (note 
+                              that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as Jemultiplexer 
+                              would then check the STRICT option to perform sample resolution).
+                              Importantly, when BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, LEN, barcode 
+                              matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept 
+                              different values for both barcodes in the form X:Z where X and Z are 2 integers.
+                              Default value: true. This option can be set to 'null' to clear the default value. 
+                              Possible values: {true, false} 
+
+  BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition
+  BM=BarcodePosition          Automatically set to READ_1 in single end mode. 
+                              For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to 
+                              resolve sample :
+                              	- use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used 
+                              for sample matching,
+                              	- use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used 
+                              for sample matching,
+                              	- use BM=BOTH (beginning of both reads) if both should be used ; when BM=BOTH, the 
+                              behaviour of Jemultiplexer is different based on the value of REDUNDANT_BARCODES.
+                              		If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample 
+                              and Jemultiplexer uses the two barcodes according to the STRICT value.
+                              		If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcode to each 
+                              sample (e.g. sample1 => AGAGTG:TTGATA) and Jemultiplexer needs both barcodes to find the 
+                              relevant sample. Note that this is the only situation in which all barcode matching 
+                              options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X 
+                              and Z are 2 integers.
+                              Default value: BOTH. This option can be set to 'null' to clear the default value. 
+                              Possible values: {READ_1, READ_2, BOTH} 
+
+  STRICT=Boolean
+  S=Boolean                   For paired-end data and when BARCODE_READ_POS == BOTH and BM=BOTH, tells whether both 
+                              barcodes should resolve to the same sample. When true and if only one of the two reads 
+                              has a barcode match, the read pair is ignored. When false and if only one of the two 
+                              reads has a barcode match, the read pair is assigned to the corresponding sample ; in 
+                              cases where reads resolve to different samples, the read pair is ignored.
+                              Default value: false. This option can be set to 'null' to clear the default value. 
+                              Possible values: {true, false} 
+
+  BCLEN=String
+  LEN=String                  Length of the barcode sequences, optional. Taken from barcode file when not given.
+                              In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct 
+                              length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing 
+                              the barcode length for read_1 and read_2 respectively.
+                              Default value: null. 
+
+  MAX_MISMATCHES=String
+  MM=String                   Maximum mismatches for a barcode to be considered a match. MM=null is like MM=0
+                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
+                              (note that most likely BRED=false as it does not make great sense otherwise), two 
+                              distinct values can be given here using the syntax MM=X:Z where X and Z are 2 integers to 
+                              use for read_1 and read_2 respectively.
+                              Default value: 1. This option can be set to 'null' to clear the default value. 
+
+  MIN_MISMATCH_DELTA=String
+  MMD=String                  Minimum difference between number of mismatches in the best and second best barcodes for 
+                              a barcode to be considered a match. MMD=null is like MMD=0
+                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
+                              (note that most likely BRED=false as it does not make great sense otherwise), two 
+                              distinct values can be given here using the syntax MMD=X:Z where X and Z are 2 integers 
+                              to use for read_1 and read_2 respectively.
+                              Default value: 1. This option can be set to 'null' to clear the default value. 
+
+  MIN_BASE_QUALITY=String
+  Q=String                    Minimum base quality. Any barcode bases falling below this quality will be considered a 
+                              mismatch even if the bases match. Q=null is like Q=0.
+                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
+                              (note that most likely BRED=false as it does not make great sense otherwise), two 
+                              distinct values can be given here using the syntax Q=X:Z where X and Z are 2 integers to 
+                              use for read_1 and read_2 respectively.
+                              Default value: 10. This option can be set to 'null' to clear the default value. 
+
+  XTRIMLEN=String
+  XT=String                   Extra number of base to be trimmed right after the barcode (only used if 
+                              CLIP_BARCODE=true). Default is 1 as an extra 'T' (or 'A' depending how you see it) is 
+                              added for barcode ligation but this default will be adapted according to the rules 
+                              below. XT=null is like XT=0.
+                              When running paired-end, two distinct values can be given using the syntax XT=X:Z where X 
+                              and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when 
+                              BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode as to 
+                              end up with reads of the same length (note that this can also be operated using ZT). If a 
+                              unique value is given, e.g. XT=1, while running paired-end the following rule applies : 
+                              (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode ; (2) 
+                              BPOS=BOTH, the value is used for both reads.
+                              Default value: 1. This option can be set to 'null' to clear the default value. 
+
+  ZTRIMLEN=String
+  ZT=String                   Extra number of bases to be trimmed from the barcode end i.e. 3' end. Pretty handy when a 
+                              pipeline is set and you already know you'll trim read at a given size. ZT=null is like 
+                              ZT=0.
+                              When running paired-end, two distinct values can be given here using the syntax ZT=X:Z 
+                              where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even 
+                              when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode 
+                              as to end up with reads of the same length (note that this can also be operated using 
+                              XT). Note that if a single value is passed, the value always applies to both reads in 
+                              paired-end mode without further consideration.
+                              Default value: 0. This option can be set to 'null' to clear the default value. 
+
+  CLIP_BARCODE=Boolean
+  C=Boolean                   Remove barcode sequence from read, as well as XTRIMLEN (and ZTRIMLEN) bases if 
+                              applicable, before writing to output file. If false, reads are written without 
+                              modification to output file. Apply to both barcodes when BPOS=BOTH.
+                              Default value: true. This option can be set to 'null' to clear the default value. 
+                              Possible values: {true, false} 
+
+  ADD_BARCODE_TO_HEADER=Boolean
+  ADD=Boolean                 Add matched barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH.
+                              If true, the string ':barcode' is added at the end of the read header with a ':' added 
+                              only if current read header does not end with ':'.
+                              If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second 
+                              read also has its own matched barcode written. Else, the read without a barcode receives 
+                              the barcode from the barcoded read.
+                              For example :
+                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
+                              becomes
+                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE'
+                              Default value: true. This option can be set to 'null' to clear the default value. 
+                              Possible values: {true, false} 
+
+  QUALITY_FORMAT=FastqQualityFormat
+  V=FastqQualityFormat        A value describing how the quality values are encoded in the fastq.  Either 'Solexa' for 
+                              pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and 
+                              above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift 
+                              of 33.  If this value is not specified (or 'null' is given), the quality format will be 
+                              detected automatically.
+                              Default value: Standard. This option can be set to 'null' to clear the default value. 
+                              Possible values: {Solexa, Illumina, Standard} 
+
+  GZIP_OUTPUTS=Boolean
+  GZ=Boolean                  Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames.
+                              Default value: true. This option can be set to 'null' to clear the default value. 
+                              Possible values: {true, false} 
+  BARCODE_DIAG_FILE=String
+  DIAG=String                 Name for a barcode match reporting file (not generated by default). Either a name 
+                              (in which case the file will be created in the output dir) or full path. This file will contain 
+                              a line per read pair with the barcode best matching the read subsequence or 'null' when no match 
+                              is found according to matching parameters and the final selected sample. This file is useful for 
+                              debugging or further processing in case both ends are barcoded.
+                              Default value: null
+
+  READ_NAME_REPLACE_CHAR=String
+  RCHAR=String                Replace spaces in read name/header using provided character. This is particularly handy 
+                              when you need to retain ADDed barcode in read name/header during mapping (everything 
+                              after space in read name is usually clipped in BAM files). For example, with RCHAR=':' :
+                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
+                              becomes
+                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE'
+                              Default value: null. 
+	</help>
+</tool>