Mercurial > repos > gbcs-embl-heidelberg > jemultiplexer
changeset 1:9764802ffae8 draft
Uploaded
author | gbcs-embl-heidelberg |
---|---|
date | Wed, 03 Sep 2014 04:11:49 -0400 |
parents | 687ced68db46 |
children | 1b79b43626ef |
files | jemultiplexer.xml |
diffstat | 1 files changed, 436 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<tool id="jedebarcoding" name="Jemultiplexer">
  <!--
    Galaxy wrapper for Jemultiplexer (fastq demultiplexer).
    Every form value is handed to jemultiplexer.py as a positional argument,
    in the order listed in the command template below.
    NOTE(review): the script itself is not part of this file; it presumably
    reads its arguments by position, so keep the order unchanged.
  -->
  <description>Demultiplexes multiplexed data</description>
  <requirements>
    <requirement type="package" version="&gt;=1.6">java</requirement>
  </requirements>
  <!--
    The last five arguments (barcode read position, barcode used for sample
    matching, redundant barcode flag, strict flag, second fastq file) are
    always emitted in that order; only the path to the value inside the
    nested conditionals differs between branches.
  -->
  <command interpreter="python">
    jemultiplexer.py
    $MpxData1
    $output1
    $output1.id
    $bsinputtype.barcodes
    "$bsinputtype.barcode_list"
    $__new_file_path__
    $MpxData1.ext
    $bcodelen
    $qualityFormat
    $maxMismatches
    $minBaseQuality
    $minMismatchingDelta
    $clipBarcodeCon.xTrimLen
    $zTrimLen
    $clipBarcodeCon.clipBarcode
    $addBarcodeToHeader
    $gzipOutput
    $barcodeDiagFile
    $rChar
    #if $singlePaired.sPaired == "paired":
      $singlePaired.barcodeReadPosCon.barcodeReadPos
      #if $singlePaired.barcodeReadPosCon.barcodeReadPos == "BOTH":
        $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching
        #if $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching == "BOTH":
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.redundantBarcode
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.strict
          $singlePaired.MpxData2
        #else:
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcode
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.strict
          $singlePaired.MpxData2
        #end if
      #else:
        $singlePaired.barcodeReadPosCon.barcodeForSampleMatching
        $singlePaired.barcodeReadPosCon.redundantBarcode
        $singlePaired.barcodeReadPosCon.strict
        $singlePaired.MpxData2
      #end if
    #else:
      $singlePaired.barcodeReadPos
      $singlePaired.barcodeForSampleMatching
      $singlePaired.redundantBarcode
      $singlePaired.strict
      $singlePaired.MpxData2
    #end if
  </command>
  <inputs>
    <param type="data" format="gz,fastq" name="MpxData1" label="Compressed (or not) FASTQ file" />
    <!--
      Single-end versus paired-end input. Branches that do not ask the user
      for a value declare it as a hidden parameter instead, so the command
      template can reference the same names whichever option is selected.
    -->
    <conditional name="singlePaired">
      <param name="sPaired" type="select" label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <param name="MpxData2" type="hidden" value="single" />
        <param name="barcodeReadPos" type="hidden" value="none" />
        <param name="redundantBarcode" type="hidden" value="none" />
        <param name="barcodeForSampleMatching" type="hidden" value="none" />
        <param name="strict" type="hidden" value="none" />
      </when>
      <when value="paired">
        <param name="MpxData2" type="data" format="gz,fastq" label="Compressed (or not) FASTQ file" />
        <conditional name="barcodeReadPosCon">
          <param name="barcodeReadPos" type="select" label="Barcode read position (BPOS)" help="where are the barcodes">
            <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
            <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
            <option value="BOTH">BOTH (beginning of both reads)</option>
          </param>
          <when value="READ_1">
            <param name="redundantBarcode" type="hidden" value="true" />
            <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
            <param name="strict" type="hidden" value="false" />
          </when>
          <when value="READ_2">
            <param name="redundantBarcode" type="hidden" value="true" />
            <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
            <param name="strict" type="hidden" value="false" />
          </when>
          <when value="BOTH">
            <conditional name="barcodeForSampleMatchingCon">
              <param name="barcodeForSampleMatching"
                     type="select"
                     label="Barcode for sample matching (BM)"
                     help="which barcode should be used for sample look up (BM option).">
                <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
                <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
                <option value="BOTH">BOTH (beginning of both reads)</option>
              </param>
              <when value="READ_1">
                <param name="redundantBarcode" type="hidden" value="true" />
                <param name="strict" type="hidden" value="false" />
              </when>
              <when value="READ_2">
                <param name="redundantBarcode" type="hidden" value="true" />
                <param name="strict" type="hidden" value="false" />
              </when>
              <when value="BOTH">
                <conditional name="redundantBarcodeCon">
                  <param name="redundantBarcode"
                         type="select"
                         label="Redundant barcodes (BRED)"
                         help="are the barcode REDUNDANT i.e. do they both resolve to the same sample (BRED option).">
                    <option value="true">True</option>
                    <option value="false" selected="true">False</option>
                  </param>
                  <when value="true">
                    <param name="strict"
                           type="select"
                           label="Strict (S)"
                           help="tells whether both barcodes should resolve to the same sample.">
                      <option value="true" selected="true">True</option>
                      <option value="false">False</option>
                    </param>
                  </when>
                  <when value="false">
                    <param name="strict" type="hidden" value="false" />
                  </when>
                </conditional>
              </when>
            </conditional>
          </when>
        </conditional>
      </when>
    </conditional>

    <!--
      Barcode set: either a dataset from the history or text pasted in the
      form. The unused alternative is a hidden parameter with value "none",
      so both names are always available to the command template.
    -->
    <conditional name="bsinputtype">
      <param name="bsinputtype_selector"
             type="select"
             label="Barcode set input type"
             help="You can either submit a barcode file or paste the list in a text field. Note: one sample per line.">
        <option value="bs_file" selected="true">Use a .bs tab-delimited file in the history</option>
        <option value="bs_textfield">Paste the barcodes list in a text field within the form</option>
      </param>
      <when value="bs_file">
        <param name="barcodes" type="data" format="bs" label="Barcode Set" />
        <param name="barcode_list" type="hidden" value="none" />
      </when>
      <when value="bs_textfield">
        <!--
          The label embeds HTML markup. Markup characters and quotes are
          escaped so that the attribute value stays well-formed XML; the
          numeric references are kept as literal text (&amp;#60; etc.) so the
          HTML layer shows the angle brackets around the placeholders.
        -->
        <param name="barcode_list"
               type="text"
               area="True"
               size="10x30"
               label="Barcode Set (&lt;span style=&quot;color:brown;&quot;&gt;one sample per line: &amp;#60;sample_name&amp;#62;&amp;#60;tab or space&amp;#62;&amp;#60;barcode&amp;#62;&lt;/span&gt;)" />
        <param name="barcodes" type="hidden" value="none" />
      </when>
    </conditional>

    <param name="bcodelen" type="text" value="6" label="Barcode Length (LEN)" />

    <param name="qualityFormat"
           type="select"
           label="Fastq Quality Format (V)"
           help="if you need other quality coding format, contact the galaxy administrators.">
      <option value="Standard" selected="true">Fastq - Illumina Casava V1.8 with Sanger coding quality (phred scaling + 33)</option>
      <option value="Illumina">Fastq - Illumina v1.3 or above coding of quality (phred scaling + 64)</option>
      <option value="Solexa">Fastq - Solexa-style quality (solexa scaling + 66)</option>
    </param>
    <param name="maxMismatches"
           type="text"
           value="1"
           label="Maximum Mismatches (MM)"
           help="maximum mismatches for a barcode to be considered a match." />
    <param name="minBaseQuality"
           type="text"
           value="10"
           label="Minimum base quality (Q)"
           help="any barcode bases falling below this quality will be considered a mismatch even if the bases match." />
    <param name="minMismatchingDelta"
           type="text"
           value="1"
           label="Minimum mismatch difference (MMD)"
           help="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match." />
    <param name="zTrimLen" type="text" value="0" label="Extra number of bases to be trimmed from the barcode end (ZT)" />
    <!-- xTrimLen is only asked for when the barcode is clipped; otherwise a hidden value of 1 is passed. -->
    <conditional name="clipBarcodeCon">
      <param name="clipBarcode" type="select" label="Remove barcode sequence from read (C)">
        <option value="true" selected="true">True</option>
        <option value="false">False</option>
      </param>
      <when value="true">
        <param name="xTrimLen" type="text" value="1" label="Extra number of base to be trimmed right after the barcode (XT)" />
      </when>
      <when value="false">
        <param name="xTrimLen" type="hidden" value="1" />
      </when>
    </conditional>
    <param name="addBarcodeToHeader" type="select" label="Add matched barcode at the end of the read header (ADD)">
      <option value="true" selected="true">True</option>
      <option value="false">False</option>
    </param>
    <param name="gzipOutput" type="select" label="Compress output (GZ)">
      <option value="true" selected="true">True</option>
      <option value="false">False</option>
    </param>
    <param name="barcodeDiagFile" type="select" label="Output barcode match reporting file (DIAG)">
      <option value="false" selected="true">False</option>
      <option value="true">True</option>
    </param>
    <!-- The option values are numeric codes; the option text shows the replacement character (the first one is a space). -->
    <param name="rChar" type="select" label="Replace white space in the read name/header with specified symbols (RCHAR)">
      <option value="1" selected="true"> </option>
      <option value="2">:</option>
      <option value="3">_</option>
      <option value="4">-</option>
    </param>
  </inputs>
  <outputs>
    <data format="html" name="output1" label="Demultiplexing stats on ${on_string}"/>
    <!-- Keep this as a nice example of reformatting;
    <data format="fastqsanger" name="output1" metadata_source="MpxData1" label="Demultiplexing stats on ${on_string}">
      <change_format>
        <when input="MpxData1.ext" value="fastqillumina" format="fastqillumina" />
      </change_format>
    </data> -->
  </outputs>

  <tests>
    <test>
      <param name="MpxData1" value="C1WLBACXX_lane7_1_sequence.txt" />
      <param name="MpxData2" value="C1WLBACXX_lane7_2_sequence.txt" />
      <param name="sPaired" value="paired" />
      <param name="bsinputtype_selector" value="bs_file" />
      <param name="barcodes" value="correct_barcodes_PE_both-ends_with_fnames.txt" />
      <param name="barcodeReadPos" value="BOTH" />
      <param name="barcodeForSampleMatching" value="BOTH" />
      <param name="redundantBarcode" value="false" />
      <param name="bcodelen" value="6" />
      <param name="qualityFormat" value="Standard" />
      <param name="maxMismatches" value="3" />
      <param name="minBaseQuality" value="20" />
      <param name="minMismatchingDelta" value="2" />
      <param name="zTrimLen" value="0" />
      <param name="clipBarcode" value="true" />
      <param name="xTrimLen" value="1" />
      <param name="addBarcodeToHeader" value="true" />
      <param name="gzipOutput" value="true" />
      <param name="barcodeDiagFile" value="true" />
      <param name="rChar" value="1" />
      <output name="output1" file="result.html" ftype="html"/>
    </test>
  </tests>

  <help>

**What it does**

Jemultiplexer : A fastq files demultiplexer with many neat options. Input files are fastq files, and can be in gzip compressed format (end in .gz).

Author: Charles Girardot (charles.girardot@embl.de).

Version: 1.0.3

------

**Know what you are doing**

.. class:: warningmark

You will want to read the `documentation`__.

 .. __: http://gbcs.embl.de/tikiwiki/JemultiplexerDocHome

------

**Jemultiplexer parameter list**

This is an exhaustive list of Jemultiplexer options::

    FASTQ_FILE1=File
    F1=File                   Input fastq file (optionally gzipped) for single end data, or first read in paired end data.
                              Required.

    FASTQ_FILE2=File
    F2=File                   Input fastq file (optionally gzipped) for the second read of paired end data.
                              Default value: null.

    BARCODE_FILE=File
    BF=File                   Barcode file describing sequence list and sample names. Tab-delimited file with 2
                              columns, with the sample in col1 and the corresponding barcode in col2.
                              Note, make sure one sample per line.
                              If multiple barcode map to the same sample, barcodes can be combined using the OR operator '|'.
                              i.e. the file above can be re-written like
                                  sample1 ATAT|GAGG
                                  sample2 CCAA|TGTG
                              Finally, for the special situation of paired-end data in which barcodes differ at both
                              ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1
                              and read_2 can be distinguished using a ':' separator i.e.
                                  sample1 ATAT:GAGG
                                  sample2 CCAA:TGTG
                              Here understand that sample 1 is encoded with ATAT barcode at read_1 AND GAGG barcode at
                              read_2. Note that you can still combine barcodes using | e.g.
                                  sample1 ATAT|GAGG:CCAA|TGTG
                              would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1
                              AND CCAA OR TGTG at read_2.
                              Required.

    BARCODE_READ_POS=BarcodePosition
    BPOS=BarcodePosition      For paired-end data, where to expect the barcode(s) : READ_1 (beginning of read from
                              FASTQ_FILE_1), READ_2 (beginning of read from FASTQ_FILE_2), BOTH (beginning of both
                              reads). Automatically set to READ_1 in single end mode.
                              Default value: BOTH. This option can be set to 'null' to clear the default value.
                              Possible values: {READ_1, READ_2, BOTH}

    REDUNDANT_BARCODES=Boolean
    BRED=Boolean              For paired-end data and when BARCODE_READ_POS == BOTH, this option indicates if both
                              read's barcodes encode redundant information, which is the usual situation
                              (REDUNDANT_BARCODES=true) i.e. barcodes are supposed to be the same at both ends or to
                              resolve to the same sample (when a pool of barcodes has been used for each sample).
                              When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode different
                              information. For example, only one of the barcodes encodes the sample the read belongs to
                              while the second barcode might be a random barcode to tell apart PCR artefacts from real
                              duplicates. Another example is when both barcodes should be used in a combined fashion
                              to resolve the sample. In the first example, you should use BPOS=BOTH BRED=false
                              BM=READ_1 while in the second example, you should have BPOS=BOTH BRED=false BM=BOTH (note
                              that with BPOS=BOTH BRED=true BM=BOTH), the behavior would be different as Jemultiplexer
                              would then check the STRICT option to perform sample resolution.
                              Importantly, when BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, BCLEN, barcode
                              matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept
                              different values for both barcodes in the form X:Z where X and Z are 2 integers.
                              Default value: true. This option can be set to 'null' to clear the default value.
                              Possible values: {true, false}

    BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition
    BM=BarcodePosition        Automatically set to READ_1 in single end mode.
                              For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to
                              resolve sample :
                              - use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used
                              for sample matching,
                              - use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used
                              for sample matching,
                              - use BM=BOTH (beginning of both reads) if both should be used ; when BM=BOTH, the
                              behaviour of Jemultiplexer is different based on the value of REDUNDANT_BARCODES.
                              If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample
                              and Jemultiplexer uses the two barcodes according to the STRICT value.
                              If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcode to each
                              sample (e.g. sample1 => AGAGTG:TTGATA) and Jemultiplexer needs both barcodes to find the
                              relevant sample. Note that this is the only situation in which all barcode matching
                              options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X
                              and Z are 2 integers.
                              Default value: BOTH. This option can be set to 'null' to clear the default value.
                              Possible values: {READ_1, READ_2, BOTH}

    STRICT=Boolean
    S=Boolean                 For paired-end data and when BARCODE_READ_POS == BOTH and BM=BOTH, tells whether both
                              barcodes should resolve to the same sample. When true and if only one of the two reads
                              has a barcode match, the read pair is ignored. When false and if only one of the two
                              reads has a barcode match, the read pair is assigned to the corresponding sample ; in
                              cases where reads resolve to different samples, the read pair is ignored.
                              Default value: false. This option can be set to 'null' to clear the default value.
                              Possible values: {true, false}

    BCLEN=String
    LEN=String                Length of the barcode sequences, optional. Taken from barcode file when not given.
                              In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct
                              length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing
                              the barcode length for read_1 and read_2 respectively.
                              Default value: null.

    MAX_MISMATCHES=String
    MM=String                 Maximum mismatches for a barcode to be considered a match. MM=null is like MM=0
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
                              (note that most likely BRED=false as it does not make great sense otherwise), two
                              distinct values can be given here using the syntax MM=X:Z where X and Z are 2 integers to
                              use for read_1 and read_2 respectively.
                              Default value: 1. This option can be set to 'null' to clear the default value.

    MIN_MISMATCH_DELTA=String
    MMD=String                Minimum difference between number of mismatches in the best and second best barcodes for
                              a barcode to be considered a match. MMD=null is like MMD=0
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
                              (note that most likely BRED=false as it does not make great sense otherwise), two
                              distinct values can be given here using the syntax MMD=X:Z where X and Z are 2 integers
                              to use for read_1 and read_2 respectively.
                              Default value: 1. This option can be set to 'null' to clear the default value.

    MIN_BASE_QUALITY=String
    Q=String                  Minimum base quality. Any barcode bases falling below this quality will be considered a
                              mismatch even if the bases match. Q=null is like Q=0.
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
                              (note that most likely BRED=false as it does not make great sense otherwise), two
                              distinct values can be given here using the syntax Q=X:Z where X and Z are 2 integers to
                              use for read_1 and read_2 respectively.
                              Default value: 10. This option can be set to 'null' to clear the default value.

    XTRIMLEN=String
    XT=String                 Extra number of base to be trimmed right after the barcode (only used if
                              CLIP_BARCODE=true). Default is 1 as an extra 'T' (or 'A' depending how you see it) is
                              added for barcode ligation but this default will be adapted according to the rules
                              below. XT=null is like XT=0.
                              When running paired-end, two distinct values can be given using the syntax XT=X:Z where X
                              and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when
                              BPOS=READ_1 or BPOS=READ_2, a X:Y syntax can be given to trim the read w/o barcode as to
                              end up with reads of the same length (note that this can also be operated using ZT). If a
                              unique value is given, e.g. XT=1, while running paired-end the following rule applies :
                              (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode ; (2)
                              BPOS=BOTH, the value is used for both reads.
                              Default value: 1. This option can be set to 'null' to clear the default value.

    ZTRIMLEN=String
    ZT=String                 Extra number of bases to be trimmed from the barcode end i.e. 3' end. Pretty handy when a
                              pipeline is set and you already know you'll trim read at a given size. ZT=null is like
                              ZT=0.
                              When running paired-end, two distinct values can be given here using the syntax ZT=X:Z
                              where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even
                              when BPOS=READ_1 or BPOS=READ_2, a X:Y syntax can be given to trim the read w/o barcode
                              as to end up with reads of the same length (note that this can also be operated using
                              XT). Note that if a single value is passed, the value always applies to both reads in
                              paired-end mode without further consideration.
                              Default value: 0. This option can be set to 'null' to clear the default value.

    CLIP_BARCODE=Boolean
    C=Boolean                 Remove barcode sequence from read, as well as XTRIMLEN (and ZTRIMLEN) bases if
                              applicable, before writing to output file. If false, reads are written without
                              modification to output file. Apply to both barcodes when BPOS=BOTH.
                              Default value: true. This option can be set to 'null' to clear the default value.
                              Possible values: {true, false}

    ADD_BARCODE_TO_HEADER=Boolean
    ADD=Boolean               Add matched barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH.
                              If true, the string ':barcode' is added at the end of the read header with a ':' added
                              only if current read header does not end with ':'.
                              If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second
                              read also has its own matched barcode written. Else, the read without a barcode receives
                              the barcode from the barcoded read.
                              For example :
                              '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
                              becomes
                              '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE'
                              Default value: true. This option can be set to 'null' to clear the default value.
                              Possible values: {true, false}

    QUALITY_FORMAT=FastqQualityFormat
    V=FastqQualityFormat      A value describing how the quality values are encoded in the fastq. Either 'Solexa' for
                              pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and
                              above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift
                              of 33. If this value is not specified (or 'null' is given), the quality format will be
                              detected automatically.
                              Default value: Standard. This option can be set to 'null' to clear the default value.
                              Possible values: {Solexa, Illumina, Standard}

    GZIP_OUTPUTS=Boolean
    GZ=Boolean                Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames.
                              Default value: true. This option can be set to 'null' to clear the default value.
                              Possible values: {true, false}

    BARCODE_DIAG_FILE=String
    DIAG=String               Name for a barcode match reporting file (not generated by default). Either a name
                              (in which case the file will be created in the output dir) or full path. This file will contain
                              a line per read pair with the barcode best matching the read subsequence or 'null' when no match
                              is found according to matching parameters and the final selected sample. This file is useful for
                              debugging or further processing in case both ends are barcoded.
                              Default value: null

    READ_NAME_REPLACE_CHAR=String
    RCHAR=String              Replace spaces in read name/header using provided character. This is particularly handy
                              when you need to retain ADDed barcode in read name/header during mapping (everything
                              after space in read name is usually clipped in BAM files). For example, with RCHAR=':' :
                              '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
                              becomes
                              '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE'
                              Default value: null.
  </help>
</tool>