Mercurial > repos > gbcs-embl-heidelberg > jemultiplexer
view jemultiplexer.xml @ 2:1b79b43626ef draft
Uploaded
author | gbcs-embl-heidelberg |
---|---|
date | Wed, 03 Sep 2014 04:12:06 -0400 |
parents | 9764802ffae8 |
children | 321b695b1a33 |
line wrap: on
line source
<!--
  Galaxy tool wrapper for Jemultiplexer: demultiplexes single-end or paired-end
  FASTQ data by barcode through the jemultiplexer.py wrapper script and reports
  the result as an HTML dataset (output1).
-->
<tool id="jedebarcoding" name="Jemultiplexer">
  <description>Demultiplexes multiplexed data</description>

  <requirements>
    <requirement type="package" version=">=1.6">java</requirement>
  </requirements>

  <!--
    All arguments are positional, so their order must stay in sync with
    jemultiplexer.py (not visible here; verify against the script before reordering).
    The last five arguments are always, in this order:
    barcodeReadPos, barcodeForSampleMatching, redundantBarcode, strict, MpxData2.
    They are read from whichever level of the nested "singlePaired" conditional
    defines them for the current selection.
    Every Cheetah directive sits on its own line: the colon form of #if / #else is
    line-oriented and cannot share a line with nested directives.
  -->
  <command interpreter="python">
    jemultiplexer.py
      $MpxData1
      $output1
      $output1.id
      $bsinputtype.barcodes
      "$bsinputtype.barcode_list"
      $__new_file_path__
      $MpxData1.ext
      $bcodelen
      $qualityFormat
      $maxMismatches
      $minBaseQuality
      $minMismatchingDelta
      $clipBarcodeCon.xTrimLen
      $zTrimLen
      $clipBarcodeCon.clipBarcode
      $addBarcodeToHeader
      $gzipOutput
      $barcodeDiagFile
      $rChar
    #if $singlePaired.sPaired == "paired":
      $singlePaired.barcodeReadPosCon.barcodeReadPos
      #if $singlePaired.barcodeReadPosCon.barcodeReadPos == "BOTH":
        $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching
        #if $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching == "BOTH":
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.redundantBarcode
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.strict
          $singlePaired.MpxData2
        #else:
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcode
          $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.strict
          $singlePaired.MpxData2
        #end if
      #else:
        $singlePaired.barcodeReadPosCon.barcodeForSampleMatching
        $singlePaired.barcodeReadPosCon.redundantBarcode
        $singlePaired.barcodeReadPosCon.strict
        $singlePaired.MpxData2
      #end if
    #else:
      $singlePaired.barcodeReadPos
      $singlePaired.barcodeForSampleMatching
      $singlePaired.redundantBarcode
      $singlePaired.strict
      $singlePaired.MpxData2
    #end if
  </command>

  <inputs>
    <!-- First (or only) FASTQ file, optionally gzipped. -->
    <param type="data" format="gz,fastq" name="MpxData1" label="Compressed (or not) FASTQ file" />

    <!--
      Single-end vs paired-end. Branches that do not expose an option to the user
      define it as a hidden parameter instead, so that the command template can
      always emit the same five trailing arguments.
    -->
    <conditional name="singlePaired">
      <param name="sPaired" type="select" label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <!-- Placeholder values for the paired-end-only arguments. -->
        <param name="MpxData2" type="hidden" value="single" />
        <param name="barcodeReadPos" type="hidden" value="none" />
        <param name="redundantBarcode" type="hidden" value="none" />
        <param name="barcodeForSampleMatching" type="hidden" value="none" />
        <param name="strict" type="hidden" value="none" />
      </when>
      <when value="paired">
        <param name="MpxData2" type="data" format="gz,fastq" label="Compressed (or not) FASTQ file" />
        <conditional name="barcodeReadPosCon">
          <param name="barcodeReadPos" type="select" label="Barcode read position (BPOS)" help="where are the barcodes">
            <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
            <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
            <option value="BOTH">BOTH (beginning of both reads)</option>
          </param>
          <when value="READ_1">
            <param name="redundantBarcode" type="hidden" value="true" />
            <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
            <param name="strict" type="hidden" value="false" />
          </when>
          <when value="READ_2">
            <param name="redundantBarcode" type="hidden" value="true" />
            <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
            <param name="strict" type="hidden" value="false" />
          </when>
          <when value="BOTH">
            <!-- BM is only selectable when both reads carry a barcode. -->
            <conditional name="barcodeForSampleMatchingCon">
              <param name="barcodeForSampleMatching" type="select" label="Barcode for sample matching (BM)" help="which barcode should be used for sample look up (BM option).">
                <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
                <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
                <option value="BOTH">BOTH (beginning of both reads)</option>
              </param>
              <when value="READ_1">
                <param name="redundantBarcode" type="hidden" value="true" />
                <param name="strict" type="hidden" value="false" />
              </when>
              <when value="READ_2">
                <param name="redundantBarcode" type="hidden" value="true" />
                <param name="strict" type="hidden" value="false" />
              </when>
              <when value="BOTH">
                <!-- BRED and S are only selectable when BPOS=BOTH and BM=BOTH. -->
                <conditional name="redundantBarcodeCon">
                  <param name="redundantBarcode" type="select" label="Redundant barcodes (BRED)" help="are the barcode REDUNDANT i.e. do they both resolve to the same sample (BRED option).">
                    <option value="true">True</option>
                    <option value="false" selected="true">False</option>
                  </param>
                  <when value="true">
                    <param name="strict" type="select" label="Strict (S)" help="tells whether both barcodes should resolve to the same sample.">
                      <option value="true" selected="true">True</option>
                      <option value="false">False</option>
                    </param>
                  </when>
                  <when value="false">
                    <param name="strict" type="hidden" value="false" />
                  </when>
                </conditional>
              </when>
            </conditional>
          </when>
        </conditional>
      </when>
    </conditional>

    <!--
      Barcode set: either a history dataset or a pasted list. The source that is
      not selected is passed to the command as the literal "none".
    -->
    <conditional name="bsinputtype">
      <param name="bsinputtype_selector" type="select" label="Barcode set input type" help="You can either submit a barcode file or paste the list in a text field. Note: one sample per line.">
        <option value="bs_file" selected="true">Use a .bs tab-delimited file in the history</option>
        <option value="bs_textfield">Paste the barcodes list in a text field within the form</option>
      </param>
      <when value="bs_file">
        <param name="barcodes" type="data" format="bs" label="Barcode Set" />
        <param name="barcode_list" type="hidden" value="none" />
      </when>
      <when value="bs_textfield">
        <!--
          The label carries HTML markup, so it must be escaped to keep this file
          well-formed: "<", ">" and the double quotes of the span are written as
          entities, and the ampersand of each numeric reference is escaped too so
          that the parsed label still contains the literal references.
          NOTE(review): assumes the label is rendered as HTML by Galaxy; confirm.
        -->
        <param name="barcode_list" type="text" area="True" size="10x30" label="Barcode Set (&lt;span style=&quot;color:brown;&quot;&gt;one sample per line: &amp;#60;sample_name&amp;#62;&amp;#60;tab or space&amp;#62;&amp;#60;barcode&amp;#62;&lt;/span&gt;)"/>
        <param name="barcodes" type="hidden" value="none" />
      </when>
    </conditional>

    <!-- Kept as free text: the help below documents an X:Z form for these options. -->
    <param name="bcodelen" type="text" value="6" label="Barcode Length (LEN)" />
    <param name="qualityFormat" type="select" label="Fastq Quality Format (V)" help="if you need other quality coding format, contact the galaxy administrators.">
      <option value="Standard" selected="true">Fastq - Illumina Casava V1.8 with Sanger coding quality (phred scaling + 33)</option>
      <option value="Illumina">Fastq - Illumina v1.3 or above coding of quality (phred scaling + 64)</option>
      <option value="Solexa">Fastq - Solexa-style quality (solexa scaling + 66)</option>
    </param>
    <param name="maxMismatches" type="text" value="1" label="Maximum Mismatches (MM)" help="maximum mismatches for a barcode to be considered a match." />
    <param name="minBaseQuality" type="text" value="10" label="Minimum base quality (Q)" help="any barcode bases falling below this quality will be considered a mismatch even if the bases match." />
    <param name="minMismatchingDelta" type="text" value="1" label="Minimum mismatch difference (MMD)" help="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match." />
    <param name="zTrimLen" type="text" value="0" label="Extra number of bases to be trimmed from the barcode end (ZT)" />

    <!-- XT is only offered when the barcode is clipped; otherwise a hidden value is passed. -->
    <conditional name="clipBarcodeCon">
      <param name="clipBarcode" type="select" label="Remove barcode sequence from read (C)">
        <option value="true" selected="true">True</option>
        <option value="false">False</option>
      </param>
      <when value="true">
        <param name="xTrimLen" type="text" value="1" label="Extra number of base to be trimmed right after the barcode (XT)" />
      </when>
      <when value="false">
        <param name="xTrimLen" type="hidden" value="1" />
      </when>
    </conditional>

    <param name="addBarcodeToHeader" type="select" label="Add matched barcode at the end of the read header (ADD)">
      <option value="true" selected="true">True</option>
      <option value="false">False</option>
    </param>
    <param name="gzipOutput" type="select" label="Compress output (GZ)">
      <option value="true" selected="true">True</option>
      <option value="false">False</option>
    </param>
    <param name="barcodeDiagFile" type="select" label="Output barcode match reporting file (DIAG)">
      <option value="false" selected="false">False</option>
      <option value="true">True</option>
    </param>
    <!--
      The option values are numeric codes, not the characters themselves
      (presumably translated by jemultiplexer.py; verify against the script).
      The text of the first option is a single space.
    -->
    <param name="rChar" type="select" label="Replace white space in the read name/header with specified symbols (RCHAR)">
      <option value="1" selected="1"> </option>
      <option value="2">:</option>
      <option value="3">_</option>
      <option value="4">-</option>
    </param>
  </inputs>

  <outputs>
    <data format="html" name="output1" label="Demultiplexing stats on ${on_string}"/>
    <!-- Keep this as a nice example of reformatting;
    <data format="fastqsanger" name="output1" metadata_source="MpxData1" label="Demultiplexing stats on ${on_string}">
      <change_format>
        <when input="MpxData1.ext" value="fastqillumina" format="fastqillumina" />
      </change_format>
    </data>
    -->
  </outputs>

  <!-- Paired-end test with barcodes on both reads, both used for sample matching. -->
  <tests>
    <test>
      <param name="MpxData1" value="C1WLBACXX_lane7_1_sequence.txt" />
      <param name="MpxData2" value="C1WLBACXX_lane7_2_sequence.txt" />
      <param name="sPaired" value="paired" />
      <param name="bsinputtype_selector" value="bs_file" />
      <param name="barcodes" value="correct_barcodes_PE_both-ends_with_fnames.txt" />
      <param name="barcodeReadPos" value="BOTH" />
      <param name="barcodeForSampleMatching" value="BOTH" />
      <param name="redundantBarcode" value="false" />
      <param name="bcodelen" value="6" />
      <param name="qualityFormat" value="Standard" />
      <param name="maxMismatches" value="3" />
      <param name="minBaseQuality" value="20" />
      <param name="minMismatchingDelta" value="2" />
      <param name="zTrimLen" value="0" />
      <param name="clipBarcode" value="true" />
      <param name="xTrimLen" value="1" />
      <param name="addBarcodeToHeader" value="true" />
      <param name="gzipOutput" value="true" />
      <param name="barcodeDiagFile" value="true" />
      <param name="rChar" value="1" />
      <output name="output1" file="result.html" ftype="html"/>
    </test>
  </tests>

  <!-- Help text follows; whitespace inside the help element is content, so it is not re-indented here. -->
  <help> **What it does** Jemultiplexer : A fastq files demultiplexer with many neat options. Input files are fastq files, and can be in gzip compressed format (end in .gz). Author: Charles Girardot (charles.girardot@embl.de). Version: 1.0.3 ------ **Know what you are doing** .. class:: warningmark You will want to read the `documentation`__. .. __: http://gbcs.embl.de/tikiwiki/JemultiplexerDocHome ------ **Jemultiplexer parameter list** This is an exhaustive list of Jemultiplexer options:: FASTQ_FILE1=File F1=File Input fastq file (optionally gzipped) for single end data, or first read in paired end data. Required. FASTQ_FILE2=File F2=File Input fastq file (optionally gzipped) for the second read of paired end data. Default value: null. BARCODE_FILE=File BF=File Barcode file describing sequence list and sample names. Tab-delimited file with 2 columns, with the sample in col1 and the corresponding barcode in col2. Note, make sure one sample per line. If multiple barcode map to the same sample, barcodes can be combined using the OR operator '|'. i.e. 
the file above can be re-written like sample1 ATAT|GAGG sample2 CCAA|TGTG Finally, for the special situation of paired-end data in which barcodes differ at both ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1 and read_2 can be distinguished using a ':' separator i.e. sample1 ATAT:GAGG sample2 CCAA:TGTG Here understand that sample 1 is encoded with ATAT barcode at read_1 AND GAGG barcode at read_2. Note that you can still combine barcodes using | e.g. sample1 ATAT|GAGG:CCAA|TGTG would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1 AND CCAA OR TGTG at read_2. Required. BARCODE_READ_POS=BarcodePosition BPOS=BarcodePosition For paired-end data, where to expect the barcode(s) : READ_1 (beginning of read from FASTQ_FILE_1), READ_2 (beginning of read from FASTQ_FILE_2), BOTH (beginning of both reads). Automatically set to READ_1 in single end mode. Default value: BOTH. This option can be set to 'null' to clear the default value. Possible values: {READ_1, READ_2, BOTH} REDUNDANT_BARCODES=Boolean BRED=Boolean For paired-end data and when BARCODE_READ_POS == BOTH, this option indicates if both read's barcodes encode redundant information, which is the usual situation (REDUNDANT_BARCODES=true) i.e. barcodes are supposed to be the same at both ends or to resolve to the same sample (when a pool of barcodes has been used for each sample). When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode different information. For example, only one of the barcodes encodes the sample the read belongs to while the second barcode might be a random barcode to tell apart PCR artefacts from real duplicates. Another example is when both barcodes should be used in a combined fashion to resolve the sample. 
In the first example, you should use BPOS=BOTH BRED=false BM=READ_1 while in the second example, you should have BPOS=BOTH BRED=false BM=BOTH (note that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as Jemultiplexer would then check the STRICT option to perform sample resolution). Importantly, when BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, BCLEN, barcode matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept different values for both barcodes in the form X:Z where X and Z are 2 integers. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition BM=BarcodePosition Automatically set to READ_1 in single end mode. For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to resolve sample : - use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used for sample matching, - use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used for sample matching, - use BM=BOTH (beginning of both reads) if both should be used ; when BM=BOTH, the behaviour of Jemultiplexer is different based on the value of REDUNDANT_BARCODES. If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample and Jemultiplexer uses the two barcodes according to the STRICT value. If REDUNDANT_BARCODES=false, the barcode file should map a pair of barcodes to each sample (e.g. sample1 => AGAGTG:TTGATA) and Jemultiplexer needs both barcodes to find the relevant sample. Note that this is the only situation in which all barcode matching options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X and Z are 2 integers. Default value: BOTH. This option can be set to 'null' to clear the default value. 
Possible values: {READ_1, READ_2, BOTH} STRICT=Boolean S=Boolean For paired-end data and when BARCODE_READ_POS == BOTH and BM=BOTH, tells whether both barcodes should resolve to the same sample. When true and if only one of the two reads has a barcode match, the read pair is ignored. When false and if only one of the two reads has a barcode match, the read pair is assigned to the corresponding sample ; in cases where reads resolve to different samples, the read pair is ignored. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} BCLEN=String LEN=String Length of the barcode sequences, optional. Taken from barcode file when not given. In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct lengths can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing the barcode length for read_1 and read_2 respectively. Default value: null. MAX_MISMATCHES=String MM=String Maximum mismatches for a barcode to be considered a match. MM=null is like MM=0. In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (note that most likely BRED=false as it does not make great sense otherwise), two distinct values can be given here using the syntax MM=X:Z where X and Z are 2 integers to use for read_1 and read_2 respectively. Default value: 1. This option can be set to 'null' to clear the default value. MIN_MISMATCH_DELTA=String MMD=String Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match. MMD=null is like MMD=0. In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (note that most likely BRED=false as it does not make great sense otherwise), two distinct values can be given here using the syntax MMD=X:Z where X and Z are 2 integers to use for read_1 and read_2 respectively. Default value: 1. 
This option can be set to 'null' to clear the default value. MIN_BASE_QUALITY=String Q=String Minimum base quality. Any barcode bases falling below this quality will be considered a mismatch even if the bases match. Q=null is like Q=0. In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (note that most likely BRED=false as it does not make great sense otherwise), two distinct values can be given here using the syntax Q=X:Z where X and Z are 2 integers to use for read_1 and read_2 respectively. Default value: 10. This option can be set to 'null' to clear the default value. XTRIMLEN=String XT=String Extra number of bases to be trimmed right after the barcode (only used if CLIP_BARCODE=true). Default is 1 as an extra 'T' (or 'A' depending how you see it) is added for barcode ligation but this default will be adapted according to the rules below. XT=null is like XT=0. When running paired-end, two distinct values can be given using the syntax XT=X:Z where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode so as to end up with reads of the same length (note that this can also be operated using ZT). If a unique value is given, e.g. XT=1, while running paired-end the following rule applies : (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode ; (2) BPOS=BOTH, the value is used for both reads. Default value: 1. This option can be set to 'null' to clear the default value. ZTRIMLEN=String ZT=String Extra number of bases to be trimmed from the barcode end i.e. 3' end. Pretty handy when a pipeline is set and you already know you'll trim read at a given size. ZT=null is like ZT=0. When running paired-end, two distinct values can be given here using the syntax ZT=X:Z where X and Z are 2 integers to use for read_1 and read_2 respectively. 
Note that even when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode so as to end up with reads of the same length (note that this can also be operated using XT). Note that if a single value is passed, the value always applies to both reads in paired-end mode without further consideration. Default value: 0. This option can be set to 'null' to clear the default value. CLIP_BARCODE=Boolean C=Boolean Remove barcode sequence from read, as well as XTRIMLEN (and ZTRIMLEN) bases if applicable, before writing to output file. If false, reads are written without modification to output file. Applies to both barcodes when BPOS=BOTH. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} ADD_BARCODE_TO_HEADER=Boolean ADD=Boolean Add matched barcode at the end of the read header. Applies to both barcodes when BPOS=BOTH. If true, the string ':barcode' is added at the end of the read header with a ':' added only if current read header does not end with ':'. If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second read also has its own matched barcode written. Else, the read without a barcode receives the barcode from the barcoded read. For example : '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:' becomes '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE' Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} QUALITY_FORMAT=FastqQualityFormat V=FastqQualityFormat A value describing how the quality values are encoded in the fastq. Either 'Solexa' for pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift of 33. If this value is not specified (or 'null' is given), the quality format will be detected automatically. Default value: Standard. 
This option can be set to 'null' to clear the default value. Possible values: {Solexa, Illumina, Standard} GZIP_OUTPUTS=Boolean GZ=Boolean Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} BARCODE_DIAG_FILE=String DIAG=String Name for a barcode match reporting file (not generated by default). Either a name (in which case the file will be created in the output dir) or full path. This file will contain a line per read pair with the barcode best matching the read subsequence or 'null' when no match is found according to matching parameters and the final selected sample. This file is useful for debugging or further processing in case both ends are barcoded. Default value: null. READ_NAME_REPLACE_CHAR=String RCHAR=String Replace spaces in read name/header using provided character. This is particularly handy when you need to retain ADDed barcode in read name/header during mapping (everything after space in read name is usually clipped in BAM files). For example, with RCHAR=':' : '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:' becomes '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE' Default value: null. </help> </tool>